From c0d389c2836dbc5c7eb63386d31cf68ae3868ed7 Mon Sep 17 00:00:00 2001 From: rembo10 Date: Sat, 5 Sep 2015 16:07:51 -0700 Subject: [PATCH 01/44] Updated changelog with right diff urls --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d45d383b..af56c17f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ Highlights: * Fixed: Pushover notifications * Improved: Rutracker logging, switched to requests lib -The full list of commits can be found [here](https://github.com/rembo10/headphones/compare/v0.5.6...v0.5.7). +The full list of commits can be found [here](https://github.com/rembo10/headphones/compare/v0.5.8...v0.5.9). ## v0.5.8 Released 13 July 2015 @@ -30,7 +30,7 @@ Highlights: * Improved: Set localhost as default * Improved: Better single artist scanning -The full list of commits can be found [here](https://github.com/rembo10/headphones/compare/v0.5.6...v0.5.7). +The full list of commits can be found [here](https://github.com/rembo10/headphones/compare/v0.5.7...v0.5.8). ## v0.5.7 Released 01 July 2015 From 715e4977893bf175703ee65f720385b0e9fff1e1 Mon Sep 17 00:00:00 2001 From: Bas Stottelaar Date: Thu, 10 Sep 2015 19:42:07 +0200 Subject: [PATCH 02/44] Upgraded requests to git-master and forced urllib3 to git-master. Fixes strange errors with pyOpenSSL. See https://github.com/shazow/urllib3/issues/556. --- lib/requests/LICENSE | 13 - lib/requests/NOTICE | 54 ---- lib/requests/README.rst | 85 ----- lib/requests/__init__.py | 10 +- lib/requests/adapters.py | 25 +- lib/requests/api.py | 40 ++- lib/requests/auth.py | 9 +- lib/requests/compat.py | 53 ---- lib/requests/cookies.py | 116 ++++--- lib/requests/models.py | 101 +++--- lib/requests/packages/README.rst | 8 + lib/requests/packages/urllib3/__init__.py | 9 +- lib/requests/packages/urllib3/_collections.py | 227 ++++++++++--- lib/requests/packages/urllib3/connection.py | 31 +- .../packages/urllib3/connectionpool.py | 133 +++++--- .../packages/urllib3/contrib/appengine.py | 222 +++++++++++++ .../packages/urllib3/contrib/pyopenssl.py | 60 ++-- lib/requests/packages/urllib3/exceptions.py | 34 ++ lib/requests/packages/urllib3/poolmanager.py | 23 +- lib/requests/packages/urllib3/request.py | 12 +- lib/requests/packages/urllib3/response.py | 300 +++++++++++++----- .../packages/urllib3/util/connection.py | 11 +- .../packages/urllib3/util/response.py | 51 +++ lib/requests/packages/urllib3/util/retry.py | 4 +- lib/requests/packages/urllib3/util/ssl_.py | 108 ++++--- lib/requests/packages/urllib3/util/url.py | 2 + lib/requests/sessions.py | 54 ++-- lib/requests/utils.py | 41 ++- 28 files changed, 1219 insertions(+), 617 deletions(-) delete mode 100644 lib/requests/LICENSE delete mode 100644 lib/requests/NOTICE delete mode 100644 lib/requests/README.rst create mode 100644 lib/requests/packages/README.rst create mode 100644 lib/requests/packages/urllib3/contrib/appengine.py diff --git a/lib/requests/LICENSE b/lib/requests/LICENSE deleted file mode 100644 index 8c5e7584..00000000 --- a/lib/requests/LICENSE +++ /dev/null @@ -1,13 +0,0 @@ -Copyright 2014 Kenneth Reitz - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/lib/requests/NOTICE b/lib/requests/NOTICE deleted file mode 100644 index f583e47a..00000000 --- a/lib/requests/NOTICE +++ /dev/null @@ -1,54 +0,0 @@ -Requests includes some vendorized python libraries to ease installation. - -Urllib3 License -=============== - -This is the MIT license: http://www.opensource.org/licenses/mit-license.php - -Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt), -Modifications copyright 2012 Kenneth Reitz. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -Chardet License -=============== - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -02110-1301 USA - - -CA Bundle License -================= - -This Source Code Form is subject to the terms of the Mozilla Public -License, v. 2.0. If a copy of the MPL was not distributed with this -file, You can obtain one at http://mozilla.org/MPL/2.0/. diff --git a/lib/requests/README.rst b/lib/requests/README.rst deleted file mode 100644 index e9f63ef8..00000000 --- a/lib/requests/README.rst +++ /dev/null @@ -1,85 +0,0 @@ -Requests: HTTP for Humans -========================= - -.. image:: https://badge.fury.io/py/requests.png - :target: http://badge.fury.io/py/requests - -.. image:: https://pypip.in/d/requests/badge.png - :target: https://crate.io/packages/requests/ - - -Requests is an Apache2 Licensed HTTP library, written in Python, for human -beings. - -Most existing Python modules for sending HTTP requests are extremely -verbose and cumbersome. Python's builtin urllib2 module provides most of -the HTTP capabilities you should need, but the api is thoroughly broken. -It requires an enormous amount of work (even method overrides) to -perform the simplest of tasks. - -Things shouldn't be this way. Not in Python. - -.. code-block:: python - - >>> r = requests.get('https://api.github.com', auth=('user', 'pass')) - >>> r.status_code - 204 - >>> r.headers['content-type'] - 'application/json' - >>> r.text - ... - -See `the same code, without Requests `_. - -Requests allow you to send HTTP/1.1 requests. You can add headers, form data, -multipart files, and parameters with simple Python dictionaries, and access the -response data in the same way. It's powered by httplib and `urllib3 -`_, but it does all the hard work and crazy -hacks for you. - - -Features --------- - -- International Domains and URLs -- Keep-Alive & Connection Pooling -- Sessions with Cookie Persistence -- Browser-style SSL Verification -- Basic/Digest Authentication -- Elegant Key/Value Cookies -- Automatic Decompression -- Unicode Response Bodies -- Multipart File Uploads -- Connection Timeouts -- Thread-safety -- HTTP(S) proxy support - - -Installation ------------- - -To install Requests, simply: - -.. code-block:: bash - - $ pip install requests - - -Documentation -------------- - -Documentation is available at http://docs.python-requests.org/. - - -Contribute ----------- - -#. Check for open issues or open a fresh issue to start a discussion around a feature idea or a bug. There is a `Contributor Friendly`_ tag for issues that should be ideal for people who are not very familiar with the codebase yet. -#. If you feel uncomfortable or uncertain about an issue or your changes, feel free to email @sigmavirus24 and he will happily help you via email, Skype, remote pairing or whatever you are comfortable with. -#. Fork `the repository`_ on GitHub to start making your changes to the **master** branch (or branch off of it). -#. Write a test which shows that the bug was fixed or that the feature works as expected. -#. Send a pull request and bug the maintainer until it gets merged and published. :) Make sure to add yourself to AUTHORS_. - -.. _`the repository`: http://github.com/kennethreitz/requests -.. _AUTHORS: https://github.com/kennethreitz/requests/blob/master/AUTHORS.rst -.. _Contributor Friendly: https://github.com/kennethreitz/requests/issues?direction=desc&labels=Contributor+Friendly&page=1&sort=updated&state=open diff --git a/lib/requests/__init__.py b/lib/requests/__init__.py index ac2b06c8..d2471284 100644 --- a/lib/requests/__init__.py +++ b/lib/requests/__init__.py @@ -6,7 +6,7 @@ # / """ -requests HTTP library +Requests HTTP library ~~~~~~~~~~~~~~~~~~~~~ Requests is an HTTP library, written in Python, for human beings. Basic GET @@ -36,17 +36,17 @@ usage: The other HTTP methods are supported - see `requests.api`. Full documentation is at . -:copyright: (c) 2014 by Kenneth Reitz. +:copyright: (c) 2015 by Kenneth Reitz. :license: Apache 2.0, see LICENSE for more details. """ __title__ = 'requests' -__version__ = '2.5.1' -__build__ = 0x020501 +__version__ = '2.7.0' +__build__ = 0x020700 __author__ = 'Kenneth Reitz' __license__ = 'Apache 2.0' -__copyright__ = 'Copyright 2014 Kenneth Reitz' +__copyright__ = 'Copyright 2015 Kenneth Reitz' # Attempt to enable urllib3's SNI support, if possible try: diff --git a/lib/requests/adapters.py b/lib/requests/adapters.py index c892853b..f911fc57 100644 --- a/lib/requests/adapters.py +++ b/lib/requests/adapters.py @@ -11,13 +11,14 @@ and maintain connections. import socket from .models import Response -from .packages.urllib3 import Retry from .packages.urllib3.poolmanager import PoolManager, proxy_from_url from .packages.urllib3.response import HTTPResponse from .packages.urllib3.util import Timeout as TimeoutSauce +from .packages.urllib3.util.retry import Retry from .compat import urlparse, basestring from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, - prepend_scheme_if_needed, get_auth_from_url, urldefragauth) + prepend_scheme_if_needed, get_auth_from_url, urldefragauth, + select_proxy) from .structures import CaseInsensitiveDict from .packages.urllib3.exceptions import ConnectTimeoutError from .packages.urllib3.exceptions import HTTPError as _HTTPError @@ -35,6 +36,7 @@ from .auth import _basic_auth_str DEFAULT_POOLBLOCK = False DEFAULT_POOLSIZE = 10 DEFAULT_RETRIES = 0 +DEFAULT_POOL_TIMEOUT = None class BaseAdapter(object): @@ -237,8 +239,7 @@ class HTTPAdapter(BaseAdapter): :param url: The URL to connect to. :param proxies: (optional) A Requests-style dictionary of proxies used on this request. """ - proxies = proxies or {} - proxy = proxies.get(urlparse(url.lower()).scheme) + proxy = select_proxy(url, proxies) if proxy: proxy = prepend_scheme_if_needed(proxy, 'http') @@ -271,12 +272,10 @@ class HTTPAdapter(BaseAdapter): :class:`HTTPAdapter `. :param request: The :class:`PreparedRequest ` being sent. - :param proxies: A dictionary of schemes to proxy URLs. + :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs. """ - proxies = proxies or {} + proxy = select_proxy(request.url, proxies) scheme = urlparse(request.url).scheme - proxy = proxies.get(scheme) - if proxy and scheme != 'https': url = urldefragauth(request.url) else: @@ -309,7 +308,6 @@ class HTTPAdapter(BaseAdapter): :class:`HTTPAdapter `. :param proxies: The url of the proxy being used for this request. - :param kwargs: Optional additional keyword arguments. """ headers = {} username, password = get_auth_from_url(proxy) @@ -326,8 +324,8 @@ class HTTPAdapter(BaseAdapter): :param request: The :class:`PreparedRequest ` being sent. :param stream: (optional) Whether to stream the request content. :param timeout: (optional) How long to wait for the server to send - data before giving up, as a float, or a (`connect timeout, read - timeout `_) tuple. + data before giving up, as a float, or a :ref:`(connect timeout, + read timeout) ` tuple. :type timeout: float or tuple :param verify: (optional) Whether to verify SSL certificates. :param cert: (optional) Any user-provided SSL certificate to be trusted. @@ -375,7 +373,7 @@ class HTTPAdapter(BaseAdapter): if hasattr(conn, 'proxy_pool'): conn = conn.proxy_pool - low_conn = conn._get_conn(timeout=timeout) + low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT) try: low_conn.putrequest(request.method, @@ -407,9 +405,6 @@ class HTTPAdapter(BaseAdapter): # Then, reraise so that we can handle the actual exception. low_conn.close() raise - else: - # All is well, return the connection to the pool. - conn._put_conn(low_conn) except (ProtocolError, socket.error) as err: raise ConnectionError(err, request=request) diff --git a/lib/requests/api.py b/lib/requests/api.py index 1469b05c..72a777b2 100644 --- a/lib/requests/api.py +++ b/lib/requests/api.py @@ -16,7 +16,6 @@ from . import sessions def request(method, url, **kwargs): """Constructs and sends a :class:`Request `. - Returns :class:`Response ` object. :param method: method for the new :class:`Request` object. :param url: URL for the new :class:`Request` object. @@ -28,8 +27,8 @@ def request(method, url, **kwargs): :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': ('filename', fileobj)}``) for multipart encoding upload. :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. :param timeout: (optional) How long to wait for the server to send data - before giving up, as a float, or a (`connect timeout, read timeout - `_) tuple. + before giving up, as a float, or a :ref:`(connect timeout, read + timeout) ` tuple. :type timeout: float or tuple :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed. :type allow_redirects: bool @@ -37,6 +36,8 @@ def request(method, url, **kwargs): :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided. :param stream: (optional) if ``False``, the response content will be immediately downloaded. :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. + :return: :class:`Response ` object + :rtype: requests.Response Usage:: @@ -54,22 +55,27 @@ def request(method, url, **kwargs): return response -def get(url, **kwargs): - """Sends a GET request. Returns :class:`Response` object. +def get(url, params=None, **kwargs): + """Sends a GET request. :param url: URL for the new :class:`Request` object. + :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ kwargs.setdefault('allow_redirects', True) - return request('get', url, **kwargs) + return request('get', url, params=params, **kwargs) def options(url, **kwargs): - """Sends a OPTIONS request. Returns :class:`Response` object. + """Sends a OPTIONS request. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ kwargs.setdefault('allow_redirects', True) @@ -77,10 +83,12 @@ def options(url, **kwargs): def head(url, **kwargs): - """Sends a HEAD request. Returns :class:`Response` object. + """Sends a HEAD request. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ kwargs.setdefault('allow_redirects', False) @@ -88,44 +96,52 @@ def head(url, **kwargs): def post(url, data=None, json=None, **kwargs): - """Sends a POST request. Returns :class:`Response` object. + """Sends a POST request. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. :param json: (optional) json data to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ return request('post', url, data=data, json=json, **kwargs) def put(url, data=None, **kwargs): - """Sends a PUT request. Returns :class:`Response` object. + """Sends a PUT request. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ return request('put', url, data=data, **kwargs) def patch(url, data=None, **kwargs): - """Sends a PATCH request. Returns :class:`Response` object. + """Sends a PATCH request. :param url: URL for the new :class:`Request` object. :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ return request('patch', url, data=data, **kwargs) def delete(url, **kwargs): - """Sends a DELETE request. Returns :class:`Response` object. + """Sends a DELETE request. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response """ return request('delete', url, **kwargs) diff --git a/lib/requests/auth.py b/lib/requests/auth.py index b950181d..03c3302a 100644 --- a/lib/requests/auth.py +++ b/lib/requests/auth.py @@ -103,7 +103,8 @@ class HTTPDigestAuth(AuthBase): # XXX not implemented yet entdig = None p_parsed = urlparse(url) - path = p_parsed.path + #: path is request-uri defined in RFC 2616 which should not be empty + path = p_parsed.path or "/" if p_parsed.query: path += '?' + p_parsed.query @@ -124,13 +125,15 @@ class HTTPDigestAuth(AuthBase): s += os.urandom(8) cnonce = (hashlib.sha1(s).hexdigest()[:16]) - noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, HA2) if _algorithm == 'MD5-SESS': HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce)) if qop is None: respdig = KD(HA1, "%s:%s" % (nonce, HA2)) elif qop == 'auth' or 'auth' in qop.split(','): + noncebit = "%s:%s:%s:%s:%s" % ( + nonce, ncvalue, cnonce, 'auth', HA2 + ) respdig = KD(HA1, noncebit) else: # XXX handle auth-int. @@ -176,7 +179,7 @@ class HTTPDigestAuth(AuthBase): # Consume content and release the original connection # to allow our new request to reuse the same one. r.content - r.raw.release_conn() + r.close() prep = r.request.copy() extract_cookies_to_jar(prep._cookies, r.request, r.raw) prep.prepare_cookies(prep._cookies) diff --git a/lib/requests/compat.py b/lib/requests/compat.py index c07726ee..70edff78 100644 --- a/lib/requests/compat.py +++ b/lib/requests/compat.py @@ -21,58 +21,6 @@ is_py2 = (_ver[0] == 2) #: Python 3.x? is_py3 = (_ver[0] == 3) -#: Python 3.0.x -is_py30 = (is_py3 and _ver[1] == 0) - -#: Python 3.1.x -is_py31 = (is_py3 and _ver[1] == 1) - -#: Python 3.2.x -is_py32 = (is_py3 and _ver[1] == 2) - -#: Python 3.3.x -is_py33 = (is_py3 and _ver[1] == 3) - -#: Python 3.4.x -is_py34 = (is_py3 and _ver[1] == 4) - -#: Python 2.7.x -is_py27 = (is_py2 and _ver[1] == 7) - -#: Python 2.6.x -is_py26 = (is_py2 and _ver[1] == 6) - -#: Python 2.5.x -is_py25 = (is_py2 and _ver[1] == 5) - -#: Python 2.4.x -is_py24 = (is_py2 and _ver[1] == 4) # I'm assuming this is not by choice. - - -# --------- -# Platforms -# --------- - - -# Syntax sugar. -_ver = sys.version.lower() - -is_pypy = ('pypy' in _ver) -is_jython = ('jython' in _ver) -is_ironpython = ('iron' in _ver) - -# Assume CPython, if nothing else. -is_cpython = not any((is_pypy, is_jython, is_ironpython)) - -# Windows-based system. -is_windows = 'win32' in str(sys.platform).lower() - -# Standard Linux 2+ system. -is_linux = ('linux' in str(sys.platform).lower()) -is_osx = ('darwin' in str(sys.platform).lower()) -is_hpux = ('hpux' in str(sys.platform).lower()) # Complete guess. -is_solaris = ('solar==' in str(sys.platform).lower()) # Complete guess. - try: import simplejson as json except (ImportError, SyntaxError): @@ -99,7 +47,6 @@ if is_py2: basestring = basestring numeric_types = (int, long, float) - elif is_py3: from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag from urllib.request import parse_http_list, getproxies, proxy_bypass diff --git a/lib/requests/cookies.py b/lib/requests/cookies.py index 831c49c6..88b478c7 100644 --- a/lib/requests/cookies.py +++ b/lib/requests/cookies.py @@ -6,6 +6,7 @@ Compatibility code to be able to use `cookielib.CookieJar` with requests. requests.utils imports from here, so be careful with imports. """ +import copy import time import collections from .compat import cookielib, urlparse, urlunparse, Morsel @@ -157,26 +158,28 @@ class CookieConflictError(RuntimeError): class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): - """Compatibility class; is a cookielib.CookieJar, but exposes a dict interface. + """Compatibility class; is a cookielib.CookieJar, but exposes a dict + interface. This is the CookieJar we create by default for requests and sessions that don't specify one, since some clients may expect response.cookies and session.cookies to support dict operations. - Don't use the dict interface internally; it's just for compatibility with - with external client code. All `requests` code should work out of the box - with externally provided instances of CookieJar, e.g., LWPCookieJar and - FileCookieJar. - - Caution: dictionary operations that are normally O(1) may be O(n). + Requests does not use the dict interface internally; it's just for + compatibility with external client code. All requests code should work + out of the box with externally provided instances of ``CookieJar``, e.g. + ``LWPCookieJar`` and ``FileCookieJar``. Unlike a regular CookieJar, this class is pickleable. - """ + .. warning:: dictionary operations that are normally O(1) may be O(n). + """ def get(self, name, default=None, domain=None, path=None): """Dict-like get() that also supports optional domain and path args in order to resolve naming collisions from using one cookie jar over - multiple domains. Caution: operation is O(n), not O(1).""" + multiple domains. + + .. warning:: operation is O(n), not O(1).""" try: return self._find_no_duplicates(name, domain, path) except KeyError: @@ -199,37 +202,38 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): return c def iterkeys(self): - """Dict-like iterkeys() that returns an iterator of names of cookies from the jar. - See itervalues() and iteritems().""" + """Dict-like iterkeys() that returns an iterator of names of cookies + from the jar. See itervalues() and iteritems().""" for cookie in iter(self): yield cookie.name def keys(self): - """Dict-like keys() that returns a list of names of cookies from the jar. - See values() and items().""" + """Dict-like keys() that returns a list of names of cookies from the + jar. See values() and items().""" return list(self.iterkeys()) def itervalues(self): - """Dict-like itervalues() that returns an iterator of values of cookies from the jar. - See iterkeys() and iteritems().""" + """Dict-like itervalues() that returns an iterator of values of cookies + from the jar. See iterkeys() and iteritems().""" for cookie in iter(self): yield cookie.value def values(self): - """Dict-like values() that returns a list of values of cookies from the jar. - See keys() and items().""" + """Dict-like values() that returns a list of values of cookies from the + jar. See keys() and items().""" return list(self.itervalues()) def iteritems(self): - """Dict-like iteritems() that returns an iterator of name-value tuples from the jar. - See iterkeys() and itervalues().""" + """Dict-like iteritems() that returns an iterator of name-value tuples + from the jar. See iterkeys() and itervalues().""" for cookie in iter(self): yield cookie.name, cookie.value def items(self): - """Dict-like items() that returns a list of name-value tuples from the jar. - See keys() and values(). Allows client-code to call "dict(RequestsCookieJar) - and get a vanilla python dict of key value pairs.""" + """Dict-like items() that returns a list of name-value tuples from the + jar. See keys() and values(). Allows client-code to call + ``dict(RequestsCookieJar)`` and get a vanilla python dict of key value + pairs.""" return list(self.iteritems()) def list_domains(self): @@ -259,8 +263,9 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): return False # there is only one domain in jar def get_dict(self, domain=None, path=None): - """Takes as an argument an optional domain and path and returns a plain old - Python dict of name-value pairs of cookies that meet the requirements.""" + """Takes as an argument an optional domain and path and returns a plain + old Python dict of name-value pairs of cookies that meet the + requirements.""" dictionary = {} for cookie in iter(self): if (domain is None or cookie.domain == domain) and (path is None @@ -269,21 +274,24 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): return dictionary def __getitem__(self, name): - """Dict-like __getitem__() for compatibility with client code. Throws exception - if there are more than one cookie with name. In that case, use the more - explicit get() method instead. Caution: operation is O(n), not O(1).""" + """Dict-like __getitem__() for compatibility with client code. Throws + exception if there are more than one cookie with name. In that case, + use the more explicit get() method instead. + + .. warning:: operation is O(n), not O(1).""" return self._find_no_duplicates(name) def __setitem__(self, name, value): - """Dict-like __setitem__ for compatibility with client code. Throws exception - if there is already a cookie of that name in the jar. In that case, use the more - explicit set() method instead.""" + """Dict-like __setitem__ for compatibility with client code. Throws + exception if there is already a cookie of that name in the jar. In that + case, use the more explicit set() method instead.""" self.set(name, value) def __delitem__(self, name): - """Deletes a cookie given a name. Wraps cookielib.CookieJar's remove_cookie_by_name().""" + """Deletes a cookie given a name. Wraps ``cookielib.CookieJar``'s + ``remove_cookie_by_name()``.""" remove_cookie_by_name(self, name) def set_cookie(self, cookie, *args, **kwargs): @@ -295,15 +303,16 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): """Updates this jar with cookies from another CookieJar or dict-like""" if isinstance(other, cookielib.CookieJar): for cookie in other: - self.set_cookie(cookie) + self.set_cookie(copy.copy(cookie)) else: super(RequestsCookieJar, self).update(other) def _find(self, name, domain=None, path=None): - """Requests uses this method internally to get cookie values. Takes as args name - and optional domain and path. Returns a cookie.value. If there are conflicting cookies, - _find arbitrarily chooses one. See _find_no_duplicates if you want an exception thrown - if there are conflicting cookies.""" + """Requests uses this method internally to get cookie values. Takes as + args name and optional domain and path. Returns a cookie.value. If + there are conflicting cookies, _find arbitrarily chooses one. See + _find_no_duplicates if you want an exception thrown if there are + conflicting cookies.""" for cookie in iter(self): if cookie.name == name: if domain is None or cookie.domain == domain: @@ -313,10 +322,11 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): raise KeyError('name=%r, domain=%r, path=%r' % (name, domain, path)) def _find_no_duplicates(self, name, domain=None, path=None): - """__get_item__ and get call _find_no_duplicates -- never used in Requests internally. - Takes as args name and optional domain and path. Returns a cookie.value. - Throws KeyError if cookie is not found and CookieConflictError if there are - multiple cookies that match name and optionally domain and path.""" + """Both ``__get_item__`` and ``get`` call this function: it's never + used elsewhere in Requests. Takes as args name and optional domain and + path. Returns a cookie.value. Throws KeyError if cookie is not found + and CookieConflictError if there are multiple cookies that match name + and optionally domain and path.""" toReturn = None for cookie in iter(self): if cookie.name == name: @@ -350,6 +360,21 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): return new_cj +def _copy_cookie_jar(jar): + if jar is None: + return None + + if hasattr(jar, 'copy'): + # We're dealing with an instane of RequestsCookieJar + return jar.copy() + # We're dealing with a generic CookieJar instance + new_jar = copy.copy(jar) + new_jar.clear() + for cookie in jar: + new_jar.set_cookie(copy.copy(cookie)) + return new_jar + + def create_cookie(name, value, **kwargs): """Make a cookie from underspecified parameters. @@ -390,11 +415,14 @@ def morsel_to_cookie(morsel): expires = None if morsel['max-age']: - expires = time.time() + morsel['max-age'] + try: + expires = int(time.time() + int(morsel['max-age'])) + except ValueError: + raise TypeError('max-age: %s must be integer' % morsel['max-age']) elif morsel['expires']: time_template = '%a, %d-%b-%Y %H:%M:%S GMT' - expires = time.mktime( - time.strptime(morsel['expires'], time_template)) - time.timezone + expires = int(time.mktime( + time.strptime(morsel['expires'], time_template)) - time.timezone) return create_cookie( comment=morsel['comment'], comment_url=bool(morsel['comment']), @@ -440,7 +468,7 @@ def merge_cookies(cookiejar, cookies): """ if not isinstance(cookiejar, cookielib.CookieJar): raise ValueError('You can only merge into CookieJar') - + if isinstance(cookies, dict): cookiejar = cookiejar_from_dict( cookies, cookiejar=cookiejar, overwrite=False) diff --git a/lib/requests/models.py b/lib/requests/models.py index b728c84e..4270c647 100644 --- a/lib/requests/models.py +++ b/lib/requests/models.py @@ -15,7 +15,7 @@ from .hooks import default_hooks from .structures import CaseInsensitiveDict from .auth import HTTPBasicAuth -from .cookies import cookiejar_from_dict, get_cookie_header +from .cookies import cookiejar_from_dict, get_cookie_header, _copy_cookie_jar from .packages.urllib3.fields import RequestField from .packages.urllib3.filepost import encode_multipart_formdata from .packages.urllib3.util import parse_url @@ -30,7 +30,8 @@ from .utils import ( iter_slices, guess_json_utf, super_len, to_native_string) from .compat import ( cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO, - is_py2, chardet, json, builtin_str, basestring) + is_py2, chardet, builtin_str, basestring) +from .compat import json as complexjson from .status_codes import codes #: The set of HTTP status codes that indicate an automatically @@ -42,12 +43,11 @@ REDIRECT_STATI = ( codes.temporary_redirect, # 307 codes.permanent_redirect, # 308 ) + DEFAULT_REDIRECT_LIMIT = 30 CONTENT_CHUNK_SIZE = 10 * 1024 ITER_CHUNK_SIZE = 512 -json_dumps = json.dumps - class RequestEncodingMixin(object): @property @@ -143,13 +143,13 @@ class RequestEncodingMixin(object): else: fn = guess_filename(v) or k fp = v - if isinstance(fp, str): - fp = StringIO(fp) - if isinstance(fp, bytes): - fp = BytesIO(fp) - rf = RequestField(name=k, data=fp.read(), - filename=fn, headers=fh) + if isinstance(fp, (str, bytes, bytearray)): + fdata = fp + else: + fdata = fp.read() + + rf = RequestField(name=k, data=fdata, filename=fn, headers=fh) rf.make_multipart(content_type=ft) new_fields.append(rf) @@ -206,17 +206,8 @@ class Request(RequestHooksMixin): """ - def __init__(self, - method=None, - url=None, - headers=None, - files=None, - data=None, - params=None, - auth=None, - cookies=None, - hooks=None, - json=None): + def __init__(self, method=None, url=None, headers=None, files=None, + data=None, params=None, auth=None, cookies=None, hooks=None, json=None): # Default empty dicts for dict params. data = [] if data is None else data @@ -295,8 +286,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): self.hooks = default_hooks() def prepare(self, method=None, url=None, headers=None, files=None, - data=None, params=None, auth=None, cookies=None, hooks=None, - json=None): + data=None, params=None, auth=None, cookies=None, hooks=None, json=None): """Prepares the entire request with the given parameters.""" self.prepare_method(method) @@ -305,6 +295,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): self.prepare_cookies(cookies) self.prepare_body(data, files, json) self.prepare_auth(auth, url) + # Note that prepare_auth must be last to enable authentication schemes # such as OAuth to work on a fully prepared request. @@ -319,7 +310,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): p.method = self.method p.url = self.url p.headers = self.headers.copy() if self.headers is not None else None - p._cookies = self._cookies.copy() if self._cookies is not None else None + p._cookies = _copy_cookie_jar(self._cookies) p.body = self.body p.hooks = self.hooks return p @@ -356,8 +347,10 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): raise InvalidURL(*e.args) if not scheme: - raise MissingSchema("Invalid URL {0!r}: No schema supplied. " - "Perhaps you meant http://{0}?".format(url)) + error = ("Invalid URL {0!r}: No schema supplied. Perhaps you meant http://{0}?") + error = error.format(to_native_string(url, 'utf8')) + + raise MissingSchema(error) if not host: raise InvalidURL("Invalid URL %r: No host supplied" % url) @@ -423,7 +416,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): if json is not None: content_type = 'application/json' - body = json_dumps(json) + body = complexjson.dumps(json) is_stream = all([ hasattr(data, '__iter__'), @@ -500,7 +493,15 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): self.prepare_content_length(self.body) def prepare_cookies(self, cookies): - """Prepares the given HTTP cookie data.""" + """Prepares the given HTTP cookie data. + + This function eventually generates a ``Cookie`` header from the + given cookies using cookielib. Due to cookielib's design, the header + will not be regenerated if it already exists, meaning this function + can only be called once for the life of the + :class:`PreparedRequest ` object. Any subsequent calls + to ``prepare_cookies`` will have no actual effect, unless the "Cookie" + header is removed beforehand.""" if isinstance(cookies, cookielib.CookieJar): self._cookies = cookies @@ -513,6 +514,10 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): def prepare_hooks(self, hooks): """Prepares the given hooks.""" + # hooks can be passed as None to the prepare method and to this + # method. To prevent iterating over None, simply use an empty list + # if hooks is False-y + hooks = hooks or [] for event in hooks: self.register_hook(event, hooks[event]) @@ -523,16 +528,8 @@ class Response(object): """ __attrs__ = [ - '_content', - 'status_code', - 'headers', - 'url', - 'history', - 'encoding', - 'reason', - 'cookies', - 'elapsed', - 'request', + '_content', 'status_code', 'headers', 'url', 'history', + 'encoding', 'reason', 'cookies', 'elapsed', 'request' ] def __init__(self): @@ -572,7 +569,11 @@ class Response(object): self.cookies = cookiejar_from_dict({}) #: The amount of time elapsed between sending the request - #: and the arrival of the response (as a timedelta) + #: and the arrival of the response (as a timedelta). + #: This property specifically measures the time taken between sending + #: the first byte of the request and finishing parsing the headers. It + #: is therefore unaffected by consuming the response content or the + #: value of the ``stream`` keyword argument. self.elapsed = datetime.timedelta(0) #: The :class:`PreparedRequest ` object to which this @@ -648,9 +649,10 @@ class Response(object): If decode_unicode is True, content will be decoded using the best available encoding based on the response. """ + def generate(): - try: - # Special case for urllib3. + # Special case for urllib3. + if hasattr(self.raw, 'stream'): try: for chunk in self.raw.stream(chunk_size, decode_content=True): yield chunk @@ -660,7 +662,7 @@ class Response(object): raise ContentDecodingError(e) except ReadTimeoutError as e: raise ConnectionError(e) - except AttributeError: + else: # Standard file-like object. while True: chunk = self.raw.read(chunk_size) @@ -688,6 +690,8 @@ class Response(object): """Iterates over the response data, one line at a time. When stream=True is set on the request, this avoids reading the content at once into memory for large responses. + + .. note:: This method is not reentrant safe. """ pending = None @@ -789,14 +793,16 @@ class Response(object): encoding = guess_json_utf(self.content) if encoding is not None: try: - return json.loads(self.content.decode(encoding), **kwargs) + return complexjson.loads( + self.content.decode(encoding), **kwargs + ) except UnicodeDecodeError: # Wrong UTF codec detected; usually because it's not UTF-8 # but some other 8-bit codec. This is an RFC violation, # and the server didn't bother to tell us what codec *was* # used. pass - return json.loads(self.text, **kwargs) + return complexjson.loads(self.text, **kwargs) @property def links(self): @@ -822,10 +828,10 @@ class Response(object): http_error_msg = '' if 400 <= self.status_code < 500: - http_error_msg = '%s Client Error: %s' % (self.status_code, self.reason) + http_error_msg = '%s Client Error: %s for url: %s' % (self.status_code, self.reason, self.url) elif 500 <= self.status_code < 600: - http_error_msg = '%s Server Error: %s' % (self.status_code, self.reason) + http_error_msg = '%s Server Error: %s for url: %s' % (self.status_code, self.reason, self.url) if http_error_msg: raise HTTPError(http_error_msg, response=self) @@ -836,4 +842,7 @@ class Response(object): *Note: Should not normally need to be called explicitly.* """ + if not self._content_consumed: + return self.raw.close() + return self.raw.release_conn() diff --git a/lib/requests/packages/README.rst b/lib/requests/packages/README.rst new file mode 100644 index 00000000..c42f376b --- /dev/null +++ b/lib/requests/packages/README.rst @@ -0,0 +1,8 @@ +If you are planning to submit a pull request to requests with any changes in +this library do not go any further. These are independent libraries which we +vendor into requests. Any changes necessary to these libraries must be made in +them and submitted as separate pull requests to those libraries. + +urllib3 pull requests go here: https://github.com/shazow/urllib3 + +chardet pull requests go here: https://github.com/chardet/chardet diff --git a/lib/requests/packages/urllib3/__init__.py b/lib/requests/packages/urllib3/__init__.py index dfc82d03..b80f19d2 100644 --- a/lib/requests/packages/urllib3/__init__.py +++ b/lib/requests/packages/urllib3/__init__.py @@ -55,9 +55,14 @@ def add_stderr_logger(level=logging.DEBUG): del NullHandler -# Set security warning to only go off once by default. import warnings -warnings.simplefilter('always', exceptions.SecurityWarning) +# SecurityWarning's always go off by default. +warnings.simplefilter('always', exceptions.SecurityWarning, append=True) +# SubjectAltNameWarning's should go off once per host +warnings.simplefilter('default', exceptions.SubjectAltNameWarning) +# InsecurePlatformWarning's don't vary between requests, so we keep it default. +warnings.simplefilter('default', exceptions.InsecurePlatformWarning, + append=True) def disable_warnings(category=exceptions.HTTPWarning): """ diff --git a/lib/requests/packages/urllib3/_collections.py b/lib/requests/packages/urllib3/_collections.py index 784342a4..b68b9a59 100644 --- a/lib/requests/packages/urllib3/_collections.py +++ b/lib/requests/packages/urllib3/_collections.py @@ -1,7 +1,7 @@ from collections import Mapping, MutableMapping try: from threading import RLock -except ImportError: # Platform-specific: No threads available +except ImportError: # Platform-specific: No threads available class RLock: def __enter__(self): pass @@ -10,11 +10,11 @@ except ImportError: # Platform-specific: No threads available pass -try: # Python 2.7+ +try: # Python 2.7+ from collections import OrderedDict except ImportError: from .packages.ordered_dict import OrderedDict -from .packages.six import iterkeys, itervalues +from .packages.six import iterkeys, itervalues, PY3 __all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] @@ -129,25 +129,82 @@ class HTTPHeaderDict(MutableMapping): 'foo=bar, baz=quxx' >>> headers['Content-Length'] '7' - - If you want to access the raw headers with their original casing - for debugging purposes you can access the private ``._data`` attribute - which is a normal python ``dict`` that maps the case-insensitive key to a - list of tuples stored as (case-sensitive-original-name, value). Using the - structure from above as our example: - - >>> headers._data - {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')], - 'content-length': [('content-length', '7')]} """ def __init__(self, headers=None, **kwargs): - self._data = {} - if headers is None: - headers = {} - self.update(headers, **kwargs) + super(HTTPHeaderDict, self).__init__() + self._container = {} + if headers is not None: + if isinstance(headers, HTTPHeaderDict): + self._copy_from(headers) + else: + self.extend(headers) + if kwargs: + self.extend(kwargs) - def add(self, key, value): + def __setitem__(self, key, val): + self._container[key.lower()] = (key, val) + return self._container[key.lower()] + + def __getitem__(self, key): + val = self._container[key.lower()] + return ', '.join(val[1:]) + + def __delitem__(self, key): + del self._container[key.lower()] + + def __contains__(self, key): + return key.lower() in self._container + + def __eq__(self, other): + if not isinstance(other, Mapping) and not hasattr(other, 'keys'): + return False + if not isinstance(other, type(self)): + other = type(self)(other) + return (dict((k.lower(), v) for k, v in self.itermerged()) == + dict((k.lower(), v) for k, v in other.itermerged())) + + def __ne__(self, other): + return not self.__eq__(other) + + if not PY3: # Python 2 + iterkeys = MutableMapping.iterkeys + itervalues = MutableMapping.itervalues + + __marker = object() + + def __len__(self): + return len(self._container) + + def __iter__(self): + # Only provide the originally cased names + for vals in self._container.values(): + yield vals[0] + + def pop(self, key, default=__marker): + '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + ''' + # Using the MutableMapping function directly fails due to the private marker. + # Using ordinary dict.pop would expose the internal structures. + # So let's reinvent the wheel. + try: + value = self[key] + except KeyError: + if default is self.__marker: + raise + return default + else: + del self[key] + return value + + def discard(self, key): + try: + del self[key] + except KeyError: + pass + + def add(self, key, val): """Adds a (name, value) pair, doesn't overwrite the value if it already exists. @@ -156,43 +213,111 @@ class HTTPHeaderDict(MutableMapping): >>> headers['foo'] 'bar, baz' """ - self._data.setdefault(key.lower(), []).append((key, value)) + key_lower = key.lower() + new_vals = key, val + # Keep the common case aka no item present as fast as possible + vals = self._container.setdefault(key_lower, new_vals) + if new_vals is not vals: + # new_vals was not inserted, as there was a previous one + if isinstance(vals, list): + # If already several items got inserted, we have a list + vals.append(val) + else: + # vals should be a tuple then, i.e. only one item so far + # Need to convert the tuple to list for further extension + self._container[key_lower] = [vals[0], vals[1], val] + + def extend(self, *args, **kwargs): + """Generic import function for any type of header-like object. + Adapted version of MutableMapping.update in order to insert items + with self.add instead of self.__setitem__ + """ + if len(args) > 1: + raise TypeError("extend() takes at most 1 positional " + "arguments ({} given)".format(len(args))) + other = args[0] if len(args) >= 1 else () + + if isinstance(other, HTTPHeaderDict): + for key, val in other.iteritems(): + self.add(key, val) + elif isinstance(other, Mapping): + for key in other: + self.add(key, other[key]) + elif hasattr(other, "keys"): + for key in other.keys(): + self.add(key, other[key]) + else: + for key, value in other: + self.add(key, value) + + for key, value in kwargs.items(): + self.add(key, value) def getlist(self, key): """Returns a list of all the values for the named field. Returns an empty list if the key doesn't exist.""" - return self[key].split(', ') if key in self else [] + try: + vals = self._container[key.lower()] + except KeyError: + return [] + else: + if isinstance(vals, tuple): + return [vals[1]] + else: + return vals[1:] - def copy(self): - h = HTTPHeaderDict() - for key in self._data: - for rawkey, value in self._data[key]: - h.add(rawkey, value) - return h - - def __eq__(self, other): - if not isinstance(other, Mapping): - return False - other = HTTPHeaderDict(other) - return dict((k1, self[k1]) for k1 in self._data) == \ - dict((k2, other[k2]) for k2 in other._data) - - def __getitem__(self, key): - values = self._data[key.lower()] - return ', '.join(value[1] for value in values) - - def __setitem__(self, key, value): - self._data[key.lower()] = [(key, value)] - - def __delitem__(self, key): - del self._data[key.lower()] - - def __len__(self): - return len(self._data) - - def __iter__(self): - for headers in itervalues(self._data): - yield headers[0][0] + # Backwards compatibility for httplib + getheaders = getlist + getallmatchingheaders = getlist + iget = getlist def __repr__(self): - return '%s(%r)' % (self.__class__.__name__, dict(self.items())) + return "%s(%s)" % (type(self).__name__, dict(self.itermerged())) + + def _copy_from(self, other): + for key in other: + val = other.getlist(key) + if isinstance(val, list): + # Don't need to convert tuples + val = list(val) + self._container[key.lower()] = [key] + val + + def copy(self): + clone = type(self)() + clone._copy_from(self) + return clone + + def iteritems(self): + """Iterate over all header lines, including duplicate ones.""" + for key in self: + vals = self._container[key.lower()] + for val in vals[1:]: + yield vals[0], val + + def itermerged(self): + """Iterate over all headers, merging duplicate ones together.""" + for key in self: + val = self._container[key.lower()] + yield val[0], ', '.join(val[1:]) + + def items(self): + return list(self.iteritems()) + + @classmethod + def from_httplib(cls, message): # Python 2 + """Read headers from a Python 2 httplib message object.""" + # python2.7 does not expose a proper API for exporting multiheaders + # efficiently. This function re-reads raw lines from the message + # object and extracts the multiheaders properly. + headers = [] + + for line in message.headers: + if line.startswith((' ', '\t')): + key, value = headers[-1] + headers[-1] = (key, value + '\r\n' + line.rstrip()) + continue + + key, value = line.split(':', 1) + headers.append((key, value.strip())) + + return cls(headers) diff --git a/lib/requests/packages/urllib3/connection.py b/lib/requests/packages/urllib3/connection.py index e5de769d..3eab1e28 100644 --- a/lib/requests/packages/urllib3/connection.py +++ b/lib/requests/packages/urllib3/connection.py @@ -1,7 +1,7 @@ import datetime import sys import socket -from socket import timeout as SocketTimeout +from socket import error as SocketError, timeout as SocketTimeout import warnings from .packages import six @@ -36,9 +36,10 @@ except NameError: # Python 2: from .exceptions import ( + NewConnectionError, ConnectTimeoutError, + SubjectAltNameWarning, SystemTimeWarning, - SecurityWarning, ) from .packages.ssl_match_hostname import match_hostname @@ -133,11 +134,15 @@ class HTTPConnection(_HTTPConnection, object): conn = connection.create_connection( (self.host, self.port), self.timeout, **extra_kw) - except SocketTimeout: + except SocketTimeout as e: raise ConnectTimeoutError( self, "Connection to %s timed out. (connect timeout=%s)" % (self.host, self.timeout)) + except SocketError as e: + raise NewConnectionError( + self, "Failed to establish a new connection: %s" % e) + return conn def _prepare_conn(self, conn): @@ -185,17 +190,23 @@ class VerifiedHTTPSConnection(HTTPSConnection): """ cert_reqs = None ca_certs = None + ca_cert_dir = None ssl_version = None assert_fingerprint = None def set_cert(self, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, - assert_hostname=None, assert_fingerprint=None): + assert_hostname=None, assert_fingerprint=None, + ca_cert_dir=None): + + if (ca_certs or ca_cert_dir) and cert_reqs is None: + cert_reqs = 'CERT_REQUIRED' self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs self.ca_certs = ca_certs + self.ca_cert_dir = ca_cert_dir self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint @@ -234,6 +245,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file, cert_reqs=resolved_cert_reqs, ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, server_hostname=hostname, ssl_version=resolved_ssl_version) @@ -245,10 +257,11 @@ class VerifiedHTTPSConnection(HTTPSConnection): cert = self.sock.getpeercert() if not cert.get('subjectAltName', ()): warnings.warn(( - 'Certificate has no `subjectAltName`, falling back to check for a `commonName` for now. ' - 'This feature is being removed by major browsers and deprecated by RFC 2818. ' - '(See https://github.com/shazow/urllib3/issues/497 for details.)'), - SecurityWarning + 'Certificate for {0} has no `subjectAltName`, falling back to check for a ' + '`commonName` for now. This feature is being removed by major browsers and ' + 'deprecated by RFC 2818. (See https://github.com/shazow/urllib3/issues/497 ' + 'for details.)'.format(hostname)), + SubjectAltNameWarning ) match_hostname(cert, self.assert_hostname or hostname) @@ -260,3 +273,5 @@ if ssl: # Make a copy for testing. UnverifiedHTTPSConnection = HTTPSConnection HTTPSConnection = VerifiedHTTPSConnection +else: + HTTPSConnection = DummyConnection diff --git a/lib/requests/packages/urllib3/connectionpool.py b/lib/requests/packages/urllib3/connectionpool.py index 70ee4eed..b38ac68d 100644 --- a/lib/requests/packages/urllib3/connectionpool.py +++ b/lib/requests/packages/urllib3/connectionpool.py @@ -17,14 +17,17 @@ from .exceptions import ( ClosedPoolError, ProtocolError, EmptyPoolError, + HeaderParsingError, HostChangedError, LocationValueError, MaxRetryError, ProxyError, + ConnectTimeoutError, ReadTimeoutError, SSLError, TimeoutError, InsecureRequestWarning, + NewConnectionError, ) from .packages.ssl_match_hostname import CertificateError from .packages import six @@ -38,9 +41,10 @@ from .request import RequestMethods from .response import HTTPResponse from .util.connection import is_connection_dropped +from .util.response import assert_header_parsing from .util.retry import Retry from .util.timeout import Timeout -from .util.url import get_host +from .util.url import get_host, Url xrange = six.moves.xrange @@ -72,6 +76,21 @@ class ConnectionPool(object): return '%s(host=%r, port=%r)' % (type(self).__name__, self.host, self.port) + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + # Return False to re-raise any potential exceptions + return False + + def close(): + """ + Close all pooled connections and disable the pool. + """ + pass + + # This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 _blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) @@ -105,7 +124,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param maxsize: Number of connections to save that can be reused. More than 1 is useful - in multithreaded situations. If ``block`` is set to false, more + in multithreaded situations. If ``block`` is set to False, more connections will be created but they will not be saved once they've been used. @@ -266,6 +285,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ pass + def _prepare_proxy(self, conn): + # Nothing to do for HTTP connections. + pass + def _get_timeout(self, timeout): """ Helper that always returns a :class:`urllib3.util.Timeout` """ if timeout is _Default: @@ -349,7 +372,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Receive the response from the server try: - try: # Python 2.7+, use buffering of HTTP responses + try: # Python 2.7, use buffering of HTTP responses httplib_response = conn.getresponse(buffering=True) except TypeError: # Python 2.6 and older httplib_response = conn.getresponse() @@ -362,8 +385,19 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): log.debug("\"%s %s %s\" %s %s" % (method, url, http_version, httplib_response.status, httplib_response.length)) + + try: + assert_header_parsing(httplib_response.msg) + except HeaderParsingError as hpe: # Platform-specific: Python 3 + log.warning( + 'Failed to parse headers (url=%s): %s', + self._absolute_url(url), hpe, exc_info=True) + return httplib_response + def _absolute_url(self, path): + return Url(scheme=self.scheme, host=self.host, port=self.port, path=path).url + def close(self): """ Close all pooled connections and disable the pool. @@ -510,11 +544,18 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): try: # Request a connection from the queue. + timeout_obj = self._get_timeout(timeout) conn = self._get_conn(timeout=pool_timeout) + conn.timeout = timeout_obj.connect_timeout + + is_new_proxy_conn = self.proxy is not None and not getattr(conn, 'sock', None) + if is_new_proxy_conn: + self._prepare_proxy(conn) + # Make the request on the httplib connection object. httplib_response = self._make_request(conn, method, url, - timeout=timeout, + timeout=timeout_obj, body=body, headers=headers) # If we're going to release the connection in ``finally:``, then @@ -542,26 +583,30 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Close the connection. If a connection is reused on which there # was a Certificate error, the next request will certainly raise # another Certificate error. - if conn: - conn.close() - conn = None + conn = conn and conn.close() + release_conn = True raise SSLError(e) - except (TimeoutError, HTTPException, SocketError, ConnectionError) as e: - if conn: - # Discard the connection for these exceptions. It will be - # be replaced during the next _get_conn() call. - conn.close() - conn = None + except SSLError: + # Treat SSLError separately from BaseSSLError to preserve + # traceback. + conn = conn and conn.close() + release_conn = True + raise - stacktrace = sys.exc_info()[2] - if isinstance(e, SocketError) and self.proxy: + except (TimeoutError, HTTPException, SocketError, ProtocolError) as e: + # Discard the connection for these exceptions. It will be + # be replaced during the next _get_conn() call. + conn = conn and conn.close() + release_conn = True + + if isinstance(e, (SocketError, NewConnectionError)) and self.proxy: e = ProxyError('Cannot connect to proxy.', e) elif isinstance(e, (SocketError, HTTPException)): e = ProtocolError('Connection aborted.', e) - retries = retries.increment(method, url, error=e, - _pool=self, _stacktrace=stacktrace) + retries = retries.increment(method, url, error=e, _pool=self, + _stacktrace=sys.exc_info()[2]) retries.sleep() # Keep track of the error for the retry warning. @@ -593,6 +638,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): retries = retries.increment(method, url, response=response, _pool=self) except MaxRetryError: if retries.raise_on_redirect: + # Release the connection for this response, since we're not + # returning it to be released manually. + response.release_conn() raise return response @@ -629,10 +677,10 @@ class HTTPSConnectionPool(HTTPConnectionPool): ``assert_hostname`` and ``host`` in this order to verify connections. If ``assert_hostname`` is False, no verification is done. - The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs`` and - ``ssl_version`` are only used if :mod:`ssl` is available and are fed into - :meth:`urllib3.util.ssl_wrap_socket` to upgrade the connection socket - into an SSL socket. + The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``, + ``ca_cert_dir``, and ``ssl_version`` are only used if :mod:`ssl` is + available and are fed into :meth:`urllib3.util.ssl_wrap_socket` to upgrade + the connection socket into an SSL socket. """ scheme = 'https' @@ -645,15 +693,20 @@ class HTTPSConnectionPool(HTTPConnectionPool): key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, ssl_version=None, assert_hostname=None, assert_fingerprint=None, - **conn_kw): + ca_cert_dir=None, **conn_kw): HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, block, headers, retries, _proxy, _proxy_headers, **conn_kw) + + if ca_certs and cert_reqs is None: + cert_reqs = 'CERT_REQUIRED' + self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs self.ca_certs = ca_certs + self.ca_cert_dir = ca_cert_dir self.ssl_version = ssl_version self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint @@ -669,28 +722,31 @@ class HTTPSConnectionPool(HTTPConnectionPool): cert_file=self.cert_file, cert_reqs=self.cert_reqs, ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, assert_hostname=self.assert_hostname, assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version - if self.proxy is not None: - # Python 2.7+ - try: - set_tunnel = conn.set_tunnel - except AttributeError: # Platform-specific: Python 2.6 - set_tunnel = conn._set_tunnel - - if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older - set_tunnel(self.host, self.port) - else: - set_tunnel(self.host, self.port, self.proxy_headers) - - # Establish tunnel connection early, because otherwise httplib - # would improperly set Host: header to proxy's IP:port. - conn.connect() - return conn + def _prepare_proxy(self, conn): + """ + Establish tunnel connection early, because otherwise httplib + would improperly set Host: header to proxy's IP:port. + """ + # Python 2.7+ + try: + set_tunnel = conn.set_tunnel + except AttributeError: # Platform-specific: Python 2.6 + set_tunnel = conn._set_tunnel + + if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older + set_tunnel(self.host, self.port) + else: + set_tunnel(self.host, self.port, self.proxy_headers) + + conn.connect() + def _new_conn(self): """ Return a fresh :class:`httplib.HTTPSConnection`. @@ -700,7 +756,6 @@ class HTTPSConnectionPool(HTTPConnectionPool): % (self.num_connections, self.host)) if not self.ConnectionCls or self.ConnectionCls is DummyConnection: - # Platform-specific: Python without ssl raise SSLError("Can't connect to HTTPS URL because the SSL " "module is not available.") diff --git a/lib/requests/packages/urllib3/contrib/appengine.py b/lib/requests/packages/urllib3/contrib/appengine.py new file mode 100644 index 00000000..ed9d8b81 --- /dev/null +++ b/lib/requests/packages/urllib3/contrib/appengine.py @@ -0,0 +1,222 @@ +import logging +import os +import warnings + +from ..exceptions import ( + HTTPError, + HTTPWarning, + MaxRetryError, + ProtocolError, + TimeoutError, + SSLError +) + +from ..packages.six import BytesIO +from ..request import RequestMethods +from ..response import HTTPResponse +from ..util.timeout import Timeout +from ..util.retry import Retry + +try: + from google.appengine.api import urlfetch +except ImportError: + urlfetch = None + + +log = logging.getLogger(__name__) + + +class AppEnginePlatformWarning(HTTPWarning): + pass + + +class AppEnginePlatformError(HTTPError): + pass + + +class AppEngineManager(RequestMethods): + """ + Connection manager for Google App Engine sandbox applications. + + This manager uses the URLFetch service directly instead of using the + emulated httplib, and is subject to URLFetch limitations as described in + the App Engine documentation here: + + https://cloud.google.com/appengine/docs/python/urlfetch + + Notably it will raise an AppEnginePlatformError if: + * URLFetch is not available. + * If you attempt to use this on GAEv2 (Managed VMs), as full socket + support is available. + * If a request size is more than 10 megabytes. + * If a response size is more than 32 megabtyes. + * If you use an unsupported request method such as OPTIONS. + + Beyond those cases, it will raise normal urllib3 errors. + """ + + def __init__(self, headers=None, retries=None, validate_certificate=True): + if not urlfetch: + raise AppEnginePlatformError( + "URLFetch is not available in this environment.") + + if is_prod_appengine_v2(): + raise AppEnginePlatformError( + "Use normal urllib3.PoolManager instead of AppEngineManager" + "on Managed VMs, as using URLFetch is not necessary in " + "this environment.") + + warnings.warn( + "urllib3 is using URLFetch on Google App Engine sandbox instead " + "of sockets. To use sockets directly instead of URLFetch see " + "https://urllib3.readthedocs.org/en/latest/contrib.html.", + AppEnginePlatformWarning) + + RequestMethods.__init__(self, headers) + self.validate_certificate = validate_certificate + + self.retries = retries or Retry.DEFAULT + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + # Return False to re-raise any potential exceptions + return False + + def urlopen(self, method, url, body=None, headers=None, + retries=None, redirect=True, timeout=Timeout.DEFAULT_TIMEOUT, + **response_kw): + + retries = self._get_retries(retries, redirect) + + try: + response = urlfetch.fetch( + url, + payload=body, + method=method, + headers=headers or {}, + allow_truncated=False, + follow_redirects=( + redirect and + retries.redirect != 0 and + retries.total), + deadline=self._get_absolute_timeout(timeout), + validate_certificate=self.validate_certificate, + ) + except urlfetch.DeadlineExceededError as e: + raise TimeoutError(self, e) + + except urlfetch.InvalidURLError as e: + if 'too large' in e.message: + raise AppEnginePlatformError( + "URLFetch request too large, URLFetch only " + "supports requests up to 10mb in size.", e) + raise ProtocolError(e) + + except urlfetch.DownloadError as e: + if 'Too many redirects' in e.message: + raise MaxRetryError(self, url, reason=e) + raise ProtocolError(e) + + except urlfetch.ResponseTooLargeError as e: + raise AppEnginePlatformError( + "URLFetch response too large, URLFetch only supports" + "responses up to 32mb in size.", e) + + except urlfetch.SSLCertificateError as e: + raise SSLError(e) + + except urlfetch.InvalidMethodError as e: + raise AppEnginePlatformError( + "URLFetch does not support method: %s" % method, e) + + http_response = self._urlfetch_response_to_http_response( + response, **response_kw) + + # Check for redirect response + if (http_response.get_redirect_location() and + retries.raise_on_redirect and redirect): + raise MaxRetryError(self, url, "too many redirects") + + # Check if we should retry the HTTP response. + if retries.is_forced_retry(method, status_code=http_response.status): + retries = retries.increment( + method, url, response=http_response, _pool=self) + log.info("Forced retry: %s" % url) + retries.sleep() + return self.urlopen( + method, url, + body=body, headers=headers, + retries=retries, redirect=redirect, + timeout=timeout, **response_kw) + + return http_response + + def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw): + + if is_prod_appengine_v1(): + # Production GAE handles deflate encoding automatically, but does + # not remove the encoding header. + content_encoding = urlfetch_resp.headers.get('content-encoding') + + if content_encoding == 'deflate': + del urlfetch_resp.headers['content-encoding'] + + return HTTPResponse( + # In order for decoding to work, we must present the content as + # a file-like object. + body=BytesIO(urlfetch_resp.content), + headers=urlfetch_resp.headers, + status=urlfetch_resp.status_code, + **response_kw + ) + + def _get_absolute_timeout(self, timeout): + if timeout is Timeout.DEFAULT_TIMEOUT: + return 5 # 5s is the default timeout for URLFetch. + if isinstance(timeout, Timeout): + if not timeout.read is timeout.connect: + warnings.warn( + "URLFetch does not support granular timeout settings, " + "reverting to total timeout.", AppEnginePlatformWarning) + return timeout.total + return timeout + + def _get_retries(self, retries, redirect): + if not isinstance(retries, Retry): + retries = Retry.from_int( + retries, redirect=redirect, default=self.retries) + + if retries.connect or retries.read or retries.redirect: + warnings.warn( + "URLFetch only supports total retries and does not " + "recognize connect, read, or redirect retry parameters.", + AppEnginePlatformWarning) + + return retries + + +def is_appengine(): + return (is_local_appengine() or + is_prod_appengine_v1() or + is_prod_appengine_v2()) + + +def is_appengine_sandbox(): + return is_appengine() and not is_prod_appengine_v2() + + +def is_local_appengine(): + return ('APPENGINE_RUNTIME' in os.environ and + 'Development/' in os.environ['SERVER_SOFTWARE']) + + +def is_prod_appengine_v1(): + return ('APPENGINE_RUNTIME' in os.environ and + 'Google App Engine/' in os.environ['SERVER_SOFTWARE'] and + not is_prod_appengine_v2()) + + +def is_prod_appengine_v2(): + return os.environ.get('GAE_VM', False) == 'true' diff --git a/lib/requests/packages/urllib3/contrib/pyopenssl.py b/lib/requests/packages/urllib3/contrib/pyopenssl.py index 8229090c..c20ae46d 100644 --- a/lib/requests/packages/urllib3/contrib/pyopenssl.py +++ b/lib/requests/packages/urllib3/contrib/pyopenssl.py @@ -38,8 +38,6 @@ Module Variables ---------------- :var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites. - Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES: - ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS`` .. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication .. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) @@ -85,23 +83,16 @@ _openssl_verify = { + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, } -# A secure default. -# Sources for more information on TLS ciphers: -# -# - https://wiki.mozilla.org/Security/Server_Side_TLS -# - https://www.ssllabs.com/projects/best-practices/index.html -# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ -# -# The general intent is: -# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), -# - prefer ECDHE over DHE for better performance, -# - prefer any AES-GCM over any AES-CBC for better performance and security, -# - use 3DES as fallback which is secure but slow, -# - disable NULL authentication, MD5 MACs and DSS for security reasons. -DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \ - "ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \ - "!aNULL:!MD5:!DSS" +DEFAULT_SSL_CIPHER_LIST = util.ssl_.DEFAULT_CIPHERS +# OpenSSL will only write 16K at a time +SSL_WRITE_BLOCKSIZE = 16384 + +try: + _ = memoryview + has_memoryview = True +except NameError: + has_memoryview = False orig_util_HAS_SNI = util.HAS_SNI orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket @@ -191,6 +182,11 @@ class WrappedSocket(object): return b'' else: raise + except OpenSSL.SSL.ZeroReturnError as e: + if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: + return b'' + else: + raise except OpenSSL.SSL.WantReadError: rd, wd, ed = select.select( [self.socket], [], [], self.socket.gettimeout()) @@ -216,13 +212,21 @@ class WrappedSocket(object): continue def sendall(self, data): - while len(data): - sent = self._send_until_done(data) - data = data[sent:] + if has_memoryview and not isinstance(data, memoryview): + data = memoryview(data) + + total_sent = 0 + while total_sent < len(data): + sent = self._send_until_done(data[total_sent:total_sent+SSL_WRITE_BLOCKSIZE]) + total_sent += sent + + def shutdown(self): + # FIXME rethrow compatible exceptions should we ever use this + self.connection.shutdown() def close(self): if self._makefile_refs < 1: - return self.connection.shutdown() + return self.connection.close() else: self._makefile_refs -= 1 @@ -263,7 +267,7 @@ def _verify_callback(cnx, x509, err_no, err_depth, return_code): def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ca_certs=None, server_hostname=None, - ssl_version=None): + ssl_version=None, ca_cert_dir=None): ctx = OpenSSL.SSL.Context(_openssl_versions[ssl_version]) if certfile: keyfile = keyfile or certfile # Match behaviour of the normal python ssl library @@ -272,9 +276,9 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ctx.use_privatekey_file(keyfile) if cert_reqs != ssl.CERT_NONE: ctx.set_verify(_openssl_verify[cert_reqs], _verify_callback) - if ca_certs: + if ca_certs or ca_cert_dir: try: - ctx.load_verify_locations(ca_certs, None) + ctx.load_verify_locations(ca_certs, ca_cert_dir) except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) else: @@ -294,10 +298,12 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, try: cnx.do_handshake() except OpenSSL.SSL.WantReadError: - select.select([sock], [], []) + rd, _, _ = select.select([sock], [], [], sock.gettimeout()) + if not rd: + raise timeout('select timed out') continue except OpenSSL.SSL.Error as e: - raise ssl.SSLError('bad handshake', e) + raise ssl.SSLError('bad handshake: %r' % e) break return WrappedSocket(cnx, sock) diff --git a/lib/requests/packages/urllib3/exceptions.py b/lib/requests/packages/urllib3/exceptions.py index 0c6fd3c5..9607d65f 100644 --- a/lib/requests/packages/urllib3/exceptions.py +++ b/lib/requests/packages/urllib3/exceptions.py @@ -112,6 +112,9 @@ class ConnectTimeoutError(TimeoutError): "Raised when a socket timeout occurs while connecting to a server" pass +class NewConnectionError(ConnectTimeoutError, PoolError): + "Raised when we fail to establish a new connection. Usually ECONNREFUSED." + pass class EmptyPoolError(PoolError): "Raised when a pool runs out of connections and no more are allowed." @@ -149,6 +152,11 @@ class SecurityWarning(HTTPWarning): pass +class SubjectAltNameWarning(SecurityWarning): + "Warned when connecting to a host with a certificate missing a SAN." + pass + + class InsecureRequestWarning(SecurityWarning): "Warned when making an unverified HTTPS request." pass @@ -157,3 +165,29 @@ class InsecureRequestWarning(SecurityWarning): class SystemTimeWarning(SecurityWarning): "Warned when system time is suspected to be wrong" pass + + +class InsecurePlatformWarning(SecurityWarning): + "Warned when certain SSL configuration is not available on a platform." + pass + + +class ResponseNotChunked(ProtocolError, ValueError): + "Response needs to be chunked in order to read it as chunks." + pass + + +class ProxySchemeUnknown(AssertionError, ValueError): + "ProxyManager does not support the supplied scheme" + # TODO(t-8ch): Stop inheriting from AssertionError in v2.0. + + def __init__(self, scheme): + message = "Not supported proxy scheme %s" % scheme + super(ProxySchemeUnknown, self).__init__(message) + + +class HeaderParsingError(HTTPError): + "Raised by assert_header_parsing, but we convert it to a log.warning statement." + def __init__(self, defects, unparsed_data): + message = '%s, unparsed data: %r' % (defects or 'Unknown', unparsed_data) + super(HeaderParsingError, self).__init__(message) diff --git a/lib/requests/packages/urllib3/poolmanager.py b/lib/requests/packages/urllib3/poolmanager.py index 515dc962..76b6a129 100644 --- a/lib/requests/packages/urllib3/poolmanager.py +++ b/lib/requests/packages/urllib3/poolmanager.py @@ -8,7 +8,7 @@ except ImportError: from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool from .connectionpool import port_by_scheme -from .exceptions import LocationValueError +from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown from .request import RequestMethods from .util.url import parse_url from .util.retry import Retry @@ -64,6 +64,14 @@ class PoolManager(RequestMethods): self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close()) + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.clear() + # Return False to re-raise any potential exceptions + return False + def _new_pool(self, scheme, host, port): """ Create a new :class:`ConnectionPool` based on host, port and scheme. @@ -167,7 +175,14 @@ class PoolManager(RequestMethods): if not isinstance(retries, Retry): retries = Retry.from_int(retries, redirect=redirect) - kw['retries'] = retries.increment(method, redirect_location) + try: + retries = retries.increment(method, url, response=response, _pool=conn) + except MaxRetryError: + if retries.raise_on_redirect: + raise + return response + + kw['retries'] = retries kw['redirect'] = redirect log.info("Redirecting %s -> %s" % (url, redirect_location)) @@ -212,8 +227,8 @@ class ProxyManager(PoolManager): port = port_by_scheme.get(proxy.scheme, 80) proxy = proxy._replace(port=port) - assert proxy.scheme in ("http", "https"), \ - 'Not supported proxy scheme %s' % proxy.scheme + if proxy.scheme not in ("http", "https"): + raise ProxySchemeUnknown(proxy.scheme) self.proxy = proxy self.proxy_headers = proxy_headers or {} diff --git a/lib/requests/packages/urllib3/request.py b/lib/requests/packages/urllib3/request.py index b08d6c92..a1a12bc5 100644 --- a/lib/requests/packages/urllib3/request.py +++ b/lib/requests/packages/urllib3/request.py @@ -71,14 +71,22 @@ class RequestMethods(object): headers=headers, **urlopen_kw) - def request_encode_url(self, method, url, fields=None, **urlopen_kw): + def request_encode_url(self, method, url, fields=None, headers=None, + **urlopen_kw): """ Make a request using :meth:`urlopen` with the ``fields`` encoded in the url. This is useful for request methods like GET, HEAD, DELETE, etc. """ + if headers is None: + headers = self.headers + + extra_kw = {'headers': headers} + extra_kw.update(urlopen_kw) + if fields: url += '?' + urlencode(fields) - return self.urlopen(method, url, **urlopen_kw) + + return self.urlopen(method, url, **extra_kw) def request_encode_body(self, method, url, fields=None, headers=None, encode_multipart=True, multipart_boundary=None, diff --git a/lib/requests/packages/urllib3/response.py b/lib/requests/packages/urllib3/response.py index e69de957..788eb6ca 100644 --- a/lib/requests/packages/urllib3/response.py +++ b/lib/requests/packages/urllib3/response.py @@ -1,13 +1,16 @@ +from contextlib import contextmanager import zlib import io from socket import timeout as SocketTimeout from ._collections import HTTPHeaderDict -from .exceptions import ProtocolError, DecodeError, ReadTimeoutError -from .packages.six import string_types as basestring, binary_type +from .exceptions import ( + ProtocolError, DecodeError, ReadTimeoutError, ResponseNotChunked +) +from .packages.six import string_types as basestring, binary_type, PY3 +from .packages.six.moves import http_client as httplib from .connection import HTTPException, BaseSSLError -from .util.response import is_fp_closed - +from .util.response import is_fp_closed, is_response_to_head class DeflateDecoder(object): @@ -21,6 +24,9 @@ class DeflateDecoder(object): return getattr(self._obj, name) def decompress(self, data): + if not data: + return data + if not self._first_try: return self._obj.decompress(data) @@ -36,9 +42,23 @@ class DeflateDecoder(object): self._data = None +class GzipDecoder(object): + + def __init__(self): + self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) + + def __getattr__(self, name): + return getattr(self._obj, name) + + def decompress(self, data): + if not data: + return data + return self._obj.decompress(data) + + def _get_decoder(mode): if mode == 'gzip': - return zlib.decompressobj(16 + zlib.MAX_WBITS) + return GzipDecoder() return DeflateDecoder() @@ -76,9 +96,10 @@ class HTTPResponse(io.IOBase): strict=0, preload_content=True, decode_content=True, original_response=None, pool=None, connection=None): - self.headers = HTTPHeaderDict() - if headers: - self.headers.update(headers) + if isinstance(headers, HTTPHeaderDict): + self.headers = headers + else: + self.headers = HTTPHeaderDict(headers) self.status = status self.version = version self.reason = reason @@ -100,7 +121,17 @@ class HTTPResponse(io.IOBase): if hasattr(body, 'read'): self._fp = body - if preload_content and not self._body: + # Are we using the chunked-style of transfer encoding? + self.chunked = False + self.chunk_left = None + tr_enc = self.headers.get('transfer-encoding', '').lower() + # Don't incur the penalty of creating a list and then discarding it + encodings = (enc.strip() for enc in tr_enc.split(",")) + if "chunked" in encodings: + self.chunked = True + + # We certainly don't want to preload content when the response is chunked. + if not self.chunked and preload_content and not self._body: self._body = self.read(decode_content=decode_content) def get_redirect_location(self): @@ -140,6 +171,76 @@ class HTTPResponse(io.IOBase): """ return self._fp_bytes_read + def _init_decoder(self): + """ + Set-up the _decoder attribute if necessar. + """ + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 + content_encoding = self.headers.get('content-encoding', '').lower() + if self._decoder is None and content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) + + def _decode(self, data, decode_content, flush_decoder): + """ + Decode the data passed in and potentially flush the decoder. + """ + try: + if decode_content and self._decoder: + data = self._decoder.decompress(data) + except (IOError, zlib.error) as e: + content_encoding = self.headers.get('content-encoding', '').lower() + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, e) + + if flush_decoder and decode_content and self._decoder: + buf = self._decoder.decompress(binary_type()) + data += buf + self._decoder.flush() + + return data + + @contextmanager + def _error_catcher(self): + """ + Catch low-level python exceptions, instead re-raising urllib3 + variants, so that low-level exceptions are not leaked in the + high-level api. + + On exit, release the connection back to the pool. + """ + try: + try: + yield + + except SocketTimeout: + # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but + # there is yet no clean way to get at it from this context. + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except BaseSSLError as e: + # FIXME: Is there a better way to differentiate between SSLErrors? + if 'read operation timed out' not in str(e): # Defensive: + # This shouldn't happen but just in case we're missing an edge + # case, let's avoid swallowing SSL errors. + raise + + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except HTTPException as e: + # This includes IncompleteRead. + raise ProtocolError('Connection broken: %r' % e, e) + except Exception: + # The response may not be closed but we're not going to use it anymore + # so close it now to ensure that the connection is released back to the pool. + if self._original_response and not self._original_response.isclosed(): + self._original_response.close() + + raise + finally: + if self._original_response and self._original_response.isclosed(): + self.release_conn() + def read(self, amt=None, decode_content=None, cache_content=False): """ Similar to :meth:`httplib.HTTPResponse.read`, but with two additional @@ -161,12 +262,7 @@ class HTTPResponse(io.IOBase): after having ``.read()`` the file object. (Overridden if ``amt`` is set.) """ - # Note: content-encoding value should be case-insensitive, per RFC 7230 - # Section 3.2 - content_encoding = self.headers.get('content-encoding', '').lower() - if self._decoder is None: - if content_encoding in self.CONTENT_DECODERS: - self._decoder = _get_decoder(content_encoding) + self._init_decoder() if decode_content is None: decode_content = self.decode_content @@ -174,67 +270,37 @@ class HTTPResponse(io.IOBase): return flush_decoder = False + data = None - try: - try: - if amt is None: - # cStringIO doesn't like amt=None - data = self._fp.read() + with self._error_catcher(): + if amt is None: + # cStringIO doesn't like amt=None + data = self._fp.read() + flush_decoder = True + else: + cache_content = False + data = self._fp.read(amt) + if amt != 0 and not data: # Platform-specific: Buggy versions of Python. + # Close the connection when no data is returned + # + # This is redundant to what httplib/http.client _should_ + # already do. However, versions of python released before + # December 15, 2012 (http://bugs.python.org/issue16298) do + # not properly close the connection in all cases. There is + # no harm in redundantly calling close. + self._fp.close() flush_decoder = True - else: - cache_content = False - data = self._fp.read(amt) - if amt != 0 and not data: # Platform-specific: Buggy versions of Python. - # Close the connection when no data is returned - # - # This is redundant to what httplib/http.client _should_ - # already do. However, versions of python released before - # December 15, 2012 (http://bugs.python.org/issue16298) do - # not properly close the connection in all cases. There is - # no harm in redundantly calling close. - self._fp.close() - flush_decoder = True - - except SocketTimeout: - # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but - # there is yet no clean way to get at it from this context. - raise ReadTimeoutError(self._pool, None, 'Read timed out.') - - except BaseSSLError as e: - # FIXME: Is there a better way to differentiate between SSLErrors? - if not 'read operation timed out' in str(e): # Defensive: - # This shouldn't happen but just in case we're missing an edge - # case, let's avoid swallowing SSL errors. - raise - - raise ReadTimeoutError(self._pool, None, 'Read timed out.') - - except HTTPException as e: - # This includes IncompleteRead. - raise ProtocolError('Connection broken: %r' % e, e) + if data: self._fp_bytes_read += len(data) - try: - if decode_content and self._decoder: - data = self._decoder.decompress(data) - except (IOError, zlib.error) as e: - raise DecodeError( - "Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding, e) - - if flush_decoder and decode_content and self._decoder: - buf = self._decoder.decompress(binary_type()) - data += buf + self._decoder.flush() + data = self._decode(data, decode_content, flush_decoder) if cache_content: self._body = data - return data + return data - finally: - if self._original_response and self._original_response.isclosed(): - self.release_conn() def stream(self, amt=2**16, decode_content=None): """ @@ -252,11 +318,15 @@ class HTTPResponse(io.IOBase): If True, will attempt to decode the body based on the 'content-encoding' header. """ - while not is_fp_closed(self._fp): - data = self.read(amt=amt, decode_content=decode_content) + if self.chunked: + for line in self.read_chunked(amt, decode_content=decode_content): + yield line + else: + while not is_fp_closed(self._fp): + data = self.read(amt=amt, decode_content=decode_content) - if data: - yield data + if data: + yield data @classmethod def from_httplib(ResponseCls, r, **response_kw): @@ -267,14 +337,17 @@ class HTTPResponse(io.IOBase): Remaining parameters are passed to the HTTPResponse constructor, along with ``original_response=r``. """ + headers = r.msg - headers = HTTPHeaderDict() - for k, v in r.getheaders(): - headers.add(k, v) + if not isinstance(headers, HTTPHeaderDict): + if PY3: # Python 3 + headers = HTTPHeaderDict(headers.items()) + else: # Python 2 + headers = HTTPHeaderDict.from_httplib(headers) # HTTPResponse objects in Python 3 don't have a .strict attribute strict = getattr(r, 'strict', 0) - return ResponseCls(body=r, + resp = ResponseCls(body=r, headers=headers, status=r.status, version=r.version, @@ -282,6 +355,7 @@ class HTTPResponse(io.IOBase): strict=strict, original_response=r, **response_kw) + return resp # Backwards-compatibility methods for httplib.HTTPResponse def getheaders(self): @@ -331,3 +405,81 @@ class HTTPResponse(io.IOBase): else: b[:len(temp)] = temp return len(temp) + + def _update_chunk_length(self): + # First, we'll figure out length of a chunk and then + # we'll try to read it from socket. + if self.chunk_left is not None: + return + line = self._fp.fp.readline() + line = line.split(b';', 1)[0] + try: + self.chunk_left = int(line, 16) + except ValueError: + # Invalid chunked protocol response, abort. + self.close() + raise httplib.IncompleteRead(line) + + def _handle_chunk(self, amt): + returned_chunk = None + if amt is None: + chunk = self._fp._safe_read(self.chunk_left) + returned_chunk = chunk + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + elif amt < self.chunk_left: + value = self._fp._safe_read(amt) + self.chunk_left = self.chunk_left - amt + returned_chunk = value + elif amt == self.chunk_left: + value = self._fp._safe_read(amt) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + returned_chunk = value + else: # amt > self.chunk_left + returned_chunk = self._fp._safe_read(self.chunk_left) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + return returned_chunk + + def read_chunked(self, amt=None, decode_content=None): + """ + Similar to :meth:`HTTPResponse.read`, but with an additional + parameter: ``decode_content``. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + self._init_decoder() + # FIXME: Rewrite this method and make it a class with a better structured logic. + if not self.chunked: + raise ResponseNotChunked("Response is not chunked. " + "Header 'transfer-encoding: chunked' is missing.") + + # Don't bother reading the body of a HEAD request. + if self._original_response and is_response_to_head(self._original_response): + self._original_response.close() + return + + with self._error_catcher(): + while True: + self._update_chunk_length() + if self.chunk_left == 0: + break + chunk = self._handle_chunk(amt) + yield self._decode(chunk, decode_content=decode_content, + flush_decoder=True) + + # Chunk content ends with \r\n: discard it. + while True: + line = self._fp.fp.readline() + if not line: + # Some sites may not end with '\r\n'. + break + if line == b'\r\n': + break + + # We read everything; close the "file". + if self._original_response: + self._original_response.close() diff --git a/lib/requests/packages/urllib3/util/connection.py b/lib/requests/packages/urllib3/util/connection.py index 2156993a..4f2f0f18 100644 --- a/lib/requests/packages/urllib3/util/connection.py +++ b/lib/requests/packages/urllib3/util/connection.py @@ -60,6 +60,8 @@ def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, """ host, port = address + if host.startswith('['): + host = host.strip('[]') err = None for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): af, socktype, proto, canonname, sa = res @@ -78,15 +80,16 @@ def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, sock.connect(sa) return sock - except socket.error as _: - err = _ + except socket.error as e: + err = e if sock is not None: sock.close() + sock = None if err is not None: raise err - else: - raise socket.error("getaddrinfo returns an empty list") + + raise socket.error("getaddrinfo returns an empty list") def _set_socket_options(sock, options): diff --git a/lib/requests/packages/urllib3/util/response.py b/lib/requests/packages/urllib3/util/response.py index 45fff552..2c1de154 100644 --- a/lib/requests/packages/urllib3/util/response.py +++ b/lib/requests/packages/urllib3/util/response.py @@ -1,3 +1,8 @@ +from ..packages.six.moves import http_client as httplib + +from ..exceptions import HeaderParsingError + + def is_fp_closed(obj): """ Checks whether a given file-like object is closed. @@ -20,3 +25,49 @@ def is_fp_closed(obj): pass raise ValueError("Unable to determine whether fp is closed.") + + +def assert_header_parsing(headers): + """ + Asserts whether all headers have been successfully parsed. + Extracts encountered errors from the result of parsing headers. + + Only works on Python 3. + + :param headers: Headers to verify. + :type headers: `httplib.HTTPMessage`. + + :raises urllib3.exceptions.HeaderParsingError: + If parsing errors are found. + """ + + # This will fail silently if we pass in the wrong kind of parameter. + # To make debugging easier add an explicit check. + if not isinstance(headers, httplib.HTTPMessage): + raise TypeError('expected httplib.Message, got {}.'.format( + type(headers))) + + defects = getattr(headers, 'defects', None) + get_payload = getattr(headers, 'get_payload', None) + + unparsed_data = None + if get_payload: # Platform-specific: Python 3. + unparsed_data = get_payload() + + if defects or unparsed_data: + raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data) + + +def is_response_to_head(response): + """ + Checks, wether a the request of a response has been a HEAD-request. + Handles the quirks of AppEngine. + + :param conn: + :type conn: :class:`httplib.HTTPResponse` + """ + # FIXME: Can we do this somehow without accessing private httplib _method? + method = response._method + if isinstance(method, int): # Platform-specific: Appengine + return method == 3 + return method.upper() == 'HEAD' diff --git a/lib/requests/packages/urllib3/util/retry.py b/lib/requests/packages/urllib3/util/retry.py index aeaf8a02..1fb1f23b 100644 --- a/lib/requests/packages/urllib3/util/retry.py +++ b/lib/requests/packages/urllib3/util/retry.py @@ -94,7 +94,7 @@ class Retry(object): seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer - than :attr:`Retry.MAX_BACKOFF`. + than :attr:`Retry.BACKOFF_MAX`. By default, backoff is disabled (set to 0). @@ -190,7 +190,7 @@ class Retry(object): return isinstance(err, (ReadTimeoutError, ProtocolError)) def is_forced_retry(self, method, status_code): - """ Is this method/response retryable? (Based on method/codes whitelists) + """ Is this method/status code retryable? (Based on method/codes whitelists) """ if self.method_whitelist and method.upper() not in self.method_whitelist: return False diff --git a/lib/requests/packages/urllib3/util/ssl_.py b/lib/requests/packages/urllib3/util/ssl_.py index a788b1b9..47b817e3 100644 --- a/lib/requests/packages/urllib3/util/ssl_.py +++ b/lib/requests/packages/urllib3/util/ssl_.py @@ -1,17 +1,25 @@ from binascii import hexlify, unhexlify -from hashlib import md5, sha1 +from hashlib import md5, sha1, sha256 -from ..exceptions import SSLError +from ..exceptions import SSLError, InsecurePlatformWarning SSLContext = None HAS_SNI = False create_default_context = None +# Maps the length of a digest to a possible hash function producing this digest +HASHFUNC_MAP = { + 32: md5, + 40: sha1, + 64: sha256, +} + import errno -import ssl +import warnings try: # Test for SSL features + import ssl from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 from ssl import HAS_SNI # Has SNI? except ImportError: @@ -24,14 +32,24 @@ except ImportError: OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000 OP_NO_COMPRESSION = 0x20000 -try: - from ssl import _DEFAULT_CIPHERS -except ImportError: - _DEFAULT_CIPHERS = ( - 'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:' - 'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:ECDH+RC4:' - 'DH+RC4:RSA+RC4:!aNULL:!eNULL:!MD5' - ) +# A secure default. +# Sources for more information on TLS ciphers: +# +# - https://wiki.mozilla.org/Security/Server_Side_TLS +# - https://www.ssllabs.com/projects/best-practices/index.html +# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ +# +# The general intent is: +# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), +# - prefer ECDHE over DHE for better performance, +# - prefer any AES-GCM over any AES-CBC for better performance and security, +# - use 3DES as fallback which is secure but slow, +# - disable NULL authentication, MD5 MACs and DSS for security reasons. +DEFAULT_CIPHERS = ( + 'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:' + 'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:!aNULL:' + '!eNULL:!MD5' +) try: from ssl import SSLContext # Modern SSL? @@ -39,7 +57,8 @@ except ImportError: import sys class SSLContext(object): # Platform-specific: Python 2 & 3.1 - supports_set_ciphers = sys.version_info >= (2, 7) + supports_set_ciphers = ((2, 7) <= sys.version_info < (3,) or + (3, 2) <= sys.version_info) def __init__(self, protocol_version): self.protocol = protocol_version @@ -56,8 +75,11 @@ except ImportError: self.certfile = certfile self.keyfile = keyfile - def load_verify_locations(self, location): - self.ca_certs = location + def load_verify_locations(self, cafile=None, capath=None): + self.ca_certs = cafile + + if capath is not None: + raise SSLError("CA directories not supported in older Pythons") def set_ciphers(self, cipher_suite): if not self.supports_set_ciphers: @@ -69,6 +91,14 @@ except ImportError: self.ciphers = cipher_suite def wrap_socket(self, socket, server_hostname=None): + warnings.warn( + 'A true SSLContext object is not available. This prevents ' + 'urllib3 from configuring SSL appropriately and may cause ' + 'certain SSL connections to fail. For more information, see ' + 'https://urllib3.readthedocs.org/en/latest/security.html' + '#insecureplatformwarning.', + InsecurePlatformWarning + ) kwargs = { 'keyfile': self.keyfile, 'certfile': self.certfile, @@ -92,30 +122,21 @@ def assert_fingerprint(cert, fingerprint): Fingerprint as string of hexdigits, can be interspersed by colons. """ - # Maps the length of a digest to a possible hash function producing - # this digest. - hashfunc_map = { - 16: md5, - 20: sha1 - } - fingerprint = fingerprint.replace(':', '').lower() - digest_length, odd = divmod(len(fingerprint), 2) - - if odd or digest_length not in hashfunc_map: - raise SSLError('Fingerprint is of invalid length.') + digest_length = len(fingerprint) + hashfunc = HASHFUNC_MAP.get(digest_length) + if not hashfunc: + raise SSLError( + 'Fingerprint of invalid length: {0}'.format(fingerprint)) # We need encode() here for py32; works on py2 and p33. fingerprint_bytes = unhexlify(fingerprint.encode()) - hashfunc = hashfunc_map[digest_length] - cert_digest = hashfunc(cert).digest() - if not cert_digest == fingerprint_bytes: + if cert_digest != fingerprint_bytes: raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' - .format(hexlify(fingerprint_bytes), - hexlify(cert_digest))) + .format(fingerprint, hexlify(cert_digest))) def resolve_cert_reqs(candidate): @@ -157,7 +178,7 @@ def resolve_ssl_version(candidate): return candidate -def create_urllib3_context(ssl_version=None, cert_reqs=ssl.CERT_REQUIRED, +def create_urllib3_context(ssl_version=None, cert_reqs=None, options=None, ciphers=None): """All arguments have the same meaning as ``ssl_wrap_socket``. @@ -194,6 +215,9 @@ def create_urllib3_context(ssl_version=None, cert_reqs=ssl.CERT_REQUIRED, """ context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23) + # Setting the default here, as we may have no ssl module on import + cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs + if options is None: options = 0 # SSLv2 is easily broken and is considered harmful and dangerous @@ -207,20 +231,23 @@ def create_urllib3_context(ssl_version=None, cert_reqs=ssl.CERT_REQUIRED, context.options |= options if getattr(context, 'supports_set_ciphers', True): # Platform-specific: Python 2.6 - context.set_ciphers(ciphers or _DEFAULT_CIPHERS) + context.set_ciphers(ciphers or DEFAULT_CIPHERS) context.verify_mode = cert_reqs if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2 - context.check_hostname = (context.verify_mode == ssl.CERT_REQUIRED) + # We do our own verification, including fingerprints and alternative + # hostnames. So disable it here + context.check_hostname = False return context def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ca_certs=None, server_hostname=None, - ssl_version=None, ciphers=None, ssl_context=None): + ssl_version=None, ciphers=None, ssl_context=None, + ca_cert_dir=None): """ - All arguments except for server_hostname and ssl_context have the same - meaning as they do when using :func:`ssl.wrap_socket`. + All arguments except for server_hostname, ssl_context, and ca_cert_dir have + the same meaning as they do when using :func:`ssl.wrap_socket`. :param server_hostname: When SNI is supported, the expected hostname of the certificate @@ -230,15 +257,19 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, :param ciphers: A string of ciphers we wish the client to support. This is not supported on Python 2.6 as the ssl module does not support it. + :param ca_cert_dir: + A directory containing CA certificates in multiple separate files, as + supported by OpenSSL's -CApath flag or the capath argument to + SSLContext.load_verify_locations(). """ context = ssl_context if context is None: context = create_urllib3_context(ssl_version, cert_reqs, ciphers=ciphers) - if ca_certs: + if ca_certs or ca_cert_dir: try: - context.load_verify_locations(ca_certs) + context.load_verify_locations(ca_certs, ca_cert_dir) except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2 raise SSLError(e) # Py33 raises FileNotFoundError which subclasses OSError @@ -247,6 +278,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, if e.errno == errno.ENOENT: raise SSLError(e) raise + if certfile: context.load_cert_chain(certfile, keyfile) if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI diff --git a/lib/requests/packages/urllib3/util/url.py b/lib/requests/packages/urllib3/util/url.py index b2ec834f..e58050cd 100644 --- a/lib/requests/packages/urllib3/util/url.py +++ b/lib/requests/packages/urllib3/util/url.py @@ -15,6 +15,8 @@ class Url(namedtuple('Url', url_attrs)): def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): + if path and not path.startswith('/'): + path = '/' + path return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) diff --git a/lib/requests/sessions.py b/lib/requests/sessions.py index 4f306963..9c0dd73d 100644 --- a/lib/requests/sessions.py +++ b/lib/requests/sessions.py @@ -62,12 +62,11 @@ def merge_setting(request_setting, session_setting, dict_class=OrderedDict): merged_setting = dict_class(to_key_val_list(session_setting)) merged_setting.update(to_key_val_list(request_setting)) - # Remove keys that are set to None. - for (k, v) in request_setting.items(): - if v is None: - del merged_setting[k] - - merged_setting = dict((k, v) for (k, v) in merged_setting.items() if v is not None) + # Remove keys that are set to None. Extract keys first to avoid altering + # the dictionary during iteration. + none_keys = [k for (k, v) in merged_setting.items() if v is None] + for key in none_keys: + del merged_setting[key] return merged_setting @@ -90,7 +89,7 @@ def merge_hooks(request_hooks, session_hooks, dict_class=OrderedDict): class SessionRedirectMixin(object): def resolve_redirects(self, resp, req, stream=False, timeout=None, - verify=True, cert=None, proxies=None): + verify=True, cert=None, proxies=None, **adapter_kwargs): """Receives a Response. Returns a generator of Responses.""" i = 0 @@ -171,7 +170,10 @@ class SessionRedirectMixin(object): except KeyError: pass - extract_cookies_to_jar(prepared_request._cookies, prepared_request, resp.raw) + # Extract any cookies sent on the response to the cookiejar + # in the new request. Because we've mutated our copied prepared + # request, use the old one that we haven't yet touched. + extract_cookies_to_jar(prepared_request._cookies, req, resp.raw) prepared_request._cookies.update(self.cookies) prepared_request.prepare_cookies(prepared_request._cookies) @@ -190,6 +192,7 @@ class SessionRedirectMixin(object): cert=cert, proxies=proxies, allow_redirects=False, + **adapter_kwargs ) extract_cookies_to_jar(self.cookies, prepared_request, resp.raw) @@ -271,6 +274,12 @@ class Session(SessionRedirectMixin): >>> s = requests.Session() >>> s.get('http://httpbin.org/get') 200 + + Or as a context manager:: + + >>> with requests.Session() as s: + >>> s.get('http://httpbin.org/get') + 200 """ __attrs__ = [ @@ -290,9 +299,9 @@ class Session(SessionRedirectMixin): #: :class:`Request `. self.auth = None - #: Dictionary mapping protocol to the URL of the proxy (e.g. - #: {'http': 'foo.bar:3128'}) to be used on each - #: :class:`Request `. + #: Dictionary mapping protocol or protocol and host to the URL of the proxy + #: (e.g. {'http': 'foo.bar:3128', 'http://host.name': 'foo.bar:4012'}) to + #: be used on each :class:`Request `. self.proxies = {} #: Event-handling hooks. @@ -401,8 +410,8 @@ class Session(SessionRedirectMixin): :param url: URL for the new :class:`Request` object. :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. - :param data: (optional) Dictionary or bytes to send in the body of the - :class:`Request`. + :param data: (optional) Dictionary, bytes, or file-like object to send + in the body of the :class:`Request`. :param json: (optional) json to send in the body of the :class:`Request`. :param headers: (optional) Dictionary of HTTP Headers to send with the @@ -414,13 +423,13 @@ class Session(SessionRedirectMixin): :param auth: (optional) Auth tuple or callable to enable Basic/Digest/Custom HTTP Auth. :param timeout: (optional) How long to wait for the server to send - data before giving up, as a float, or a (`connect timeout, read - timeout `_) tuple. + data before giving up, as a float, or a :ref:`(connect timeout, + read timeout) ` tuple. :type timeout: float or tuple :param allow_redirects: (optional) Set to True by default. :type allow_redirects: bool - :param proxies: (optional) Dictionary mapping protocol to the URL of - the proxy. + :param proxies: (optional) Dictionary mapping protocol or protocol and + hostname to the URL of the proxy. :param stream: (optional) whether to immediately download the response content. Defaults to ``False``. :param verify: (optional) if ``True``, the SSL cert will be verified. @@ -557,10 +566,6 @@ class Session(SessionRedirectMixin): # Set up variables needed for resolve_redirects and dispatching of hooks allow_redirects = kwargs.pop('allow_redirects', True) stream = kwargs.get('stream') - timeout = kwargs.get('timeout') - verify = kwargs.get('verify') - cert = kwargs.get('cert') - proxies = kwargs.get('proxies') hooks = request.hooks # Get the appropriate adapter to use @@ -588,12 +593,7 @@ class Session(SessionRedirectMixin): extract_cookies_to_jar(self.cookies, request, r.raw) # Redirect resolving generator. - gen = self.resolve_redirects(r, request, - stream=stream, - timeout=timeout, - verify=verify, - cert=cert, - proxies=proxies) + gen = self.resolve_redirects(r, request, **kwargs) # Resolve redirects if allowed. history = [resp for resp in gen] if allow_redirects else [] diff --git a/lib/requests/utils.py b/lib/requests/utils.py index 74679414..3d4c7945 100644 --- a/lib/requests/utils.py +++ b/lib/requests/utils.py @@ -25,7 +25,8 @@ from . import __version__ from . import certs from .compat import parse_http_list as _parse_list_header from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2, - builtin_str, getproxies, proxy_bypass, urlunparse) + builtin_str, getproxies, proxy_bypass, urlunparse, + basestring) from .cookies import RequestsCookieJar, cookiejar_from_dict from .structures import CaseInsensitiveDict from .exceptions import InvalidURL @@ -66,7 +67,7 @@ def super_len(o): return len(o.getvalue()) -def get_netrc_auth(url): +def get_netrc_auth(url, raise_errors=False): """Returns the Requests tuple auth for a given url from netrc.""" try: @@ -104,8 +105,9 @@ def get_netrc_auth(url): return (_netrc[login_i], _netrc[2]) except (NetrcParseError, IOError): # If there was a parsing error or a permissions issue reading the file, - # we'll just skip netrc auth - pass + # we'll just skip netrc auth unless explicitly asked to raise errors. + if raise_errors: + raise # AppEngine hackiness. except (ImportError, AttributeError): @@ -115,7 +117,8 @@ def get_netrc_auth(url): def guess_filename(obj): """Tries to guess the filename of the given object.""" name = getattr(obj, 'name', None) - if name and isinstance(name, builtin_str) and name[0] != '<' and name[-1] != '>': + if (name and isinstance(name, basestring) and name[0] != '<' and + name[-1] != '>'): return os.path.basename(name) @@ -418,10 +421,18 @@ def requote_uri(uri): This function passes the given URI through an unquote/quote cycle to ensure that it is fully and consistently quoted. """ - # Unquote only the unreserved characters - # Then quote only illegal characters (do not quote reserved, unreserved, - # or '%') - return quote(unquote_unreserved(uri), safe="!#$%&'()*+,/:;=?@[]~") + safe_with_percent = "!#$%&'()*+,/:;=?@[]~" + safe_without_percent = "!#$&'()*+,/:;=?@[]~" + try: + # Unquote only the unreserved characters + # Then quote only illegal characters (do not quote reserved, + # unreserved, or '%') + return quote(unquote_unreserved(uri), safe=safe_with_percent) + except InvalidURL: + # We couldn't unquote the given URI, so let's try quoting it, but + # there may be unquoted '%'s in the URI. We need to make sure they're + # properly quoted so they do not cause issues elsewhere. + return quote(uri, safe=safe_without_percent) def address_in_network(ip, net): @@ -526,6 +537,18 @@ def get_environ_proxies(url): else: return getproxies() +def select_proxy(url, proxies): + """Select a proxy for the url, if applicable. + + :param url: The url being for the request + :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs + """ + proxies = proxies or {} + urlparts = urlparse(url) + proxy = proxies.get(urlparts.scheme+'://'+urlparts.hostname) + if proxy is None: + proxy = proxies.get(urlparts.scheme) + return proxy def default_user_agent(name="python-requests"): """Return a string representing the default user agent.""" From 20934f3ac8e31288c78508b06ad0f3b4897b1a7d Mon Sep 17 00:00:00 2001 From: Bas Stottelaar Date: Thu, 10 Sep 2015 19:42:25 +0200 Subject: [PATCH 03/44] PEP8 + ignore requests warning. --- headphones/request.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/headphones/request.py b/headphones/request.py index 1c6f0308..a3493f79 100644 --- a/headphones/request.py +++ b/headphones/request.py @@ -24,6 +24,9 @@ import headphones import headphones.lock import collections +# Disable SSL certificate warnings. We have our own handling +requests.packages.urllib3.disable_warnings() + # Dictionary with last request times, for rate limiting. last_requests = collections.defaultdict(int) fake_lock = headphones.lock.FakeLock() @@ -95,7 +98,8 @@ def request_response(url, method="get", auto_raise=True, "host is up and running.") except requests.Timeout: logger.error( - "Request timed out. The remote host did not respond in a timely manner.") + "Request timed out. The remote host did not respond in a timely " + "manner.") except requests.HTTPError as e: if e.response is not None: if e.response.status_code >= 500: @@ -206,7 +210,8 @@ def server_message(response): message = None # First attempt is to 'read' the response as HTML - if response.headers.get("content-type") and "text/html" in response.headers.get("content-type"): + if response.headers.get("content-type") and \ + "text/html" in response.headers.get("content-type"): try: soup = BeautifulSoup(response.content, "html5lib") except Exception: From b21aef221e443ab4354e4980362e8da005f272e8 Mon Sep 17 00:00:00 2001 From: Bas Stottelaar Date: Thu, 10 Sep 2015 19:43:51 +0200 Subject: [PATCH 04/44] Add SSL/SNI diagnostics script. --- contrib/sni_test.py | 145 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 contrib/sni_test.py diff --git a/contrib/sni_test.py b/contrib/sni_test.py new file mode 100644 index 00000000..15b2b2c6 --- /dev/null +++ b/contrib/sni_test.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python + +import os +import sys + +# Ensure that we use the Headphones provided libraries. +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../lib")) + +import urlparse + + +def can_import(module): + """ + Return True if a given module can be imported or not. + """ + + try: + __import__(module) + except ImportError: + return False + + # Module can be imported + return True + + +def check_installation(): + """ + Check if some core modules are available. Info is based on this topic: + https://github.com/rembo10/headphones/issues/2210. + """ + + if can_import("requests"): + import requests + requests_version = requests.__version__ + else: + requests_version = "no" + + if can_import("OpenSSL"): + import OpenSSL + openssl_version = OpenSSL.__version__ + else: + openssl_version = "no" + + if can_import("cryptography"): + import cryptography + cryptography_version = cryptography.__version__ + else: + cryptography_version = "no" + + if can_import("pyasn1"): + import pyasn1 + pyasn1_version = pyasn1.__version__ + else: + pyasn1_version = "no" + + if can_import("ndg.httpsclient"): + from ndg import httpsclient + ndg_version = httpsclient.__date__ + else: + ndg_version = "no" + + # Print some system information. + sys.stdout.write( + "* Checking Python version: %s.%s.%s\n" % sys.version_info[:3]) + sys.stdout.write("* Operating system: %s\n" % sys.platform) + + sys.stdout.write( + "* Checking if requests can be imported: %s\n" % requests_version) + sys.stdout.write( + "* Checking if pyOpenSSL is installed: %s\n" % openssl_version) + sys.stdout.write( + "* Checking if cryptography is installed: %s\n" % cryptography_version) + sys.stdout.write( + "* Checking if pyasn1 is installed: %s\n" % pyasn1_version) + sys.stdout.write( + "* Checking if ndg.httpsclient is installed: %s\n" % ndg_version) + + +def main(): + """ + Test if the current Headphones installation can connect to SNI-enabled + servers. + """ + + # Read the URL to test. + if len(sys.argv) == 1: + url = "https://sni.velox.ch/" + else: + url = sys.argv[1] + + # Check if it is a HTTPS website. + parts = urlparse.urlparse(url) + + if parts.scheme.lower() != "https": + sys.stderr.write( + "Error: provided URL does not start with https://\n") + return 1 + + # Gather information + check_installation() + + # Do the request. + if not can_import("requests"): + sys.stderr.exit("Error: cannot continue without requests module!\n") + return 1 + + sys.stdout.write("* Performing request: %s\n" % url) + + import requests + requests.packages.urllib3.disable_warnings() + + try: + try: + response = requests.get(url) + except requests.exceptions.SSLError as e: + sys.stdout.write( + "- Server certificate seems invalid. I will disable " + "certificate check and try again. You'll see the real " + "exception if it fails again.\n") + sys.stdout.write( + "* Retrying request with certificate verification off.\n") + response = requests.get(url) + except Exception as e: + sys.stdout.write( + "- An error occured while performing the request. The " + "exception was: %s\n" % e.message) + sys.stdout.write( + "- Consult the Troubleshooting wiki (https://github.com/" + "rembo10/headphones/wiki/Troubleshooting) before you post an " + "issue!") + return 0 + + # Verify the response. + if response.status_code == 200: + sys.stdout.write("+ Got a valid response. All seems OK!\n") + else: + sys.stdout.write( + "- Server returned status code %s. Expected a status code 200.\n", + response.status_code) + sys.stdout.write( + "- However, I was able to communicate to the server!\n") + +# E.g. `python sni_test.py https://example.org'. +if __name__ == "__main__": + sys.exit(main()) From 7e0742f29bca53e698ba505c8e757897231e000c Mon Sep 17 00:00:00 2001 From: Bas Stottelaar Date: Thu, 10 Sep 2015 19:55:33 +0200 Subject: [PATCH 05/44] Potential fix for #2355. --- headphones/postprocessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/headphones/postprocessor.py b/headphones/postprocessor.py index 7235a954..2dab6062 100755 --- a/headphones/postprocessor.py +++ b/headphones/postprocessor.py @@ -812,7 +812,7 @@ def moveFiles(albumpath, release, tracks): try: os.chmod(os.path.normpath(temp_f).encode(headphones.SYS_ENCODING, 'replace'), int(headphones.CONFIG.FOLDER_PERMISSIONS, 8)) except Exception as e: - logger.error("Error trying to change permissions on folder: %s. %s", temp_f, e) + logger.error("Error trying to change permissions on folder: %s. %s", temp_f.decode(headphones.SYS_ENCODING, 'replace'), e) # If we failed to move all the files out of the directory, this will fail too try: From c4403cc4defa32af2410f1a2f1bd31ce1a358c41 Mon Sep 17 00:00:00 2001 From: Bas Stottelaar Date: Thu, 10 Sep 2015 20:05:02 +0200 Subject: [PATCH 06/44] Add logging to issue #2330. --- headphones/mb.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/headphones/mb.py b/headphones/mb.py index 0a477aa0..958245b7 100644 --- a/headphones/mb.py +++ b/headphones/mb.py @@ -99,6 +99,12 @@ def findArtist(name, limit=1): with mb_lock: try: artistResults = musicbrainzngs.search_artists(limit=limit, **criteria)['artist-list'] + except ValueError as e: + if "at least one query term is required" in e.message: + logger.error("Tried to search without a term, or an empty one. Provided artist (probably emtpy): %s", name) + return False + else: + raise except musicbrainzngs.WebServiceError as e: logger.warn('Attempt to query MusicBrainz for %s failed (%s)' % (name, str(e))) mb_lock.snooze(5) From 07b28605b5b00a66d24c8f2ae1d10d0fcce17d1b Mon Sep 17 00:00:00 2001 From: Bas Stottelaar Date: Thu, 10 Sep 2015 20:17:27 +0200 Subject: [PATCH 07/44] Add config option to disable changing file permissions (advanced feature for now). --- headphones/config.py | 2 ++ headphones/postprocessor.py | 24 +++++++++++++++--------- headphones/searcher.py | 22 +++++++++++++++------- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/headphones/config.py b/headphones/config.py index e2a122e2..d6d4e653 100644 --- a/headphones/config.py +++ b/headphones/config.py @@ -85,8 +85,10 @@ _CONFIG_DEFINITIONS = { 'EXTRA_TORZNABS': (list, 'Torznab', ''), 'FILE_FORMAT': (str, 'General', 'Track Artist - Album [Year] - Title'), 'FILE_PERMISSIONS': (str, 'General', '0644'), + 'FILE_PERMISSIONS_ENABLED': (bool_int, 'General', True), 'FILE_UNDERSCORES': (int, 'General', 0), 'FOLDER_FORMAT': (str, 'General', 'Artist/Album [Year]'), + 'FOLDER_PERMISSIONS_ENABLED': (bool_int, 'General', True), 'FOLDER_PERMISSIONS': (str, 'General', '0755'), 'FREEZE_DB': (int, 'General', 0), 'GIT_BRANCH': (str, 'General', 'master'), diff --git a/headphones/postprocessor.py b/headphones/postprocessor.py index 2dab6062..d94140e1 100755 --- a/headphones/postprocessor.py +++ b/headphones/postprocessor.py @@ -809,10 +809,13 @@ def moveFiles(albumpath, release, tracks): temp_f = os.path.join(temp_f, f) - try: - os.chmod(os.path.normpath(temp_f).encode(headphones.SYS_ENCODING, 'replace'), int(headphones.CONFIG.FOLDER_PERMISSIONS, 8)) - except Exception as e: - logger.error("Error trying to change permissions on folder: %s. %s", temp_f.decode(headphones.SYS_ENCODING, 'replace'), e) + if headphones.CONFIG.FOLDER_PERMISSIONS_ENABLED: + try: + os.chmod(os.path.normpath(temp_f).encode(headphones.SYS_ENCODING, 'replace'), int(headphones.CONFIG.FOLDER_PERMISSIONS, 8)) + except Exception as e: + logger.error("Error trying to change permissions on folder: %s. %s", temp_f.decode(headphones.SYS_ENCODING, 'replace'), e) + else: + logger.debug("Not changing folder permissions, since it is disabled: %s", temp_f.decode(headphones.SYS_ENCODING, 'replace')) # If we failed to move all the files out of the directory, this will fail too try: @@ -1037,11 +1040,14 @@ def updateFilePermissions(albumpaths): for r, d, f in os.walk(folder): for files in f: full_path = os.path.join(r, files) - try: - os.chmod(full_path, int(headphones.CONFIG.FILE_PERMISSIONS, 8)) - except: - logger.error("Could not change permissions for file: %s", full_path) - continue + if headphones.CONFIG.FILE_PERMISSIONS_ENABLED: + try: + os.chmod(full_path, int(headphones.CONFIG.FILE_PERMISSIONS, 8)) + except: + logger.error("Could not change permissions for file: %s", full_path) + continue + else: + logger.debug("Not changing file permissions, since it is disabled: %s", full_path.decode(headphones.SYS_ENCODING, 'replace')) def renameUnprocessedFolder(path, tag): diff --git a/headphones/searcher.py b/headphones/searcher.py index 7dbcf5ba..d7abc64e 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -79,16 +79,24 @@ def torrent_to_file(target_file, data): with open(target_file, "wb") as fp: fp.write(data) except IOError as e: - logger.error("Could not write torrent file '%s': %s. Skipping.", + logger.error( + "Could not write torrent file '%s': %s. Skipping.", target_file, e.message) return # Try to change permissions - try: - os.chmod(target_file, int(headphones.CONFIG.FILE_PERMISSIONS, 8)) - except OSError as e: - logger.warn("Could not change permissions for file '%s': %s. " \ - "Continuing.", target_file, e.message) + if headphones.CONFIG.FILE_PERMISSIONS_ENABLED: + try: + os.chmod(target_file, int(headphones.CONFIG.FILE_PERMISSIONS, 8)) + except OSError as e: + logger.warn( + "Could not change permissions for file '%s': %s. Continuing.", + target_file.decode(headphones.SYS_ENCODING, "replace"), + e.message) + else: + logger.debug( + "Not changing file permissions, since it is disabled: %s", + target_file.decode(headphones.SYS_ENCODING, "replace")) # Done return True @@ -203,7 +211,7 @@ def searchforalbum(albumid=None, new=False, losslessOnly=False, if release_date > datetime.datetime.today(): logger.info("Skipping: %s. Waiting for release date of: %s" % (album['AlbumTitle'], album['ReleaseDate'])) continue - + new = True if album['Status'] == "Wanted Lossless": From 4e79698b4ee7a8b585a01d30fbb9f54018774caa Mon Sep 17 00:00:00 2001 From: Bas Stottelaar Date: Thu, 10 Sep 2015 20:23:15 +0200 Subject: [PATCH 08/44] Add undocumented feature. Refs #2317 --- API.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/API.md b/API.md index 0fc0946d..d1dcdc21 100644 --- a/API.md +++ b/API.md @@ -66,7 +66,7 @@ Unmark album as wanted / i.e. mark as skipped ### forceSearch force search for wanted albums - not launched in a separate thread so it may take a bit to complete -### forceProcess +### forceProcess[&dir=/path/to/folder] Force post process albums in download directory - also not launched in a separate thread ### forceActiveArtistsUpdate force Active Artist Update - also not launched in a separate thread @@ -108,4 +108,4 @@ See above. Gives you a list of results from searcher.searchforalbum(). Basically runs a normal search, but rather than sorting them and downloading the best result, it dumps the data, which you can then pass on to download_specific_release(). Returns a list of dictionaries with params: title, size, url, provider & kind - all of these values must be passed back to download_specific_release ### download_specific_release&id=albumid&title=$title&size=$size&url=$url&provider=$provider&kind=$kind -Allows you to manually pass a choose_specific_download release back to searcher.send_to_downloader() \ No newline at end of file +Allows you to manually pass a choose_specific_download release back to searcher.send_to_downloader() From 409f6b8dd5460cc153d902a91c7b34c296e90218 Mon Sep 17 00:00:00 2001 From: Bas Stottelaar Date: Thu, 10 Sep 2015 20:26:31 +0200 Subject: [PATCH 09/44] At least display that it is wrong. Refs #2306. --- data/interfaces/default/history.html | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/data/interfaces/default/history.html b/data/interfaces/default/history.html index 4dde71ad..80834d60 100644 --- a/data/interfaces/default/history.html +++ b/data/interfaces/default/history.html @@ -55,7 +55,10 @@ if item['URL'].find('codeshy') != -1: fileid = 'nzb' - folder = 'Folder: ' + item['FolderName'] + if 'FolderName' in item: + folder = 'Folder: ' + item['FolderName'] + else: + folder = 'Folder: ?' %> From faa4bd82ae37ab58741f7638a8db451abee9d53c Mon Sep 17 00:00:00 2001 From: Bas Stottelaar Date: Thu, 10 Sep 2015 20:37:21 +0200 Subject: [PATCH 10/44] Add advanced option to stop renaming folders. Refs #2281. --- headphones/config.py | 2 ++ headphones/postprocessor.py | 12 +++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/headphones/config.py b/headphones/config.py index d6d4e653..97cc5e6c 100644 --- a/headphones/config.py +++ b/headphones/config.py @@ -205,6 +205,8 @@ _CONFIG_DEFINITIONS = { 'PUSHOVER_ONSNATCH': (int, 'Pushover', 0), 'PUSHOVER_PRIORITY': (int, 'Pushover', 0), 'RENAME_FILES': (int, 'General', 0), + 'RENAME_UNPROCESSED': (bool_int, 'General', 1), + 'RENAME_FROZEN': (bool_int, 'General', 1), 'REPLACE_EXISTING_FOLDERS': (int, 'General', 0), 'KEEP_ORIGINAL_FOLDER': (int, 'General', 0), 'REQUIRED_WORDS': (str, 'General', ''), diff --git a/headphones/postprocessor.py b/headphones/postprocessor.py index d94140e1..fc9b8fc7 100755 --- a/headphones/postprocessor.py +++ b/headphones/postprocessor.py @@ -104,7 +104,10 @@ def verify(albumid, albumpath, Kind=None, forced=False, keep_original_folder=Fal myDB.action('UPDATE snatched SET status = "Frozen" WHERE status NOT LIKE "Seed%" and AlbumID=?', [albumid]) frozen = re.search(r' \(Frozen\)(?:\[\d+\])?', albumpath) if not frozen: - renameUnprocessedFolder(albumpath, tag="Frozen") + if headphones.CONFIG.RENAME_FROZEN: + renameUnprocessedFolder(albumpath, tag="Frozen") + else: + logger.warn(u"Won't rename %s to mark as 'Frozen', because it is disabled.", albumpath.decode(headphones.SYS_ENCODING, 'replace')) return logger.info(u"Now adding/updating artist: " + release_dict['artist_name']) @@ -269,11 +272,14 @@ def verify(albumid, albumpath, Kind=None, forced=False, keep_original_folder=Fal doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list, Kind, keep_original_folder) return - logger.warn(u'Could not identify album: %s. It may not be the intended album.' % albumpath.decode(headphones.SYS_ENCODING, 'replace')) + logger.warn(u'Could not identify album: %s. It may not be the intended album.', albumpath.decode(headphones.SYS_ENCODING, 'replace')) myDB.action('UPDATE snatched SET status = "Unprocessed" WHERE status NOT LIKE "Seed%" and AlbumID=?', [albumid]) processed = re.search(r' \(Unprocessed\)(?:\[\d+\])?', albumpath) if not processed: - renameUnprocessedFolder(albumpath, tag="Unprocessed") + if headphones.CONFIG.RENAME_UNPROCESSED: + renameUnprocessedFolder(albumpath, tag="Unprocessed") + else: + logger.warn(u"Won't rename %s to mark as 'Unprocessed', because it is disabled.", albumpath.decode(headphones.SYS_ENCODING, 'replace')) def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list, Kind=None, keep_original_folder=False): From 912d5c4d78f10b5a6610a932f1de36619936eb99 Mon Sep 17 00:00:00 2001 From: Ade Date: Sat, 12 Sep 2015 08:20:18 +1200 Subject: [PATCH 11/44] Fix history indent --- data/interfaces/default/history.html | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data/interfaces/default/history.html b/data/interfaces/default/history.html index 80834d60..c410b2c4 100644 --- a/data/interfaces/default/history.html +++ b/data/interfaces/default/history.html @@ -55,10 +55,10 @@ if item['URL'].find('codeshy') != -1: fileid = 'nzb' - if 'FolderName' in item: - folder = 'Folder: ' + item['FolderName'] - else: - folder = 'Folder: ?' + if 'FolderName' in item: + folder = 'Folder: ' + item['FolderName'] + else: + folder = 'Folder: ?' %> From 6b30ed0ba6a338bd5df238fc244ddf8e14ef4379 Mon Sep 17 00:00:00 2001 From: lepricon49 Date: Sat, 12 Sep 2015 13:35:48 -0700 Subject: [PATCH 12/44] Stop JS links from scrolling to the top of the page Changed all Using href="#" to href="javascript:void(0)". Using "#" causes the browser to jump to the top of the page when the hyperlink is clicked (default empty # anchor position). Using "javascript:void(0)" will not change the browser's scroll position. --- data/interfaces/default/album.html | 22 +++++++++--------- data/interfaces/default/artist.html | 24 ++++++++++---------- data/interfaces/default/base.html | 6 ++--- data/interfaces/default/config.html | 2 +- data/interfaces/default/history.html | 14 ++++++------ data/interfaces/default/manage.html | 18 +++++++-------- data/interfaces/default/managemanual.html | 8 +++---- data/interfaces/default/manageunmatched.html | 8 +++---- data/interfaces/default/upcoming.html | 2 +- 9 files changed, 52 insertions(+), 52 deletions(-) diff --git a/data/interfaces/default/album.html b/data/interfaces/default/album.html index 06ac7e10..13223743 100644 --- a/data/interfaces/default/album.html +++ b/data/interfaces/default/album.html @@ -14,15 +14,15 @@ Delete Album %if album['Status'] == 'Skipped' or album['Status'] == 'Ignored': - Mark Album as Wanted + Mark Album as Wanted %elif album['Status'] == 'Wanted': - Force Check - Mark Album as Skipped + Force Check + Mark Album as Skipped %else: - Retry Download - Try New Version + Retry Download + Try New Version %endif - Choose Alternate Release + Choose Alternate Release - Edit Search Term + Edit Search Term - Choose Specific Download + Choose Specific Download @@ -66,7 +66,7 @@
- + @@ -78,7 +78,7 @@
Are you sure you want to ignore Local Album: ${album['AlbumTitle']} from future matching?

- +
@@ -97,7 +97,7 @@
- + diff --git a/data/interfaces/default/upcoming.html b/data/interfaces/default/upcoming.html index dbe7b254..97046f1c 100644 --- a/data/interfaces/default/upcoming.html +++ b/data/interfaces/default/upcoming.html @@ -3,7 +3,7 @@ <%def name="headerIncludes()"> From 7f4d10c85d5537528d1eb2b9e5e12979bfc51039 Mon Sep 17 00:00:00 2001 From: lepricon49 Date: Sun, 13 Sep 2015 10:54:05 -0700 Subject: [PATCH 13/44] Add magnet support for KAT rTorrent via blackhole KAT torrents downloads were 100% broken. This is a temporary hack to get KAT partially working. --- data/interfaces/default/config.html | 7 +++- data/interfaces/default/css/style.css | 4 +-- data/interfaces/default/css/style.less | 4 +-- headphones/config.py | 2 +- headphones/searcher.py | 49 ++++++++++++++++++++------ headphones/webserve.py | 1 + 6 files changed, 50 insertions(+), 17 deletions(-) diff --git a/data/interfaces/default/config.html b/data/interfaces/default/config.html index bf99dafd..aad752a5 100644 --- a/data/interfaces/default/config.html +++ b/data/interfaces/default/config.html @@ -336,9 +336,14 @@ Convert + +
- Note: opening magnet URL's is not suitable for headless/console/terminal servers. + Note: Opening magnet URLs is not suitable for headless/console/terminal servers.
Embed only works for rTorrent.
diff --git a/data/interfaces/default/css/style.css b/data/interfaces/default/css/style.css index a6723818..ef8047c7 100644 --- a/data/interfaces/default/css/style.css +++ b/data/interfaces/default/css/style.css @@ -334,7 +334,7 @@ form .row label { font-size: 12px; line-height: normal; padding-top: 7px; - width: 175px; + width: 170px; } form .row label.inline { margin-right: 5px; @@ -368,7 +368,7 @@ form .row small { display: block; font-size: 9px; line-height: 12px; - margin-left: 175px; + margin-left: 170px; margin-top: 3px; } form .left label { diff --git a/data/interfaces/default/css/style.less b/data/interfaces/default/css/style.less index b03057c2..3e0f9659 100644 --- a/data/interfaces/default/css/style.less +++ b/data/interfaces/default/css/style.less @@ -190,7 +190,7 @@ form { font-size: 12px; line-height: normal; padding-top: 7px; - width: 175px; + width: 170px; &.inline { margin-right: 5px; @@ -216,7 +216,7 @@ form { display: block; font-size: 9px; line-height: 12px; - margin-left: 175px; + margin-left: 170px; margin-top: 3px; } } diff --git a/headphones/config.py b/headphones/config.py index e2a122e2..7db9509b 100644 --- a/headphones/config.py +++ b/headphones/config.py @@ -160,7 +160,7 @@ _CONFIG_DEFINITIONS = { 'OMGWTFNZBS': (int, 'omgwtfnzbs', 0), 'OMGWTFNZBS_APIKEY': (str, 'omgwtfnzbs', ''), 'OMGWTFNZBS_UID': (str, 'omgwtfnzbs', ''), - 'OPEN_MAGNET_LINKS': (int, 'General', 0), # 0: Ignore, 1: Open, 2: Convert + 'OPEN_MAGNET_LINKS': (int, 'General', 0), # 0: Ignore, 1: Open, 2: Convert, 3: Embed (rtorrent) 'MAGNET_LINKS': (int, 'General', 0), 'OSX_NOTIFY_APP': (str, 'OSX_Notify', '/Applications/Headphones'), 'OSX_NOTIFY_ENABLED': (int, 'OSX_Notify', 0), diff --git a/headphones/searcher.py b/headphones/searcher.py index 7dbcf5ba..6f9ea87a 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -165,7 +165,7 @@ def get_seed_ratio(provider): elif provider == 'Mininova': seed_ratio = headphones.CONFIG.MININOVA_RATIO elif provider == 'Strike': - seed_ratio = headphones.CONFIG.STRIKE_RATIO + seed_ratio = headphones.CONFIG.STRIKE_RATIO else: seed_ratio = None @@ -711,7 +711,7 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, choose_spe def send_to_downloader(data, bestqual, album): - logger.info(u'Found best result from %s: %s - %s', bestqual[3], bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1])) + logger.info(u'Found best result from %s: [%s] - %s - %s', bestqual[3], bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1])) # Get rid of any dodgy chars here so we can prevent sab from renaming our downloads kind = bestqual[4] seed_ratio = None @@ -810,8 +810,11 @@ def send_to_downloader(data, bestqual, album): logger.warning("Unable to convert magnet with hash " \ "'%s' into a torrent file.", torrent_hash) return + elif headphones.CONFIG.MAGNET_LINKS == 3: + torrent_to_file(download_path, data) + return else: - logger.error("Cannot save magnet link in blackhole. " \ + logger.error("Ignoring magnet links for blackhole. " \ "Please switch your torrent downloader to " \ "Transmission or uTorrent, or allow Headphones " \ "to open or convert magnet links") @@ -1198,20 +1201,41 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose seeders = item['seeds'] url = item['torrentLink'] size = int(item['size']) + hash = item['hash'] + # The "tr" querystring used in magnetUri is a hack since it assumes the tracker is always the same for KAT. + # It currently works for all torrents tested but it should be changed to load the HTML, and parse out the real magnet URI. + magnetUri = r"magnet:?xt=urn:btih:" + hash + r"&tr=udp%3A%2F%2Fopen.demonii.com%3A1337" if format == "2": torrent = request.request_content(url, headers=headers) if not torrent or (int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1): rightformat = False - if rightformat and size < maxsize and minimumseeders < int(seeders): - match = True - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) - else: - match = False - logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %d, Format: %s)', title, size, int(seeders), rightformat) + #if rightformat and size < maxsize and minimumseeders < int(seeders): + # match = True + # logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + #else: + # match = False + # logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %d, Format: %s)', title, size, int(seeders), rightformat) - resultlist.append((title, size, url, provider, 'torrent', match)) + match = False + if rightformat: + if size < maxsize: + if minimumseeders < int(seeders): + match = True + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + else: + logger.info('%s has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %d, Format: %s)', title, size, int(seeders), rightformat) + else: + logger.info('%s is larger than the maxsize for this category, skipping. (Size: %i bytes, Seeders: %d, Format: %s)', title, size, int(seeders), rightformat) + else: + logger.info('%s is the wrong format for this category, skipping. (Size: %i bytes, Seeders: %d, Format: %s)', title, size, int(seeders), rightformat) + + + # The URL returned from KAT is NOT a torrent URL. It is a jump page. + # More work needs to be done here to get this to work right for real torrent files. Refer to CouchPotatos HTML parsing method. + #resultlist.append((title, size, url, provider, 'torrent', match)) + resultlist.append((title, size, magnetUri, provider, 'magnet', match)) except Exception as e: logger.exception("Unhandled exception in the KAT parser") @@ -1669,7 +1693,6 @@ def preprocess(resultlist): headers = {} if result[3] == 'Kick Ass Torrents': - #headers['Referer'] = 'http://kat.ph/' headers['User-Agent'] = USER_AGENT elif result[3] == 'What.cd': headers['User-Agent'] = 'Headphones' @@ -1677,6 +1700,10 @@ def preprocess(resultlist): headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2243.2 Safari/537.36' return request.request_content(url=result[2], headers=headers), result + if result[4] == 'magnet': + magnet_link = result[2] + return "d10:magnet-uri%d:%se" % (len(magnet_link), magnet_link), result + else: headers = {'User-Agent': USER_AGENT} diff --git a/headphones/webserve.py b/headphones/webserve.py index 121e8667..4966ec5b 100644 --- a/headphones/webserve.py +++ b/headphones/webserve.py @@ -1153,6 +1153,7 @@ class WebInterface(object): "magnet_links_0": radio(headphones.CONFIG.MAGNET_LINKS, 0), "magnet_links_1": radio(headphones.CONFIG.MAGNET_LINKS, 1), "magnet_links_2": radio(headphones.CONFIG.MAGNET_LINKS, 2), + "magnet_links_3": radio(headphones.CONFIG.MAGNET_LINKS, 3), "log_dir": headphones.CONFIG.LOG_DIR, "cache_dir": headphones.CONFIG.CACHE_DIR, "interface_list": interface_list, From c78e8764a0799ba974daa94247c97ace40c4f0bd Mon Sep 17 00:00:00 2001 From: lepricon49 Date: Sun, 13 Sep 2015 10:57:14 -0700 Subject: [PATCH 14/44] Add snatched email indicator prefix Prefix subject with "Snatched: " to make it more apparent that it is not the final download email notification. --- headphones/searcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/headphones/searcher.py b/headphones/searcher.py index 6f9ea87a..530cc339 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -951,7 +951,7 @@ def send_to_downloader(data, bestqual, album): logger.info(u"Sending Email notification") email = notifiers.Email() message = 'Snatched from ' + provider + '. ' + name - email.notify(title, message) + email.notify("Snatched: " + title, message) def verifyresult(title, artistterm, term, lossless): From 3b6c986704488ceb414305661cb8d419c4da3885 Mon Sep 17 00:00:00 2001 From: Ade Date: Mon, 14 Sep 2015 21:31:38 +1200 Subject: [PATCH 15/44] More torcache fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - reverted back to rss for kat so we can use magnet links instead of torcache link for transmission, utorrent. Also now uses the kat ’lossless’ category instead of going out to torcache to check for flac. - added torcache referer for blackhole --- headphones/searcher.py | 53 +++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/headphones/searcher.py b/headphones/searcher.py index d7abc64e..82b922e8 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -800,6 +800,7 @@ def send_to_downloader(data, bestqual, album): services = TORRENT_TO_MAGNET_SERVICES[:] random.shuffle(services) headers = {'User-Agent': USER_AGENT} + headers['Referer'] = 'https://torcache.net/' for service in services: @@ -1167,59 +1168,53 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose if headphones.CONFIG.KAT_PROXY_URL: providerurl = fix_url(set_proxy(headphones.CONFIG.KAT_PROXY_URL)) else: - providerurl = fix_url("https://kickass.to") + providerurl = fix_url("https://kat.cr") # Build URL - providerurl = providerurl + "/json.php?" + providerurl = providerurl + "/usearch/" + ka_term - # Pick category for torrents + # Set max size and category if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly: - format = "2" # FLAC maxsize = 10000000000 + providerurl += " category:lossless/" elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless: - format = "10" # MP3 and FLAC maxsize = 10000000000 + providerurl += " category:music/" else: - format = "8" # MP3 only maxsize = 300000000 + providerurl += " category:music/" # Requesting content logger.info("Searching %s using term: %s" % (provider,ka_term)) params = { - "q": ka_term + "+category:music", "field": "seeders", - "sorder": "desc" + "sorder": "desc", + "rss": "1" } - headers = {'User-Agent': USER_AGENT} - data = request.request_json(url=providerurl, params=params, headers=headers) + + data = request.request_feed(url=providerurl, params=params, + whitelist_status_code=[404]) # Process feed if data: - if not data['list']: + if not len(data.entries): logger.info("No results found on %s using search term: %s" % (provider, ka_term)) else: - for item in data['list']: + for item in data.entries: try: - rightformat = True title = item['title'] - seeders = item['seeds'] - url = item['torrentLink'] - size = int(item['size']) - - if format == "2": - torrent = request.request_content(url, headers=headers) - if not torrent or (int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1): - rightformat = False - - if rightformat and size < maxsize and minimumseeders < int(seeders): - match = True + seeders = item['torrent_seeds'] + if headphones.CONFIG.TORRENT_DOWNLOADER == 0: + url = item['links'][1]['href'] + else: + url = item['torrent_magneturi'] + size = int(item['links'][1]['length']) + if size < maxsize and minimumseeders < int(seeders): + resultlist.append((title, size, url, provider, 'torrent', True)) logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) else: - match = False - logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %d, Format: %s)', title, size, int(seeders), rightformat) - - resultlist.append((title, size, url, provider, 'torrent', match)) + logger.info('%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %d)', title, size, int(seeders)) except Exception as e: logger.exception("Unhandled exception in the KAT parser") @@ -1677,7 +1672,7 @@ def preprocess(resultlist): headers = {} if result[3] == 'Kick Ass Torrents': - #headers['Referer'] = 'http://kat.ph/' + headers['Referer'] = 'https://torcache.net/' headers['User-Agent'] = USER_AGENT elif result[3] == 'What.cd': headers['User-Agent'] = 'Headphones' From f9a02d9b3bb629bf45bb1a1fa6fdaeafb2a482a1 Mon Sep 17 00:00:00 2001 From: Ade Date: Fri, 18 Sep 2015 18:16:39 +1200 Subject: [PATCH 16/44] Get folder name working in history + show magnet as torrent --- data/interfaces/default/history.html | 7 +++---- headphones/webserve.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/data/interfaces/default/history.html b/data/interfaces/default/history.html index c410b2c4..9a1079ba 100644 --- a/data/interfaces/default/history.html +++ b/data/interfaces/default/history.html @@ -50,15 +50,14 @@ fileid = 'nzb' if item['URL'].find('torrent') != -1: fileid = 'torrent' + if item['URL'].find('magnet:') != -1: + fileid = 'torrent' if item['URL'].find('rutracker') != -1: fileid = 'torrent' if item['URL'].find('codeshy') != -1: fileid = 'nzb' - if 'FolderName' in item: - folder = 'Folder: ' + item['FolderName'] - else: - folder = 'Folder: ?' + folder = 'Folder: ' + item['FolderName'] %> diff --git a/headphones/webserve.py b/headphones/webserve.py index 121e8667..815c5e9f 100644 --- a/headphones/webserve.py +++ b/headphones/webserve.py @@ -814,7 +814,7 @@ class WebInterface(object): @cherrypy.expose def history(self): myDB = db.DBConnection() - history = myDB.select('''SELECT * from snatched WHERE Status NOT LIKE "Seed%" order by DateAdded DESC''') + history = myDB.select('''SELECT AlbumID, Title, Size, URL, DateAdded, Status, Kind, ifnull(FolderName, '?') FolderName FROM snatched WHERE Status NOT LIKE "Seed%" ORDER BY DateAdded DESC''') return serve_template(templatename="history.html", title="History", history=history) @cherrypy.expose From 0c556e32faaaa57edb84a090c8aeddd57bf1e6f4 Mon Sep 17 00:00:00 2001 From: Ade Date: Wed, 23 Sep 2015 20:58:10 +1200 Subject: [PATCH 17/44] Strike whitelist 404 --- headphones/searcher.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/headphones/searcher.py b/headphones/searcher.py index 82b922e8..93451107 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -1538,45 +1538,35 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose providerurl = providerurl + s_term + "&category=Music" if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly: - format = "2" providerurl = providerurl + "&subcategory=Lossless" maxsize = 10000000000 elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless: - format = "10" # MP3 and FLAC maxsize = 10000000000 else: - format = "8" # MP3 only maxsize = 300000000 logger.info("Searching %s using term: %s" % (provider, s_term)) - data = request.request_json(url=providerurl) + data = request.request_json(url=providerurl, + whitelist_status_code=[404]) if not data or not data.get('torrents'): logger.info("No results found on %s using search term: %s" % (provider, s_term)) else: for item in data['torrents']: try: - rightformat = True title = item['torrent_title'] seeders = item['seeds'] url = item['magnet_uri'] size = int(item['size']) subcategory = item['sub_category'] - if format == 2: - if subcategory != "Lossless": - rightformat = False - - if rightformat and size < maxsize and minimumseeders < int(seeders): - match = True + if size < maxsize and minimumseeders < int(seeders): + resultlist.append((title, size, url, provider, 'torrent', True)) logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) else: - match = False logger.info( - '%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %d, Format: %s)', - title, size, int(seeders), rightformat) - - resultlist.append((title, size, url, provider, 'torrent', match)) + '%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %d)', + title, size, int(seeders)) except Exception as e: logger.exception("Unhandled exception in the Strike parser") From 9519bfddd40d5f4869cc26c50df3b6ff63ddda0d Mon Sep 17 00:00:00 2001 From: Ade Date: Tue, 6 Oct 2015 19:32:55 +1300 Subject: [PATCH 18/44] torznab to transmission fix --- headphones/transmission.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/headphones/transmission.py b/headphones/transmission.py index 990360d0..ab5ae6c9 100644 --- a/headphones/transmission.py +++ b/headphones/transmission.py @@ -31,7 +31,7 @@ import headphones def addTorrent(link, data=None): method = 'torrent-add' - if link.endswith('.torrent') or data: + if link.endswith('.torrent') and not link.startswith('http') or data: if data: metainfo = str(base64.b64encode(data)) else: From 18d53167d00fa39216c5704317ec9b8787b5a8e1 Mon Sep 17 00:00:00 2001 From: Ade Date: Thu, 8 Oct 2015 21:48:39 +1300 Subject: [PATCH 19/44] utorrent torrents not being removed, fix #2385 --- headphones/utorrent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/headphones/utorrent.py b/headphones/utorrent.py index 08d20ce1..a826e928 100644 --- a/headphones/utorrent.py +++ b/headphones/utorrent.py @@ -181,7 +181,7 @@ def removeTorrent(hash, remove_data=False): status, torrentList = uTorrentClient.list() torrents = torrentList['torrents'] for torrent in torrents: - if torrent[0].lower() == hash: + if torrent[0].upper() == hash.upper(): if torrent[21] == 'Finished': logger.info('%s has finished seeding, removing torrent and data' % torrent[2]) uTorrentClient.remove(hash, remove_data) From 022976d2f669e23760242cfe3c0c12cf0b2fb831 Mon Sep 17 00:00:00 2001 From: theguardian Date: Tue, 20 Oct 2015 21:52:55 -0700 Subject: [PATCH 20/44] added verify_ssl_cert option to fix broken ssl installs --- Headphones.py | 10 ++++++++++ data/interfaces/default/config.html | 6 ++++++ headphones/config.py | 2 +- headphones/webserve.py | 3 ++- 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/Headphones.py b/Headphones.py index 0d31534e..713a628b 100755 --- a/Headphones.py +++ b/Headphones.py @@ -177,6 +177,16 @@ def main(): "module to enable HTTPS. HTTPS will be disabled.") headphones.CONFIG.ENABLE_HTTPS = False + #This fix is put in place for systems with broken SSL (like QNAP) + certificate_verification = headphones.CONFIG.VERIFY_SSL_CERT + if not certificate_verification: + try: + import ssl + ssl._create_default_https_context = ssl._create_unverified_context + except: + pass + #==== end block (should be configurable at settings level) + # Try to start the server. Will exit here is address is already in use. web_config = { 'http_port': http_port, diff --git a/data/interfaces/default/config.html b/data/interfaces/default/config.html index 5688ce6d..a6f3f6f6 100644 --- a/data/interfaces/default/config.html +++ b/data/interfaces/default/config.html @@ -80,6 +80,12 @@ +
+ + +
diff --git a/headphones/config.py b/headphones/config.py index 97cc5e6c..9d48f471 100644 --- a/headphones/config.py +++ b/headphones/config.py @@ -253,7 +253,7 @@ _CONFIG_DEFINITIONS = { 'UTORRENT_LABEL': (str, 'uTorrent', ''), 'UTORRENT_PASSWORD': (str, 'uTorrent', ''), 'UTORRENT_USERNAME': (str, 'uTorrent', ''), - 'VERIFY_SSL_CERT': (bool_int, 'Advanced', 1), + 'VERIFY_SSL_CERT': (int, 'Advanced', 1), 'WAIT_UNTIL_RELEASE_DATE' : (int, 'General', 0), 'WAFFLES': (int, 'Waffles', 0), 'WAFFLES_PASSKEY': (str, 'Waffles', ''), diff --git a/headphones/webserve.py b/headphones/webserve.py index 815c5e9f..b7f2874a 100644 --- a/headphones/webserve.py +++ b/headphones/webserve.py @@ -1025,6 +1025,7 @@ class WebInterface(object): "enable_https": checked(headphones.CONFIG.ENABLE_HTTPS), "https_cert": headphones.CONFIG.HTTPS_CERT, "https_key": headphones.CONFIG.HTTPS_KEY, + "verify_ssl_cert": checked(headphones.CONFIG.VERIFY_SSL_CERT), "api_enabled": checked(headphones.CONFIG.API_ENABLED), "api_key": headphones.CONFIG.API_KEY, "download_scan_interval": headphones.CONFIG.DOWNLOAD_SCAN_INTERVAL, @@ -1286,7 +1287,7 @@ class WebInterface(object): # Handle the variable config options. Note - keys with False values aren't getting passed checked_configs = [ - "launch_browser", "enable_https", "api_enabled", "use_blackhole", "headphones_indexer", "use_newznab", "newznab_enabled", "use_torznab", "torznab_enabled", + "launch_browser", "enable_https", "verify_ssl_cert", "api_enabled", "use_blackhole", "headphones_indexer", "use_newznab", "newznab_enabled", "use_torznab", "torznab_enabled", "use_nzbsorg", "use_omgwtfnzbs", "use_kat", "use_piratebay", "use_oldpiratebay", "use_mininova", "use_waffles", "use_rutracker", "use_whatcd", "use_strike", "preferred_bitrate_allow_lossless", "detect_bitrate", "ignore_clean_releases", "freeze_db", "cue_split", "move_files", "rename_files", "correct_metadata", "cleanup_files", "keep_nfo", "add_album_art", "embed_album_art", "embed_lyrics", From 8506671c5ced0f794d00613bf72348e92a5406c3 Mon Sep 17 00:00:00 2001 From: Troy Olson Date: Wed, 21 Oct 2015 13:33:02 -0700 Subject: [PATCH 21/44] =?UTF-8?q?Add=20fix=20for=20=CE=9E=20(capital=20'Xi?= =?UTF-8?q?')=20for=20the=20band=20CHVRCH=CE=9ES?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm guessing there aren't too many other bands that this will apply to. Hex code found here: http://webdesign.about.com/od/localization/l/blhtmlcodes-gr.htm --- headphones/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/headphones/helpers.py b/headphones/helpers.py index 55a8778a..863384e5 100644 --- a/headphones/helpers.py +++ b/headphones/helpers.py @@ -70,7 +70,7 @@ def latinToAscii(unicrap): xlate = { 0xc0: 'A', 0xc1: 'A', 0xc2: 'A', 0xc3: 'A', 0xc4: 'A', 0xc5: 'A', 0xc6: 'Ae', 0xc7: 'C', - 0xc8: 'E', 0xc9: 'E', 0xca: 'E', 0xcb: 'E', 0x86: 'e', + 0xc8: 'E', 0xc9: 'E', 0xca: 'E', 0xcb: 'E', 0x86: 'e', 0x39e: 'E', 0xcc: 'I', 0xcd: 'I', 0xce: 'I', 0xcf: 'I', 0xd0: 'Th', 0xd1: 'N', 0xd2: 'O', 0xd3: 'O', 0xd4: 'O', 0xd5: 'O', 0xd6: 'O', 0xd8: 'O', From aa6cf97324c26d5aa792cfa0913fd70c26e24189 Mon Sep 17 00:00:00 2001 From: theguardian Date: Thu, 22 Oct 2015 07:58:57 -0700 Subject: [PATCH 22/44] made requested fixes --- Headphones.py | 15 +++++---------- data/interfaces/default/config.html | 6 ------ headphones/config.py | 2 +- headphones/webserve.py | 3 +-- 4 files changed, 7 insertions(+), 19 deletions(-) diff --git a/Headphones.py b/Headphones.py index 713a628b..c4fa80d4 100755 --- a/Headphones.py +++ b/Headphones.py @@ -22,6 +22,11 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'lib/')) from headphones import webstart, logger +if sys.version_info >= (2, 7, 9): + import ssl + # pylint: disable=W0212 + ssl._create_default_https_context = ssl._create_unverified_context + import locale import time import signal @@ -177,16 +182,6 @@ def main(): "module to enable HTTPS. HTTPS will be disabled.") headphones.CONFIG.ENABLE_HTTPS = False - #This fix is put in place for systems with broken SSL (like QNAP) - certificate_verification = headphones.CONFIG.VERIFY_SSL_CERT - if not certificate_verification: - try: - import ssl - ssl._create_default_https_context = ssl._create_unverified_context - except: - pass - #==== end block (should be configurable at settings level) - # Try to start the server. Will exit here is address is already in use. web_config = { 'http_port': http_port, diff --git a/data/interfaces/default/config.html b/data/interfaces/default/config.html index a6f3f6f6..5688ce6d 100644 --- a/data/interfaces/default/config.html +++ b/data/interfaces/default/config.html @@ -80,12 +80,6 @@ -
- - -
diff --git a/headphones/config.py b/headphones/config.py index 9d48f471..97cc5e6c 100644 --- a/headphones/config.py +++ b/headphones/config.py @@ -253,7 +253,7 @@ _CONFIG_DEFINITIONS = { 'UTORRENT_LABEL': (str, 'uTorrent', ''), 'UTORRENT_PASSWORD': (str, 'uTorrent', ''), 'UTORRENT_USERNAME': (str, 'uTorrent', ''), - 'VERIFY_SSL_CERT': (int, 'Advanced', 1), + 'VERIFY_SSL_CERT': (bool_int, 'Advanced', 1), 'WAIT_UNTIL_RELEASE_DATE' : (int, 'General', 0), 'WAFFLES': (int, 'Waffles', 0), 'WAFFLES_PASSKEY': (str, 'Waffles', ''), diff --git a/headphones/webserve.py b/headphones/webserve.py index b7f2874a..815c5e9f 100644 --- a/headphones/webserve.py +++ b/headphones/webserve.py @@ -1025,7 +1025,6 @@ class WebInterface(object): "enable_https": checked(headphones.CONFIG.ENABLE_HTTPS), "https_cert": headphones.CONFIG.HTTPS_CERT, "https_key": headphones.CONFIG.HTTPS_KEY, - "verify_ssl_cert": checked(headphones.CONFIG.VERIFY_SSL_CERT), "api_enabled": checked(headphones.CONFIG.API_ENABLED), "api_key": headphones.CONFIG.API_KEY, "download_scan_interval": headphones.CONFIG.DOWNLOAD_SCAN_INTERVAL, @@ -1287,7 +1286,7 @@ class WebInterface(object): # Handle the variable config options. Note - keys with False values aren't getting passed checked_configs = [ - "launch_browser", "enable_https", "verify_ssl_cert", "api_enabled", "use_blackhole", "headphones_indexer", "use_newznab", "newznab_enabled", "use_torznab", "torznab_enabled", + "launch_browser", "enable_https", "api_enabled", "use_blackhole", "headphones_indexer", "use_newznab", "newznab_enabled", "use_torznab", "torznab_enabled", "use_nzbsorg", "use_omgwtfnzbs", "use_kat", "use_piratebay", "use_oldpiratebay", "use_mininova", "use_waffles", "use_rutracker", "use_whatcd", "use_strike", "preferred_bitrate_allow_lossless", "detect_bitrate", "ignore_clean_releases", "freeze_db", "cue_split", "move_files", "rename_files", "correct_metadata", "cleanup_files", "keep_nfo", "add_album_art", "embed_album_art", "embed_lyrics", From aae0476967e40626c78166c5a96f37da16fd3936 Mon Sep 17 00:00:00 2001 From: theguardian Date: Thu, 22 Oct 2015 08:53:43 -0700 Subject: [PATCH 23/44] check for affected python versions --- Headphones.py | 5 ----- headphones/request.py | 9 +++++++++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Headphones.py b/Headphones.py index c4fa80d4..0d31534e 100755 --- a/Headphones.py +++ b/Headphones.py @@ -22,11 +22,6 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'lib/')) from headphones import webstart, logger -if sys.version_info >= (2, 7, 9): - import ssl - # pylint: disable=W0212 - ssl._create_default_https_context = ssl._create_unverified_context - import locale import time import signal diff --git a/headphones/request.py b/headphones/request.py index a3493f79..98bfaff9 100644 --- a/headphones/request.py +++ b/headphones/request.py @@ -18,6 +18,7 @@ from headphones import logger from xml.dom import minidom from bs4 import BeautifulSoup +import sys import requests import feedparser import headphones @@ -54,6 +55,14 @@ def request_response(url, method="get", auto_raise=True, # pose a security issue! kwargs["verify"] = bool(headphones.CONFIG.VERIFY_SSL_CERT) + #This fix is put in place for systems with broken SSL (like QNAP) + if not headphones.CONFIG.VERIFY_SSL_CERT and sys.version_info >= (2, 7, 9): + try: + import ssl + ssl._create_default_https_context = ssl._create_unverified_context + except: + pass + # Map method to the request.XXX method. This is a simple hack, but it # allows requests to apply more magic per method. See lib/requests/api.py. request_method = getattr(requests, method.lower()) From e7d7dc6b6b861ad2dee3bed915596e2380210ca8 Mon Sep 17 00:00:00 2001 From: jvs3 Date: Thu, 22 Oct 2015 22:21:36 +0200 Subject: [PATCH 24/44] Added the option to provide an extension when using 'Advanced Encoding' instead of using the extension of the format selected in 'Audio Properties'. --- data/interfaces/default/config.html | 5 +++++ headphones/music_encoder.py | 2 +- headphones/webserve.py | 6 ++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/data/interfaces/default/config.html b/data/interfaces/default/config.html index 5688ce6d..509f5585 100644 --- a/data/interfaces/default/config.html +++ b/data/interfaces/default/config.html @@ -1402,6 +1402,11 @@ +
+ + + If different from format selected above +
diff --git a/headphones/music_encoder.py b/headphones/music_encoder.py index c25748f7..5f79e896 100644 --- a/headphones/music_encoder.py +++ b/headphones/music_encoder.py @@ -128,7 +128,7 @@ def encode(albumPath): logger.warn('Cannot re-encode .ogg %s', music.decode(headphones.SYS_ENCODING, 'replace')) else: encode = True - elif headphones.CONFIG.ENCODEROUTPUTFORMAT == 'mp3' or headphones.CONFIG.ENCODEROUTPUTFORMAT == 'm4a': + else: if music.decode(headphones.SYS_ENCODING, 'replace').lower().endswith('.' + headphones.CONFIG.ENCODEROUTPUTFORMAT) and (int(infoMusic.bitrate / 1000) <= headphones.CONFIG.BITRATE): logger.info('%s has bitrate <= %skb, will not be re-encoded', music, headphones.CONFIG.BITRATE) else: diff --git a/headphones/webserve.py b/headphones/webserve.py index 121e8667..5385ac31 100644 --- a/headphones/webserve.py +++ b/headphones/webserve.py @@ -1309,6 +1309,12 @@ class WebInterface(object): kwargs[plain_config] = kwargs[use_config] del kwargs[use_config] + # Check if encoderoutputformat is set multiple times + if len(kwargs['encoderoutputformat'][-1]) > 1: + kwargs['encoderoutputformat'] = kwargs['encoderoutputformat'][-1] + else: + kwargs['encoderoutputformat'] = kwargs['encoderoutputformat'][0] + extra_newznabs = [] for kwarg in [x for x in kwargs if x.startswith('newznab_host')]: newznab_host_key = kwarg From 80515658b0362f15c98bee2f3f2316155649764a Mon Sep 17 00:00:00 2001 From: Nate Date: Thu, 5 Nov 2015 14:38:19 +0100 Subject: [PATCH 25/44] Fixed a bug in track matching where only progress exactly divisible by 10 was displayed Bug prevented progress from being properly displayed on albums with a track count not divisible by 10 --- headphones/librarysync.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/headphones/librarysync.py b/headphones/librarysync.py index 17381d80..a8a67f4f 100644 --- a/headphones/librarysync.py +++ b/headphones/librarysync.py @@ -15,6 +15,7 @@ import os import headphones +import math from beets.mediafile import MediaFile, FileTypeError, UnreadableFileError @@ -190,6 +191,7 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, # We'll use this to give a % completion, just because the track matching might take a while song_count = 0 latest_artist = [] + last_completion_percentage = 0 for song in song_list: @@ -200,10 +202,11 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, logger.info("Now matching songs by %s" % song['ArtistName']) song_count += 1 - completion_percentage = float(song_count) / total_number_of_songs * 100 + completion_percentage = math.floor(float(song_count) / total_number_of_songs * 1000) / 10 - if completion_percentage % 10 == 0: + if completion_percentage >= (last_completion_percentage + 10): logger.info("Track matching is " + str(completion_percentage) + "% complete") + last_completion_percentage = completion_percentage #THE "MORE-SPECIFIC" CLAUSES HERE HAVE ALL BEEN REMOVED. WHEN RUNNING A LIBRARY SCAN, THE ONLY CLAUSES THAT #EVER GOT HIT WERE [ARTIST/ALBUM/TRACK] OR CLEANNAME. ARTISTID & RELEASEID ARE NEVER PASSED TO THIS FUNCTION, From ef4a8442334aee486001f0da51b38afe0fb00b91 Mon Sep 17 00:00:00 2001 From: Ade Date: Sat, 14 Nov 2015 16:02:31 +1300 Subject: [PATCH 26/44] rutracker fix --- headphones/rutracker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/headphones/rutracker.py b/headphones/rutracker.py index 0b7959b9..f106e133 100644 --- a/headphones/rutracker.py +++ b/headphones/rutracker.py @@ -176,7 +176,7 @@ class Rutracker(object): cookie = {'bb_dl': torrent_id} try: headers = {'Referer': url} - r = self.session.get(url=downloadurl, cookies=cookie, headers=headers, timeout=self.timeout) + r = self.session.post(url=downloadurl, cookies=cookie, headers=headers, timeout=self.timeout) return r.content except Exception as e: logger.error('Error getting torrent: %s', e) From 4b445ca4dfbcfeb1e5486bd51294f9f83c0c7852 Mon Sep 17 00:00:00 2001 From: Jamie Magee Date: Fri, 27 Nov 2015 10:48:04 +0100 Subject: [PATCH 27/44] Make ShellCheck suggested changes --- init-scripts/init-alt.freebsd | 26 +++++++++++++------------- init-scripts/init.freebsd | 18 +++++++++--------- init-scripts/init.ubuntu | 30 +++++++++++++----------------- 3 files changed, 35 insertions(+), 39 deletions(-) diff --git a/init-scripts/init-alt.freebsd b/init-scripts/init-alt.freebsd index 16fd5f97..308b92d0 100644 --- a/init-scripts/init-alt.freebsd +++ b/init-scripts/init-alt.freebsd @@ -27,17 +27,17 @@ rcvar=${name}_enable load_rc_config ${name} -: ${headphones_enable:="NO"} -: ${headphones_user:="_sabnzbd"} -: ${headphones_dir:="/usr/local/headphones"} -: ${headphones_chdir:="${headphones_dir}"} -: ${headphones_pid:="${headphones_dir}/headphones.pid"} -: ${headphones_conf:="${headphones_dir}/config.ini"} +: "${headphones_enable:="NO"}" +: "${headphones_user:="_sabnzbd"}" +: "${headphones_dir:="/usr/local/headphones"}" +: "${headphones_chdir:="${headphones_dir}"}" +: "${headphones_pid:="${headphones_dir}/headphones.pid"}" +: "${headphones_conf:="${headphones_dir}/config.ini"}" WGET="/usr/local/bin/wget" # You need wget for this script to safely shutdown Headphones. if [ -e "${headphones_conf}" ]; then - HOST=`grep -A64 "\[General\]" "${headphones_conf}"|egrep "^http_host"|perl -wple 's/^http_host = (.*)$/$1/'` - PORT=`grep -A64 "\[General\]" "${headphones_conf}"|egrep "^http_port"|perl -wple 's/^http_port = (.*)$/$1/'` + HOST=$(grep -A64 "\[General\]" "${headphones_conf}"|egrep "^http_host"|perl -wple 's/^http_host = (.*)$/$1/') + PORT=$(grep -A64 "\[General\]" "${headphones_conf}"|egrep "^http_port"|perl -wple 's/^http_port = (.*)$/$1/') fi status_cmd="${name}_status" @@ -53,15 +53,15 @@ if [ ! -x "${WGET}" ]; then fi # Ensure user is root when running this script. -if [ `id -u` != "0" ]; then +if [ "$(id -u)" != "0" ]; then echo "Oops, you should be root before running this!" exit 1 fi verify_headphones_pid() { # Make sure the pid corresponds to the Headphones process. - pid=`cat ${headphones_pid} 2>/dev/null` - ps -p ${pid} | grep -q "python ${headphones_dir}/Headphones.py" + pid=$(cat "${headphones_pid}" 2>/dev/null) + pgrep -F "${headphones_pid}" -q "python ${headphones_dir}/Headphones.py" return $? } @@ -73,10 +73,10 @@ headphones_stop() { fi echo "Stopping $name" verify_headphones_pid - ${WGET} -O - -q --user=${SBUSR} --password=${SBPWD} "http://${HOST}:${PORT}/shutdown/" >/dev/null + ${WGET} -O - -q --user="${SBUSR}" --password="${SBPWD}" "http://${HOST}:${PORT}/shutdown/" >/dev/null if [ -n "${pid}" ]; then - wait_for_pids ${pid} + wait_for_pids "${pid}" echo "Stopped $name" fi } diff --git a/init-scripts/init.freebsd b/init-scripts/init.freebsd index 3d2ca978..85d2cf14 100755 --- a/init-scripts/init.freebsd +++ b/init-scripts/init.freebsd @@ -28,11 +28,11 @@ rcvar=${name}_enable load_rc_config ${name} -: ${headphones_enable:="NO"} -: ${headphones_user:="_sabnzbd"} -: ${headphones_dir:="/usr/local/headphones"} -: ${headphones_chdir:="${headphones_dir}"} -: ${headphones_pid:="${headphones_dir}/headphones.pid"} +: "${headphones_enable:="NO"}" +: "${headphones_user:="_sabnzbd"}" +: "${headphones_dir:="/usr/local/headphones"}" +: "${headphones_chdir:="${headphones_dir}"}" +: "${headphones_pid:="${headphones_dir}/headphones.pid"}" status_cmd="${name}_status" stop_cmd="${name}_stop" @@ -41,15 +41,15 @@ command="/usr/sbin/daemon" command_args="-f -p ${headphones_pid} python ${headphones_dir}/Headphones.py ${headphones_flags} --quiet --nolaunch" # Ensure user is root when running this script. -if [ `id -u` != "0" ]; then +if [ "$(id -u)" != "0" ]; then echo "Oops, you should be root before running this!" exit 1 fi verify_headphones_pid() { # Make sure the pid corresponds to the Headphones process. - pid=`cat ${headphones_pid} 2>/dev/null` - ps -p ${pid} | grep -q "python ${headphones_dir}/Headphones.py" + pid=$(cat "${headphones_pid}" 2>/dev/null) + pgrep -F "${headphones_pid}" -q "python ${headphones_dir}/Headphones.py" return $? } @@ -58,7 +58,7 @@ headphones_stop() { echo "Stopping $name" verify_headphones_pid if [ -n "${pid}" ]; then - wait_for_pids ${pid} + wait_for_pids "${pid}" echo "Stopped" fi } diff --git a/init-scripts/init.ubuntu b/init-scripts/init.ubuntu index 7578ecd1..991f8e0a 100755 --- a/init-scripts/init.ubuntu +++ b/init-scripts/init.ubuntu @@ -32,7 +32,6 @@ ## HP_PIDFILE= #$PID_FILE, the location of headphones.pid, the default is /var/run/headphones/headphones.pid ## PYTHON_BIN= #$DAEMON, the location of the python binary, the default is /usr/bin/python ## HP_OPTS= #$EXTRA_DAEMON_OPTS, extra cli option for headphones, i.e. " --config=/home/headphones/config.ini" -## SSD_OPTS= #$EXTRA_SSD_OPTS, extra start-stop-daemon option like " --group=users" ## HP_PORT= #$PORT_OPTS, hardcoded port for the webserver, overrides value in config.ini ## ## EXAMPLE if want to run as different user @@ -101,9 +100,6 @@ load_settings() { # Extra daemon option like: HP_OPTS=" --config=/home/headphones/config.ini" EXTRA_DAEMON_OPTS=${HP_OPTS-} - # Extra start-stop-daemon option like START_OPTS=" --group=users" - EXTRA_SSD_OPTS=${SSD_OPTS-} - # Hardcoded port to run on, overrides config.ini settings [ -n "$HP_PORT" ] && { PORT_OPTS=" --port=${HP_PORT} " @@ -114,7 +110,7 @@ load_settings() { SETTINGS_LOADED=TRUE fi - [ -x $DAEMON ] || { + [ -x "$DAEMON" ] || { log_warning_msg "$DESC: Can't execute daemon, aborting. See $DAEMON"; return 1;} @@ -125,8 +121,8 @@ load_settings || exit 0 is_running () { # returns 1 when running, else 0. - if [ -e $PID_FILE ]; then - PID=`cat $PID_FILE` + if [ -e "$PID_FILE" ]; then + PID=$(cat "$PID_FILE") RET=$? [ $RET -gt 1 ] && exit 1 || return $RET @@ -136,28 +132,28 @@ is_running () { } handle_pid () { - PID_PATH=`dirname $PID_FILE` - [ -d $PID_PATH ] || mkdir -p $PID_PATH && chown -R $RUN_AS $PID_PATH > /dev/null || { + PID_PATH=$(dirname "$PID_FILE") + [ -d "$PID_PATH" ] || mkdir -p "$PID_PATH" && chown -R "$RUN_AS" "$PID_PATH" > /dev/null || { log_warning_msg "$DESC: Could not create $PID_FILE, See $SETTINGS, aborting."; return 1;} - if [ -e $PID_FILE ]; then - PID=`cat $PID_FILE` - if ! kill -0 $PID > /dev/null 2>&1; then + if [ -e "$PID_FILE" ]; then + PID=$(cat "$PID_FILE") + if ! kill -0 "$PID" > /dev/null 2>&1; then log_warning_msg "Removing stale $PID_FILE" - rm $PID_FILE + rm "$PID_FILE" fi fi } handle_datadir () { - [ -d $DATA_DIR ] || mkdir -p $DATA_DIR && chown -R $RUN_AS $DATA_DIR > /dev/null || { + [ -d "$DATA_DIR" ] || mkdir -p "$DATA_DIR" && chown -R "$RUN_AS" "$DATA_DIR" > /dev/null || { log_warning_msg "$DESC: Could not create $DATA_DIR, See $SETTINGS, aborting."; return 1;} } handle_updates () { - chown -R $RUN_AS $APP_PATH > /dev/null || { + chown -R "$RUN_AS" "$APP_PATH" > /dev/null || { log_warning_msg "$DESC: $APP_PATH not writable by $RUN_AS for web-updates"; return 0; } } @@ -168,7 +164,7 @@ start_headphones () { handle_updates if ! is_running; then log_daemon_msg "Starting $DESC" - start-stop-daemon -o -d $APP_PATH -c $RUN_AS --start $EXTRA_SSD_OPTS --pidfile $PID_FILE --exec $DAEMON -- $DAEMON_OPTS + start-stop-daemon -o -d "$APP_PATH" -c "$RUN_AS" --start "$EXTRA_SSD"_OPTS --pidfile "$PID_FILE" --exec "$DAEMON" -- "$DAEMON_OPTS" check_retval else log_success_msg "$DESC: already running (pid $PID)" @@ -178,7 +174,7 @@ start_headphones () { stop_headphones () { if is_running; then log_daemon_msg "Stopping $DESC" - start-stop-daemon -o --stop --pidfile $PID_FILE --retry 15 + start-stop-daemon -o --stop --pidfile "$PID_FILE" --retry 15 check_retval else log_success_msg "$DESC: not running" From 327585327f3a3bc5a3a967eed079aecab6778e6d Mon Sep 17 00:00:00 2001 From: Jamie Magee Date: Fri, 27 Nov 2015 13:55:11 +0100 Subject: [PATCH 28/44] pep8, pyflakes, and pylint suggested fixes --- headphones/__init__.py | 27 +- headphones/albumswitcher.py | 10 +- headphones/api.py | 23 +- headphones/cache.py | 49 ++-- headphones/classes.py | 9 +- headphones/common.py | 56 ++-- headphones/config.py | 11 +- headphones/cuesplit.py | 42 +-- headphones/db.py | 32 +-- headphones/getXldProfile.py | 14 +- headphones/helpers.py | 100 ++++---- headphones/importer.py | 304 +++++++++++++--------- headphones/lastfm.py | 13 +- headphones/librarysync.py | 186 ++++++++------ headphones/lock.py | 3 +- headphones/logger.py | 28 +- headphones/lyrics.py | 28 +- headphones/mb.py | 373 +++++++++++++++------------ headphones/metacritic.py | 21 +- headphones/music_encoder.py | 73 ++++-- headphones/notifiers.py | 180 +++++++------ headphones/nzbget.py | 50 ++-- headphones/postprocessor.py | 465 ++++++++++++++++++++++------------ headphones/request.py | 10 +- headphones/rutracker.py | 40 +-- headphones/sab.py | 35 ++- headphones/searcher.py | 394 ++++++++++++++++------------ headphones/torrentfinished.py | 8 +- headphones/transmission.py | 16 +- headphones/updater.py | 4 +- headphones/utorrent.py | 26 +- headphones/versioncheck.py | 38 +-- headphones/webserve.py | 445 ++++++++++++++++++++------------ headphones/webstart.py | 20 +- 34 files changed, 1878 insertions(+), 1255 deletions(-) diff --git a/headphones/__init__.py b/headphones/__init__.py index 52630d5f..eb48476d 100644 --- a/headphones/__init__.py +++ b/headphones/__init__.py @@ -16,21 +16,21 @@ # NZBGet support added by CurlyMo as a part of # XBian - XBMC on the Raspberry Pi -import os import sys import subprocess import threading import webbrowser import sqlite3 -import cherrypy import datetime +import os +import cherrypy from apscheduler.schedulers.background import BackgroundScheduler from apscheduler.triggers.interval import IntervalTrigger - from headphones import versioncheck, logger import headphones.config + # (append new extras to the end) POSSIBLE_EXTRAS = [ "single", @@ -94,7 +94,6 @@ UMASK = None def initialize(config_file): - with INIT_LOCK: global CONFIG @@ -131,11 +130,11 @@ def initialize(config_file): if not QUIET: sys.stderr.write("Unable to create the log directory. " \ - "Logging to screen only.\n") + "Logging to screen only.\n") # Start the logger, disable console if needed logger.initLogger(console=not QUIET, log_dir=CONFIG.LOG_DIR, - verbose=VERBOSE) + verbose=VERBOSE) if not CONFIG.CACHE_DIR: # Put the cache dir in the data dir for now @@ -246,7 +245,6 @@ def daemonize(): def launch_browser(host, port, root): - if host == '0.0.0.0': host = 'localhost' @@ -287,17 +285,19 @@ def initialize_scheduler(): hours = CONFIG.UPDATE_DB_INTERVAL schedule_job(updater.dbUpdate, 'MusicBrainz Update', hours=hours, minutes=0) - #Update check + # Update check if CONFIG.CHECK_GITHUB: if CONFIG.CHECK_GITHUB_INTERVAL: minutes = CONFIG.CHECK_GITHUB_INTERVAL else: minutes = 0 - schedule_job(versioncheck.checkGithub, 'Check GitHub for updates', hours=0, minutes=minutes) + schedule_job(versioncheck.checkGithub, 'Check GitHub for updates', hours=0, + minutes=minutes) # Remove Torrent + data if Post Processed and finished Seeding minutes = CONFIG.TORRENT_REMOVAL_INTERVAL - schedule_job(torrentfinished.checkTorrentFinished, 'Torrent removal check', hours=0, minutes=minutes) + schedule_job(torrentfinished.checkTorrentFinished, 'Torrent removal check', hours=0, + minutes=minutes) # Start scheduler if start_jobs and len(SCHED.get_jobs()): @@ -306,8 +306,8 @@ def initialize_scheduler(): except Exception as e: logger.info(e) - # Debug - #SCHED.print_jobs() + # Debug + # SCHED.print_jobs() def schedule_job(function, name, hours=0, minutes=0): @@ -334,7 +334,6 @@ def schedule_job(function, name, hours=0, minutes=0): def start(): - global started if _INITIALIZED: @@ -349,7 +348,6 @@ def sig_handler(signum=None, frame=None): def dbcheck(): - conn = sqlite3.connect(DB_FILE) c = conn.cursor() c.execute( @@ -609,7 +607,6 @@ def dbcheck(): def shutdown(restart=False, update=False): - cherrypy.engine.exit() SCHED.shutdown(wait=False) diff --git a/headphones/albumswitcher.py b/headphones/albumswitcher.py index 1edf1937..d4d05aa1 100644 --- a/headphones/albumswitcher.py +++ b/headphones/albumswitcher.py @@ -53,7 +53,6 @@ def switch(AlbumID, ReleaseID): c.get_artwork_from_cache(AlbumID=AlbumID) for track in newtrackdata: - controlValueDict = {"TrackID": track['TrackID'], "AlbumID": AlbumID} @@ -79,15 +78,18 @@ def switch(AlbumID, ReleaseID): have_track_count = len(myDB.select( 'SELECT * from tracks WHERE AlbumID=? AND Location IS NOT NULL', [AlbumID])) - if oldalbumdata['Status'] == 'Skipped' and ((have_track_count / float(total_track_count)) >= (headphones.CONFIG.ALBUM_COMPLETION_PCT / 100.0)): + if oldalbumdata['Status'] == 'Skipped' and ((have_track_count / float(total_track_count)) >= ( + headphones.CONFIG.ALBUM_COMPLETION_PCT / 100.0)): myDB.action( 'UPDATE albums SET Status=? WHERE AlbumID=?', ['Downloaded', AlbumID]) # Update have track counts on index totaltracks = len(myDB.select( - 'SELECT TrackTitle from tracks WHERE ArtistID=? AND AlbumID IN (SELECT AlbumID FROM albums WHERE Status != "Ignored")', [newalbumdata['ArtistID']])) + 'SELECT TrackTitle from tracks WHERE ArtistID=? AND AlbumID IN (SELECT AlbumID FROM albums WHERE Status != "Ignored")', + [newalbumdata['ArtistID']])) havetracks = len(myDB.select( - 'SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL', [newalbumdata['ArtistID']])) + 'SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL', + [newalbumdata['ArtistID']])) controlValueDict = {"ArtistID": newalbumdata['ArtistID']} diff --git a/headphones/api.py b/headphones/api.py index 6a51f55d..b5f2e26a 100644 --- a/headphones/api.py +++ b/headphones/api.py @@ -13,21 +13,25 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -from headphones import db, mb, updater, importer, searcher, cache, postprocessor, versioncheck, logger - -import headphones import json -cmd_list = ['getIndex', 'getArtist', 'getAlbum', 'getUpcoming', 'getWanted', 'getSnatched', 'getSimilar', 'getHistory', 'getLogs', - 'findArtist', 'findAlbum', 'addArtist', 'delArtist', 'pauseArtist', 'resumeArtist', 'refreshArtist', - 'addAlbum', 'queueAlbum', 'unqueueAlbum', 'forceSearch', 'forceProcess', 'forceActiveArtistsUpdate', - 'getVersion', 'checkGithub', 'shutdown', 'restart', 'update', 'getArtistArt', 'getAlbumArt', +from headphones import db, mb, updater, importer, searcher, cache, postprocessor, versioncheck, \ + logger +import headphones + +cmd_list = ['getIndex', 'getArtist', 'getAlbum', 'getUpcoming', 'getWanted', 'getSnatched', + 'getSimilar', 'getHistory', 'getLogs', + 'findArtist', 'findAlbum', 'addArtist', 'delArtist', 'pauseArtist', 'resumeArtist', + 'refreshArtist', + 'addAlbum', 'queueAlbum', 'unqueueAlbum', 'forceSearch', 'forceProcess', + 'forceActiveArtistsUpdate', + 'getVersion', 'checkGithub', 'shutdown', 'restart', 'update', 'getArtistArt', + 'getAlbumArt', 'getArtistInfo', 'getAlbumInfo', 'getArtistThumb', 'getAlbumThumb', 'clearLogs', 'choose_specific_download', 'download_specific_release'] class Api(object): - def __init__(self): self.apikey = None @@ -170,7 +174,7 @@ class Api(object): self.data = self._dic_from_query( "SELECT * from albums WHERE Status='Snatched'") return - + def _getSimilar(self, **kwargs): self.data = self._dic_from_query('SELECT * from lastfmcloud') return @@ -432,7 +436,6 @@ class Api(object): results_as_dicts = [] for result in results: - result_dict = { 'title': result[0], 'size': result[1], diff --git a/headphones/cache.py b/headphones/cache.py index 1089a71d..166d5577 100644 --- a/headphones/cache.py +++ b/headphones/cache.py @@ -14,8 +14,8 @@ # along with Headphones. If not, see . import os -import headphones +import headphones from headphones import db, helpers, logger, lastfm, request LASTFM_API_KEY = "690e1ed3bc00bc91804cd8f7fe5ed6d4" @@ -45,8 +45,8 @@ class Cache(object): def __init__(self): self.id = None - self.id_type = None # 'artist' or 'album' - set automatically depending on whether ArtistID or AlbumID is passed - self.query_type = None # 'artwork','thumb' or 'info' - set automatically + self.id_type = None # 'artist' or 'album' - set automatically depending on whether ArtistID or AlbumID is passed + self.query_type = None # 'artwork','thumb' or 'info' - set automatically self.artwork_files = [] self.thumb_files = [] @@ -182,13 +182,18 @@ class Cache(object): if ArtistID: self.id = ArtistID self.id_type = 'artist' - db_info = myDB.action('SELECT Summary, Content, LastUpdated FROM descriptions WHERE ArtistID=?', [self.id]).fetchone() + db_info = myDB.action( + 'SELECT Summary, Content, LastUpdated FROM descriptions WHERE ArtistID=?', + [self.id]).fetchone() else: self.id = AlbumID self.id_type = 'album' - db_info = myDB.action('SELECT Summary, Content, LastUpdated FROM descriptions WHERE ReleaseGroupID=?', [self.id]).fetchone() + db_info = myDB.action( + 'SELECT Summary, Content, LastUpdated FROM descriptions WHERE ReleaseGroupID=?', + [self.id]).fetchone() - if not db_info or not db_info['LastUpdated'] or not self._is_current(date=db_info['LastUpdated']): + if not db_info or not db_info['LastUpdated'] or not self._is_current( + date=db_info['LastUpdated']): self._update_cache() info_dict = {'Summary': self.info_summary, 'Content': self.info_content} @@ -309,13 +314,19 @@ class Cache(object): logger.debug('No artist thumbnail image found') else: - dbalbum = myDB.action('SELECT ArtistName, AlbumTitle, ReleaseID FROM albums WHERE AlbumID=?', [self.id]).fetchone() + dbalbum = myDB.action( + 'SELECT ArtistName, AlbumTitle, ReleaseID FROM albums WHERE AlbumID=?', + [self.id]).fetchone() if dbalbum['ReleaseID'] != self.id: - data = lastfm.request_lastfm("album.getinfo", mbid=dbalbum['ReleaseID'], api_key=LASTFM_API_KEY) + data = lastfm.request_lastfm("album.getinfo", mbid=dbalbum['ReleaseID'], + api_key=LASTFM_API_KEY) if not data: - data = lastfm.request_lastfm("album.getinfo", artist=dbalbum['ArtistName'], album=dbalbum['AlbumTitle'], api_key=LASTFM_API_KEY) + data = lastfm.request_lastfm("album.getinfo", artist=dbalbum['ArtistName'], + album=dbalbum['AlbumTitle'], + api_key=LASTFM_API_KEY) else: - data = lastfm.request_lastfm("album.getinfo", artist=dbalbum['ArtistName'], album=dbalbum['AlbumTitle'], api_key=LASTFM_API_KEY) + data = lastfm.request_lastfm("album.getinfo", artist=dbalbum['ArtistName'], + album=dbalbum['AlbumTitle'], api_key=LASTFM_API_KEY) if not data: return @@ -357,7 +368,8 @@ class Cache(object): # Save the image URL to the database if image_url: if self.id_type == 'artist': - myDB.action('UPDATE artists SET ArtworkURL=? WHERE ArtistID=?', [image_url, self.id]) + myDB.action('UPDATE artists SET ArtworkURL=? WHERE ArtistID=?', + [image_url, self.id]) else: myDB.action('UPDATE albums SET ArtworkURL=? WHERE AlbumID=?', [image_url, self.id]) @@ -378,7 +390,8 @@ class Cache(object): if not os.path.isdir(self.path_to_art_cache): try: os.makedirs(self.path_to_art_cache) - os.chmod(self.path_to_art_cache, int(headphones.CONFIG.FOLDER_PERMISSIONS, 8)) + os.chmod(self.path_to_art_cache, + int(headphones.CONFIG.FOLDER_PERMISSIONS, 8)) except OSError as e: logger.error('Unable to create artwork cache dir. Error: %s', e) self.artwork_errors = True @@ -393,7 +406,8 @@ class Cache(object): ext = os.path.splitext(image_url)[1] - artwork_path = os.path.join(self.path_to_art_cache, self.id + '.' + helpers.today() + ext) + artwork_path = os.path.join(self.path_to_art_cache, + self.id + '.' + helpers.today() + ext) try: with open(artwork_path, 'wb') as f: f.write(artwork) @@ -406,7 +420,8 @@ class Cache(object): # Grab the thumbnail as well if we're getting the full artwork (as long # as it's missing/outdated. - if thumb_url and self.query_type in ['thumb', 'artwork'] and not (self.thumb_files and self._is_current(self.thumb_files[0])): + if thumb_url and self.query_type in ['thumb', 'artwork'] and not ( + self.thumb_files and self._is_current(self.thumb_files[0])): artwork = request.request_content(thumb_url, timeout=20) if artwork: @@ -414,7 +429,8 @@ class Cache(object): if not os.path.isdir(self.path_to_art_cache): try: os.makedirs(self.path_to_art_cache) - os.chmod(self.path_to_art_cache, int(headphones.CONFIG.FOLDER_PERMISSIONS, 8)) + os.chmod(self.path_to_art_cache, + int(headphones.CONFIG.FOLDER_PERMISSIONS, 8)) except OSError as e: logger.error('Unable to create artwork cache dir. Error: %s' + e) self.thumb_errors = True @@ -429,7 +445,8 @@ class Cache(object): ext = os.path.splitext(image_url)[1] - thumb_path = os.path.join(self.path_to_art_cache, 'T_' + self.id + '.' + helpers.today() + ext) + thumb_path = os.path.join(self.path_to_art_cache, + 'T_' + self.id + '.' + helpers.today() + ext) try: with open(thumb_path, 'wb') as f: f.write(artwork) diff --git a/headphones/classes.py b/headphones/classes.py index 96315ba7..6015a0f2 100644 --- a/headphones/classes.py +++ b/headphones/classes.py @@ -13,9 +13,9 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -######################################### -## Stolen from Sick-Beard's classes.py ## -######################################### +####################################### +# Stolen from Sick-Beard's classes.py # +####################################### import urllib @@ -133,4 +133,5 @@ class Proper: self.episode = -1 def __str__(self): - return str(self.date) + " " + self.name + " " + str(self.season) + "x" + str(self.episode) + " of " + str(self.tvdbid) + return str(self.date) + " " + self.name + " " + str(self.season) + "x" + str( + self.episode) + " of " + str(self.tvdbid) diff --git a/headphones/common.py b/headphones/common.py index 02bfc3a2..0a33bde7 100644 --- a/headphones/common.py +++ b/headphones/common.py @@ -20,15 +20,16 @@ Created on Aug 1, 2011 ''' import platform import operator + import os import re - from headphones import version -#Identify Our Application + +# Identify Our Application USER_AGENT = 'Headphones/-' + version.HEADPHONES_VERSION + ' (' + platform.system() + ' ' + platform.release() + ')' -### Notification Types +# Notification Types NOTIFY_SNATCH = 1 NOTIFY_DOWNLOAD = 2 @@ -36,26 +37,25 @@ notifyStrings = {} notifyStrings[NOTIFY_SNATCH] = "Started Download" notifyStrings[NOTIFY_DOWNLOAD] = "Download Finished" -### Release statuses -UNKNOWN = -1 # should never happen -UNAIRED = 1 # releases that haven't dropped yet -SNATCHED = 2 # qualified with quality -WANTED = 3 # releases we don't have but want to get -DOWNLOADED = 4 # qualified with quality -SKIPPED = 5 # releases we don't want -ARCHIVED = 6 # releases that you don't have locally (counts toward download completion stats) -IGNORED = 7 # releases that you don't want included in your download stats -SNATCHED_PROPER = 9 # qualified with quality +# Release statuses +UNKNOWN = -1 # should never happen +UNAIRED = 1 # releases that haven't dropped yet +SNATCHED = 2 # qualified with quality +WANTED = 3 # releases we don't have but want to get +DOWNLOADED = 4 # qualified with quality +SKIPPED = 5 # releases we don't want +ARCHIVED = 6 # releases that you don't have locally (counts toward download completion stats) +IGNORED = 7 # releases that you don't want included in your download stats +SNATCHED_PROPER = 9 # qualified with quality class Quality: - NONE = 0 - B192 = 1 << 1 # 2 - VBR = 1 << 2 # 4 - B256 = 1 << 3 # 8 - B320 = 1 << 4 #16 - FLAC = 1 << 5 #32 + B192 = 1 << 1 # 2 + VBR = 1 << 2 # 4 + B256 = 1 << 3 # 8 + B320 = 1 << 4 # 16 + FLAC = 1 << 5 # 32 # put these bits at the other end of the spectrum, far enough out that they shouldn't interfere UNKNOWN = 1 << 15 @@ -75,7 +75,8 @@ class Quality: def _getStatusStrings(status): toReturn = {} for x in Quality.qualityStrings.keys(): - toReturn[Quality.compositeStatus(status, x)] = Quality.statusPrefixes[status] + " (" + Quality.qualityStrings[x] + ")" + toReturn[Quality.compositeStatus(status, x)] = Quality.statusPrefixes[status] + " (" + \ + Quality.qualityStrings[x] + ")" return toReturn @staticmethod @@ -103,6 +104,9 @@ class Quality: @staticmethod def nameQuality(name): + def checkName(list, func): + return func([re.search(x, name, re.I) for x in list]) + name = os.path.basename(name) # if we have our exact text then assume we put it there @@ -115,9 +119,7 @@ class Quality: if regex_match: return x - checkName = lambda list, func: func([re.search(x, name, re.I) for x in list]) - - #TODO: fix quality checking here + # TODO: fix quality checking here if checkName(["mp3", "192"], any) and not checkName(["flac"], all): return Quality.B192 elif checkName(["mp3", "256"], any) and not checkName(["flac"], all): @@ -131,7 +133,6 @@ class Quality: @staticmethod def assumeQuality(name): - if name.lower().endswith(".mp3"): return Quality.MP3 elif name.lower().endswith(".flac"): @@ -167,13 +168,16 @@ class Quality: SNATCHED = None SNATCHED_PROPER = None + Quality.DOWNLOADED = [Quality.compositeStatus(DOWNLOADED, x) for x in Quality.qualityStrings.keys()] Quality.SNATCHED = [Quality.compositeStatus(SNATCHED, x) for x in Quality.qualityStrings.keys()] -Quality.SNATCHED_PROPER = [Quality.compositeStatus(SNATCHED_PROPER, x) for x in Quality.qualityStrings.keys()] +Quality.SNATCHED_PROPER = [Quality.compositeStatus(SNATCHED_PROPER, x) for x in + Quality.qualityStrings.keys()] MP3 = Quality.combineQualities([Quality.B192, Quality.B256, Quality.B320, Quality.VBR], []) LOSSLESS = Quality.combineQualities([Quality.FLAC], []) -ANY = Quality.combineQualities([Quality.B192, Quality.B256, Quality.B320, Quality.VBR, Quality.FLAC], []) +ANY = Quality.combineQualities( + [Quality.B192, Quality.B256, Quality.B320, Quality.VBR, Quality.FLAC], []) qualityPresets = (MP3, LOSSLESS, ANY) qualityPresetStrings = {MP3: "MP3 (All bitrates 192+)", diff --git a/headphones/config.py b/headphones/config.py index 97cc5e6c..85f49799 100644 --- a/headphones/config.py +++ b/headphones/config.py @@ -1,7 +1,8 @@ -import headphones.logger import itertools + import os import re +import headphones.logger from configobj import ConfigObj @@ -14,6 +15,7 @@ def bool_int(value): value = 0 return int(bool(value)) + _CONFIG_DEFINITIONS = { 'ADD_ALBUM_ART': (int, 'General', 0), 'ADVANCEDENCODER': (str, 'General', ''), @@ -254,7 +256,7 @@ _CONFIG_DEFINITIONS = { 'UTORRENT_PASSWORD': (str, 'uTorrent', ''), 'UTORRENT_USERNAME': (str, 'uTorrent', ''), 'VERIFY_SSL_CERT': (bool_int, 'Advanced', 1), - 'WAIT_UNTIL_RELEASE_DATE' : (int, 'General', 0), + 'WAIT_UNTIL_RELEASE_DATE': (int, 'General', 0), 'WAFFLES': (int, 'Waffles', 0), 'WAFFLES_PASSKEY': (str, 'Waffles', ''), 'WAFFLES_RATIO': (str, 'Waffles', ''), @@ -272,6 +274,7 @@ _CONFIG_DEFINITIONS = { 'XLDPROFILE': (str, 'General', '') } + # pylint:disable=R0902 # it might be nice to refactor for fewer instance variables class Config(object): @@ -348,7 +351,7 @@ class Config(object): """ Return the extra newznab tuples """ extra_newznabs = list( itertools.izip(*[itertools.islice(self.EXTRA_NEWZNABS, i, None, 3) - for i in range(3)]) + for i in range(3)]) ) return extra_newznabs @@ -367,7 +370,7 @@ class Config(object): """ Return the extra torznab tuples """ extra_torznabs = list( itertools.izip(*[itertools.islice(self.EXTRA_TORZNABS, i, None, 3) - for i in range(3)]) + for i in range(3)]) ) return extra_torznabs diff --git a/headphones/cuesplit.py b/headphones/cuesplit.py index 55112368..6d7ae7a0 100755 --- a/headphones/cuesplit.py +++ b/headphones/cuesplit.py @@ -15,13 +15,13 @@ # Most of this lifted from here: https://github.com/SzieberthAdam/gneposis-cdgrab -import os import sys -import re import subprocess import copy import glob +import os +import re import headphones from headphones import logger from mutagen.flac import FLAC @@ -62,7 +62,7 @@ WAVE_FILE_TYPE_BY_EXTENSION = { '.flac': 'Free Lossless Audio Codec' } -#SHNTOOL_COMPATIBLE = ("Free Lossless Audio Codec", "Waveform Audio", "Monkey's Audio") +# SHNTOOL_COMPATIBLE = ("Free Lossless Audio Codec", "Waveform Audio", "Monkey's Audio") # TODO: Make this better! # this module-level variable is bad. :( @@ -288,7 +288,7 @@ class CueFile(File): global line_content c = self.content.splitlines() header_dict = {} - #remaining_headers = CUE_HEADER + # remaining_headers = CUE_HEADER remaining_headers = copy.copy(CUE_HEADER) line_index = 0 match = True @@ -314,7 +314,8 @@ class CueFile(File): line_content = c[line_index] search_result = re.search(CUE_TRACK, line_content, re.I) if not search_result: - raise ValueError('inconsistent CUE sheet, TRACK expected at line {0}'.format(line_index + 1)) + raise ValueError( + 'inconsistent CUE sheet, TRACK expected at line {0}'.format(line_index + 1)) track_nr = int(search_result.group(1)) line_index += 1 next_track = False @@ -353,7 +354,8 @@ class CueFile(File): track_meta['dcpflag'] = True line_index += 1 else: - raise ValueError('unknown entry in track error, line {0}'.format(line_index + 1)) + raise ValueError( + 'unknown entry in track error, line {0}'.format(line_index + 1)) else: next_track = True @@ -371,8 +373,8 @@ class CueFile(File): if not self.content: try: - with open(self.name, encoding="cp1252") as cue_file: - self.content = cue_file.read() + with open(self.name, encoding="cp1252") as cue_file: + self.content = cue_file.read() except: raise ValueError('Cant encode CUE Sheet.') @@ -406,9 +408,11 @@ class CueFile(File): for i in range(len(self.tracks)): if self.tracks[i]: if self.tracks[i].get('artist'): - content += 'track' + int_to_str(i) + 'artist' + '\t' + self.tracks[i].get('artist') + '\n' + content += 'track' + int_to_str(i) + 'artist' + '\t' + self.tracks[i].get( + 'artist') + '\n' if self.tracks[i].get('title'): - content += 'track' + int_to_str(i) + 'title' + '\t' + self.tracks[i].get('title') + '\n' + content += 'track' + int_to_str(i) + 'title' + '\t' + self.tracks[i].get( + 'title') + '\n' return content def htoa(self): @@ -449,7 +453,8 @@ class MetaFile(File): raise ValueError('Syntax error in album meta file') if not content['tracks'][int(parsed_track.group(1))]: content['tracks'][int(parsed_track.group(1))] = dict() - content['tracks'][int(parsed_track.group(1))][parsed_track.group(2)] = parsed_line.group(2) + content['tracks'][int(parsed_track.group(1))][ + parsed_track.group(2)] = parsed_line.group(2) else: content[parsed_line.group(1)] = parsed_line.group(2) @@ -472,15 +477,16 @@ class MetaFile(File): if 'genre' in CUE_META.content: common_tags['genre'] = CUE_META.content['genre'] - #freeform tags - #freeform_tags['country'] = self.content['country'] - #freeform_tags['releasedate'] = self.content['releasedate'] + # freeform tags + # freeform_tags['country'] = self.content['country'] + # freeform_tags['releasedate'] = self.content['releasedate'] return common_tags, freeform_tags def folders(self): artist = self.content['artist'] - album = self.content['date'] + ' - ' + self.content['title'] + ' (' + self.content['label'] + ' - ' + self.content['catalog'] + ')' + album = self.content['date'] + ' - ' + self.content['title'] + ' (' + self.content[ + 'label'] + ' - ' + self.content['catalog'] + ')' return artist, album def complete(self): @@ -535,6 +541,7 @@ class WaveFile(File): if self.type == 'Free Lossless Audio Codec': return FLAC(self.name) + def split(albumpath): global CUE_META os.chdir(albumpath) @@ -577,7 +584,8 @@ def split(albumpath): import getXldProfile xldprofile, xldformat, _ = getXldProfile.getXldProfile(headphones.CONFIG.XLDPROFILE) if not xldformat: - raise ValueError('Details for xld profile "%s" not found, cannot split cue' % (xldprofile)) + raise ValueError( + 'Details for xld profile "%s" not found, cannot split cue' % (xldprofile)) else: if headphones.CONFIG.ENCODERFOLDER: splitter = os.path.join(headphones.CONFIG.ENCODERFOLDER, 'xld') @@ -590,7 +598,7 @@ def split(albumpath): splitter = 'shntool' if splitter == 'shntool' and not check_splitter(splitter): - raise ValueError('Command not found, ensure shntool or xld installed') + raise ValueError('Command not found, ensure shntool or xld installed') # Determine if file can be split if wave.name_ext not in WAVE_FILE_TYPE_BY_EXTENSION.keys(): diff --git a/headphones/db.py b/headphones/db.py index 02d003e9..8d864aba 100644 --- a/headphones/db.py +++ b/headphones/db.py @@ -13,44 +13,41 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -##################################### -## Stolen from Sick-Beard's db.py ## -##################################### +################################### +# Stolen from Sick-Beard's db.py # +################################### from __future__ import with_statement -import os import sqlite3 +import os import headphones - from headphones import logger def dbFilename(filename="headphones.db"): - return os.path.join(headphones.DATA_DIR, filename) def getCacheSize(): - #this will protect against typecasting problems produced by empty string and None settings + # this will protect against typecasting problems produced by empty string and None settings if not headphones.CONFIG.CACHE_SIZEMB: - #sqlite will work with this (very slowly) + # sqlite will work with this (very slowly) return 0 return int(headphones.CONFIG.CACHE_SIZEMB) class DBConnection: - def __init__(self, filename="headphones.db"): self.filename = filename self.connection = sqlite3.connect(dbFilename(filename), timeout=20) - #don't wait for the disk to finish writing + # don't wait for the disk to finish writing self.connection.execute("PRAGMA synchronous = OFF") - #journal disabled since we never do rollbacks + # journal disabled since we never do rollbacks self.connection.execute("PRAGMA journal_mode = %s" % headphones.CONFIG.JOURNAL_MODE) - #64mb of cache memory,probably need to make it user configurable + # 64mb of cache memory,probably need to make it user configurable self.connection.execute("PRAGMA cache_size=-%s" % (getCacheSize() * 1024)) self.connection.row_factory = sqlite3.Row @@ -92,17 +89,20 @@ class DBConnection: def upsert(self, tableName, valueDict, keyDict): + def genParams(myDict): + return [x + " = ?" for x in myDict.keys()] + changesBefore = self.connection.total_changes - genParams = lambda myDict: [x + " = ?" for x in myDict.keys()] - - update_query = "UPDATE " + tableName + " SET " + ", ".join(genParams(valueDict)) + " WHERE " + " AND ".join(genParams(keyDict)) + update_query = "UPDATE " + tableName + " SET " + ", ".join( + genParams(valueDict)) + " WHERE " + " AND ".join(genParams(keyDict)) self.action(update_query, valueDict.values() + keyDict.values()) if self.connection.total_changes == changesBefore: insert_query = ( - "INSERT INTO " + tableName + " (" + ", ".join(valueDict.keys() + keyDict.keys()) + ")" + + "INSERT INTO " + tableName + " (" + ", ".join( + valueDict.keys() + keyDict.keys()) + ")" + " VALUES (" + ", ".join(["?"] * len(valueDict.keys() + keyDict.keys())) + ")" ) try: diff --git a/headphones/getXldProfile.py b/headphones/getXldProfile.py index 181751a7..6fd1ce2e 100755 --- a/headphones/getXldProfile.py +++ b/headphones/getXldProfile.py @@ -1,29 +1,29 @@ import os.path + import biplist from headphones import logger def getXldProfile(xldProfile): - xldProfileNotFound = xldProfile expanded = os.path.expanduser('~/Library/Preferences/jp.tmkk.XLD.plist') if not os.path.isfile(expanded): logger.warn("Could not find xld preferences at: %s", expanded) - return(xldProfileNotFound, None, None) + return (xldProfileNotFound, None, None) # Get xld preferences plist try: preferences = biplist.readPlist(expanded) except (biplist.InvalidPlistException, biplist.NotBinaryPlistException), e: logger.error("Error reading xld preferences plist: %s", e) - return(xldProfileNotFound, None, None) + return (xldProfileNotFound, None, None) if not isinstance(preferences, dict): logger.error("Error reading xld preferences plist, not a dict: %r", preferences) - return(xldProfileNotFound, None, None) + return (xldProfileNotFound, None, None) - profiles = preferences.get('Profiles', []) # pylint:disable=E1103 + profiles = preferences.get('Profiles', []) # pylint:disable=E1103 xldProfile = xldProfile.lower() for profile in profiles: @@ -175,6 +175,6 @@ def getXldProfile(xldProfile): if xldFormat and not xldBitrate: xldBitrate = 400 - return(xldProfileForCmd, xldFormat, xldBitrate) + return (xldProfileForCmd, xldFormat, xldBitrate) - return(xldProfileNotFound, None, None) + return (xldProfileNotFound, None, None) diff --git a/headphones/helpers.py b/headphones/helpers.py index 55a8778a..b81ab4ff 100644 --- a/headphones/helpers.py +++ b/headphones/helpers.py @@ -13,19 +13,19 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -from beets.mediafile import MediaFile, FileTypeError, UnreadableFileError - from operator import itemgetter - import unicodedata -import headphones import datetime -import fnmatch import shutil import time import sys + +import fnmatch import re import os +from beets.mediafile import MediaFile, FileTypeError, UnreadableFileError +import headphones + # Modified from https://github.com/Verrus/beets-plugin-featInTitle RE_FEATURING = re.compile(r"[fF]t\.|[fF]eaturing|[fF]eat\.|\b[wW]ith\b|&|vs\.") @@ -35,7 +35,9 @@ RE_CD = re.compile(r"^(CD|dics)\s*[0-9]+$", re.I) def multikeysort(items, columns): - comparers = [((itemgetter(col[1:].strip()), -1) if col.startswith('-') else (itemgetter(col.strip()), 1)) for col in columns] + comparers = [ + ((itemgetter(col[1:].strip()), -1) if col.startswith('-') else (itemgetter(col.strip()), 1)) + for col in columns] def comparer(left, right): for fn, mult in comparers: @@ -56,7 +58,6 @@ def checked(variable): def radio(variable, pos): - if variable == pos: return 'Checked' else: @@ -107,7 +108,6 @@ def latinToAscii(unicrap): def convert_milliseconds(ms): - seconds = ms / 1000 gmtime = time.gmtime(seconds) if seconds > 3600: @@ -119,7 +119,6 @@ def convert_milliseconds(ms): def convert_seconds(s): - gmtime = time.gmtime(s) if s > 3600: minutes = time.strftime("%H:%M:%S", gmtime) @@ -141,7 +140,6 @@ def now(): def get_age(date): - try: split_date = date.split('-') except: @@ -149,14 +147,13 @@ def get_age(date): try: days_old = int(split_date[0]) * 365 + int(split_date[1]) * 30 + int(split_date[2]) - except (IndexError,ValueError): + except (IndexError, ValueError): days_old = False return days_old def bytes_to_mb(bytes): - mb = int(bytes) / 1048576 size = '%.1f MB' % mb return size @@ -172,7 +169,7 @@ def piratesize(size): split = size.split(" ") factor = float(split[0]) unit = split[1].upper() - + if unit == 'MIB': size = factor * 1048576 elif unit == 'MB': @@ -194,7 +191,6 @@ def piratesize(size): def replace_all(text, dic, normalize=False): - if not text: return '' @@ -221,15 +217,14 @@ def replace_illegal_chars(string, type="file"): def cleanName(string): - pass1 = latinToAscii(string).lower() - out_string = re.sub('[\.\-\/\!\@\#\$\%\^\&\*\(\)\+\-\"\'\,\;\:\[\]\{\}\<\>\=\_]', '', pass1).encode('utf-8') + out_string = re.sub('[\.\-\/\!\@\#\$\%\^\&\*\(\)\+\-\"\'\,\;\:\[\]\{\}\<\>\=\_]', '', + pass1).encode('utf-8') return out_string def cleanTitle(title): - title = re.sub('[\.\-\/\_]', ' ', title).lower() # Strip out extra whitespace @@ -312,16 +307,22 @@ def expand_subfolders(f): difference = max(path_depths) - min(path_depths) if difference > 0: - logger.info("Found %d media folders, but depth difference between lowest and deepest media folder is %d (expected zero). If this is a discography or a collection of albums, make sure albums are per folder.", len(media_folders), difference) + logger.info( + "Found %d media folders, but depth difference between lowest and deepest media folder is %d (expected zero). If this is a discography or a collection of albums, make sure albums are per folder.", + len(media_folders), difference) # While already failed, advice the user what he could try. We assume the # directory may contain separate CD's and maybe some extra's. The # structure may look like X albums at same depth, and (one or more) # extra folders with a higher depth. - extra_media_folders = [media_folder[:min(path_depths)] for media_folder in media_folders if len(media_folder) > min(path_depths)] - extra_media_folders = list(set([os.path.join(*media_folder) for media_folder in extra_media_folders])) + extra_media_folders = [media_folder[:min(path_depths)] for media_folder in media_folders if + len(media_folder) > min(path_depths)] + extra_media_folders = list( + set([os.path.join(*media_folder) for media_folder in extra_media_folders])) - logger.info("Please look at the following folder(s), since they cause the depth difference: %s", extra_media_folders) + logger.info( + "Please look at the following folder(s), since they cause the depth difference: %s", + extra_media_folders) return # Convert back to paths and remove duplicates, which may be there after @@ -368,7 +369,7 @@ def path_filter_patterns(paths, patterns, root=None): for path in paths[:]: if path_match_patterns(path, patterns): logger.debug("Path ignored by pattern: %s", - os.path.join(root or "", path)) + os.path.join(root or "", path)) ignored += 1 paths.remove(path) @@ -378,11 +379,11 @@ def path_filter_patterns(paths, patterns, root=None): def extract_data(s): - s = s.replace('_', ' ') - #headphones default format - pattern = re.compile(r'(?P.*?)\s\-\s(?P.*?)\s[\[\(](?P.*?)[\]\)]', re.VERBOSE) + # headphones default format + pattern = re.compile(r'(?P.*?)\s\-\s(?P.*?)\s[\[\(](?P.*?)[\]\)]', + re.VERBOSE) match = pattern.match(s) if match: @@ -391,7 +392,7 @@ def extract_data(s): year = match.group("year") return (name, album, year) - #Gonna take a guess on this one - might be enough to search on mb + # Gonna take a guess on this one - might be enough to search on mb pat = re.compile(r"(?P.*?)\s*-\s*(?P[^\[(-]*)") match = pat.match(s) @@ -468,7 +469,8 @@ def extract_metadata(f): old_album = new_albums[index] new_albums[index] = RE_CD_ALBUM.sub("", album).strip() - logger.debug("Stripped albumd number identifier: %s -> %s", old_album, new_albums[index]) + logger.debug("Stripped albumd number identifier: %s -> %s", old_album, + new_albums[index]) # Remove duplicates new_albums = list(set(new_albums)) @@ -498,7 +500,8 @@ def extract_metadata(f): return (artist, albums[0], years[0]) # Not sure what to do here. - logger.info("Found %d artists, %d albums and %d years in metadata, so ignoring", len(artists), len(albums), len(years)) + logger.info("Found %d artists, %d albums and %d years in metadata, so ignoring", len(artists), + len(albums), len(years)) logger.debug("Artists: %s, Albums: %s, Years: %s", artists, albums, years) return (None, None, None) @@ -524,8 +527,10 @@ def preserve_torrent_directory(albumpath): Copy torrent directory to headphones-modified to keep files for seeding. """ from headphones import logger - new_folder = os.path.join(albumpath, 'headphones-modified'.encode(headphones.SYS_ENCODING, 'replace')) - logger.info("Copying files to 'headphones-modified' subfolder to preserve downloaded files for seeding") + new_folder = os.path.join(albumpath, + 'headphones-modified'.encode(headphones.SYS_ENCODING, 'replace')) + logger.info( + "Copying files to 'headphones-modified' subfolder to preserve downloaded files for seeding") try: shutil.copytree(albumpath, new_folder) return new_folder @@ -578,7 +583,9 @@ def cue_split(albumpath): def extract_logline(s): # Default log format - pattern = re.compile(r'(?P.*?)\s\-\s(?P.*?)\s*\:\:\s(?P.*?)\s\:\s(?P.*)', re.VERBOSE) + pattern = re.compile( + r'(?P.*?)\s\-\s(?P.*?)\s*\:\:\s(?P.*?)\s\:\s(?P.*)', + re.VERBOSE) match = pattern.match(s) if match: timestamp = match.group("timestamp") @@ -593,7 +600,7 @@ def extract_logline(s): def extract_song_data(s): from headphones import logger - #headphones default format + # headphones default format pattern = re.compile(r'(?P.*?)\s\-\s(?P.*?)\s\[(?P.*?)\]', re.VERBOSE) match = pattern.match(s) @@ -605,7 +612,7 @@ def extract_song_data(s): else: logger.info("Couldn't parse %s into a valid default format", s) - #newzbin default format + # newzbin default format pattern = re.compile(r'(?P.*?)\s\-\s(?P.*?)\s\((?P\d+?\))', re.VERBOSE) match = pattern.match(s) if match: @@ -619,7 +626,6 @@ def extract_song_data(s): def smartMove(src, dest, delete=True): - from headphones import logger source_dir = os.path.dirname(src) @@ -640,7 +646,8 @@ def smartMove(src, dest, delete=True): os.rename(src, os.path.join(source_dir, newfile)) filename = newfile except Exception as e: - logger.warn('Error renaming %s: %s', src.decode(headphones.SYS_ENCODING, 'replace'), e) + logger.warn('Error renaming %s: %s', + src.decode(headphones.SYS_ENCODING, 'replace'), e) break try: @@ -650,7 +657,9 @@ def smartMove(src, dest, delete=True): shutil.copy(os.path.join(source_dir, filename), os.path.join(dest, filename)) return True except Exception as e: - logger.warn('Error moving file %s: %s', filename.decode(headphones.SYS_ENCODING, 'replace'), e) + logger.warn('Error moving file %s: %s', filename.decode(headphones.SYS_ENCODING, 'replace'), + e) + def walk_directory(basedir, followlinks=True): """ @@ -672,8 +681,8 @@ def walk_directory(basedir, followlinks=True): real_path = os.path.abspath(os.readlink(path)) if real_path in traversed: - logger.debug("Skipping '%s' since it is a symlink to "\ - "'%s', which is already visited.", path, real_path) + logger.debug("Skipping '%s' since it is a symlink to " \ + "'%s', which is already visited.", path, real_path) else: traversed.append(real_path) @@ -689,8 +698,9 @@ def walk_directory(basedir, followlinks=True): for result in _inner(*args): yield result + ######################### -#Sab renaming functions # +# Sab renaming functions # ######################### # TODO: Grab config values from sab to know when these options are checked. For now we'll just iterate through all combinations @@ -739,18 +749,20 @@ def sab_sanitize_foldername(name): if not name: name = 'unknown' - #maxlen = cfg.folder_max_length() - #if len(name) > maxlen: + # maxlen = cfg.folder_max_length() + # if len(name) > maxlen: # name = name[:maxlen] return name + def split_string(mystring, splitvar=','): mylist = [] for each_word in mystring.split(splitvar): mylist.append(each_word.strip()) return mylist + def create_https_certificates(ssl_cert, ssl_key): """ Create a pair of self-signed HTTPS certificares and store in them in @@ -768,11 +780,13 @@ def create_https_certificates(ssl_cert, ssl_key): # Create the CA Certificate cakey = createKeyPair(TYPE_RSA, 2048) careq = createCertRequest(cakey, CN="Certificate Authority") - cacert = createCertificate(careq, (careq, cakey), serial, (0, 60 * 60 * 24 * 365 * 10)) # ten years + cacert = createCertificate(careq, (careq, cakey), serial, + (0, 60 * 60 * 24 * 365 * 10)) # ten years pkey = createKeyPair(TYPE_RSA, 2048) req = createCertRequest(pkey, CN="Headphones") - cert = createCertificate(req, (cacert, cakey), serial, (0, 60 * 60 * 24 * 365 * 10)) # ten years + cert = createCertificate(req, (cacert, cakey), serial, + (0, 60 * 60 * 24 * 365 * 10)) # ten years # Save the key and certificate to disk try: diff --git a/headphones/importer.py b/headphones/importer.py index 81b6bf2a..3cc7f141 100644 --- a/headphones/importer.py +++ b/headphones/importer.py @@ -13,39 +13,39 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -from headphones import logger, helpers, db, mb, lastfm, metacritic - -from beets.mediafile import MediaFile - import time + +from headphones import logger, helpers, db, mb, lastfm, metacritic +from beets.mediafile import MediaFile import headphones blacklisted_special_artist_names = ['[anonymous]', '[data]', '[no artist]', - '[traditional]', '[unknown]', 'Various Artists'] + '[traditional]', '[unknown]', 'Various Artists'] blacklisted_special_artists = ['f731ccc4-e22a-43af-a747-64213329e088', - '33cf029c-63b0-41a0-9855-be2a3665fb3b', - '314e1c25-dde7-4e4d-b2f4-0a7b9f7c56dc', - 'eec63d3c-3b81-4ad4-b1e4-7c147d4d2b61', - '9be7f096-97ec-4615-8957-8d40b5dcbc41', - '125ec42a-7229-4250-afc5-e057484327fe', - '89ad4ac3-39f7-470e-963a-56509c546377'] + '33cf029c-63b0-41a0-9855-be2a3665fb3b', + '314e1c25-dde7-4e4d-b2f4-0a7b9f7c56dc', + 'eec63d3c-3b81-4ad4-b1e4-7c147d4d2b61', + '9be7f096-97ec-4615-8957-8d40b5dcbc41', + '125ec42a-7229-4250-afc5-e057484327fe', + '89ad4ac3-39f7-470e-963a-56509c546377'] def is_exists(artistid): myDB = db.DBConnection() # See if the artist is already in the database - artistlist = myDB.select('SELECT ArtistID, ArtistName from artists WHERE ArtistID=?', [artistid]) + artistlist = myDB.select('SELECT ArtistID, ArtistName from artists WHERE ArtistID=?', + [artistid]) if any(artistid in x for x in artistlist): - logger.info(artistlist[0][1] + u" is already in the database. Updating 'have tracks', but not artist information") + logger.info(artistlist[0][ + 1] + u" is already in the database. Updating 'have tracks', but not artist information") return True else: return False def artistlist_to_mbids(artistlist, forced=False): - for artist in artistlist: if not artist and artist != ' ': @@ -77,9 +77,12 @@ def artistlist_to_mbids(artistlist, forced=False): myDB = db.DBConnection() if not forced: - bl_artist = myDB.action('SELECT * FROM blacklist WHERE ArtistID=?', [artistid]).fetchone() + bl_artist = myDB.action('SELECT * FROM blacklist WHERE ArtistID=?', + [artistid]).fetchone() if bl_artist or artistid in blacklisted_special_artists: - logger.info("Artist ID for '%s' is either blacklisted or Various Artists. To add artist, you must do it manually (Artist ID: %s)" % (artist, artistid)) + logger.info( + "Artist ID for '%s' is either blacklisted or Various Artists. To add artist, you must do it manually (Artist ID: %s)" % ( + artist, artistid)) continue # Add to database if it doesn't exist @@ -88,7 +91,9 @@ def artistlist_to_mbids(artistlist, forced=False): # Just update the tracks if it does else: - havetracks = len(myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=?', [artistid])) + len(myDB.select('SELECT TrackTitle from have WHERE ArtistName like ?', [artist])) + havetracks = len( + myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=?', [artistid])) + len( + myDB.select('SELECT TrackTitle from have WHERE ArtistName like ?', [artist])) myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?', [havetracks, artistid]) # Delete it from the New Artists if the request came from there @@ -112,7 +117,6 @@ def addArtistIDListToDB(artistidlist): def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): - # Putting this here to get around the circular import. We're using this to update thumbnails for artist/albums from headphones import cache @@ -142,7 +146,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): "Status": "Loading", "IncludeExtras": headphones.CONFIG.INCLUDE_EXTRAS, "Extras": headphones.CONFIG.EXTRAS} - if type=="series": + if type == "series": newValueDict['Type'] = "series" else: newValueDict = {"Status": "Loading"} @@ -151,7 +155,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): myDB.upsert("artists", newValueDict, controlValueDict) - if type=="series": + if type == "series": artist = mb.getSeries(artistid) else: artist = mb.getArtist(artistid, extrasonly) @@ -159,7 +163,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): if artist and artist.get('artist_name') in blacklisted_special_artist_names: logger.warn('Cannot import blocked special purpose artist: %s' % artist.get('artist_name')) myDB.action('DELETE from artists WHERE ArtistID=?', [artistid]) - #in case it's already in the db + # in case it's already in the db myDB.action('DELETE from albums WHERE ArtistID=?', [artistid]) myDB.action('DELETE from tracks WHERE ArtistID=?', [artistid]) return @@ -168,7 +172,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): logger.warn("Error fetching artist info. ID: " + artistid) if dbartist is None: newValueDict = {"ArtistName": "Fetch failed, try refreshing. (%s)" % (artistid), - "Status": "Active"} + "Status": "Active"} else: newValueDict = {"Status": "Active"} myDB.upsert("artists", newValueDict, controlValueDict) @@ -191,7 +195,8 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): # See if we need to grab extras. Artist specific extras take precedence # over global option. Global options are set when adding a new artist try: - db_artist = myDB.action('SELECT IncludeExtras, Extras from artists WHERE ArtistID=?', [artistid]).fetchone() + db_artist = myDB.action('SELECT IncludeExtras, Extras from artists WHERE ArtistID=?', + [artistid]).fetchone() includeExtras = db_artist['IncludeExtras'] except IndexError: includeExtras = False @@ -206,9 +211,12 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): for groups in artist['releasegroups']: group_list.append(groups['id']) if not extrasonly: - remove_missing_groups_from_albums = myDB.select("SELECT AlbumID FROM albums WHERE ArtistID=?", [artistid]) + remove_missing_groups_from_albums = myDB.select( + "SELECT AlbumID FROM albums WHERE ArtistID=?", [artistid]) else: - remove_missing_groups_from_albums = myDB.select('SELECT AlbumID FROM albums WHERE ArtistID=? AND Status="Skipped" AND Type!="Album"', [artistid]) + remove_missing_groups_from_albums = myDB.select( + 'SELECT AlbumID FROM albums WHERE ArtistID=? AND Status="Skipped" AND Type!="Album"', + [artistid]) for items in remove_missing_groups_from_albums: if items['AlbumID'] not in group_list: # Remove all from albums/tracks that aren't in release groups @@ -217,12 +225,16 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): myDB.action("DELETE FROM tracks WHERE AlbumID=?", [items['AlbumID']]) myDB.action("DELETE FROM alltracks WHERE AlbumID=?", [items['AlbumID']]) myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [items['AlbumID']]) - logger.info("[%s] Removing all references to release group %s to reflect MusicBrainz refresh" % (artist['artist_name'], items['AlbumID'])) + logger.info( + "[%s] Removing all references to release group %s to reflect MusicBrainz refresh" % ( + artist['artist_name'], items['AlbumID'])) if not extrasonly: force_repackage = 1 else: if not extrasonly: - logger.info("[%s] There was either an error pulling data from MusicBrainz or there might not be any releases for this category" % artist['artist_name']) + logger.info( + "[%s] There was either an error pulling data from MusicBrainz or there might not be any releases for this category" % + artist['artist_name']) # Then search for releases within releasegroups, if releases don't exist, then remove from allalbums/alltracks album_searches = [] @@ -232,7 +244,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): today = helpers.today() rgid = rg['id'] skip_log = 0 - #Make a user configurable variable to skip update of albums with release dates older than this date (in days) + # Make a user configurable variable to skip update of albums with release dates older than this date (in days) pause_delta = headphones.CONFIG.MB_IGNORE_AGE rg_exists = myDB.action("SELECT * from albums WHERE AlbumID=?", [rg['id']]).fetchone() @@ -247,12 +259,14 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): new_release_group = True if new_release_group: - logger.info("[%s] Now adding: %s (New Release Group)" % (artist['artist_name'], rg['title'])) + logger.info("[%s] Now adding: %s (New Release Group)" % ( + artist['artist_name'], rg['title'])) new_releases = mb.get_new_releases(rgid, includeExtras) else: if check_release_date is None or check_release_date == u"None": - logger.info("[%s] Now updating: %s (No Release Date)" % (artist['artist_name'], rg['title'])) + logger.info("[%s] Now updating: %s (No Release Date)" % ( + artist['artist_name'], rg['title'])) new_releases = mb.get_new_releases(rgid, includeExtras, True) else: if len(check_release_date) == 10: @@ -264,20 +278,24 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): else: release_date = today if helpers.get_age(today) - helpers.get_age(release_date) < pause_delta: - logger.info("[%s] Now updating: %s (Release Date <%s Days)", artist['artist_name'], rg['title'], pause_delta) + logger.info("[%s] Now updating: %s (Release Date <%s Days)", + artist['artist_name'], rg['title'], pause_delta) new_releases = mb.get_new_releases(rgid, includeExtras, True) else: - logger.info("[%s] Skipping: %s (Release Date >%s Days)", artist['artist_name'], rg['title'], pause_delta) + logger.info("[%s] Skipping: %s (Release Date >%s Days)", + artist['artist_name'], rg['title'], pause_delta) skip_log = 1 new_releases = 0 if force_repackage == 1: new_releases = -1 - logger.info('[%s] Forcing repackage of %s (Release Group Removed)', artist['artist_name'], al_title) + logger.info('[%s] Forcing repackage of %s (Release Group Removed)', + artist['artist_name'], al_title) else: new_releases = new_releases else: - logger.info("[%s] Now adding/updating: %s (Comprehensive Force)", artist['artist_name'], rg['title']) + logger.info("[%s] Now adding/updating: %s (Comprehensive Force)", artist['artist_name'], + rg['title']) new_releases = mb.get_new_releases(rgid, includeExtras, forcefull) if new_releases != 0: @@ -291,23 +309,26 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): # This will be used later to build a hybrid release fullreleaselist = [] # Search for releases within a release group - find_hybrid_releases = myDB.action("SELECT * from allalbums WHERE AlbumID=?", [rg['id']]) + find_hybrid_releases = myDB.action("SELECT * from allalbums WHERE AlbumID=?", + [rg['id']]) # Build the dictionary for the fullreleaselist for items in find_hybrid_releases: - if items['ReleaseID'] != rg['id']: #don't include hybrid information, since that's what we're replacing + if items['ReleaseID'] != rg[ + 'id']: # don't include hybrid information, since that's what we're replacing hybrid_release_id = items['ReleaseID'] newValueDict = {"ArtistID": items['ArtistID'], - "ArtistName": items['ArtistName'], - "AlbumTitle": items['AlbumTitle'], - "AlbumID": items['AlbumID'], - "AlbumASIN": items['AlbumASIN'], - "ReleaseDate": items['ReleaseDate'], - "Type": items['Type'], - "ReleaseCountry": items['ReleaseCountry'], - "ReleaseFormat": items['ReleaseFormat'] - } - find_hybrid_tracks = myDB.action("SELECT * from alltracks WHERE ReleaseID=?", [hybrid_release_id]) + "ArtistName": items['ArtistName'], + "AlbumTitle": items['AlbumTitle'], + "AlbumID": items['AlbumID'], + "AlbumASIN": items['AlbumASIN'], + "ReleaseDate": items['ReleaseDate'], + "Type": items['Type'], + "ReleaseCountry": items['ReleaseCountry'], + "ReleaseFormat": items['ReleaseFormat'] + } + find_hybrid_tracks = myDB.action("SELECT * from alltracks WHERE ReleaseID=?", + [hybrid_release_id]) totalTracks = 1 hybrid_track_array = [] for hybrid_tracks in find_hybrid_tracks: @@ -315,9 +336,9 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): 'number': hybrid_tracks['TrackNumber'], 'title': hybrid_tracks['TrackTitle'], 'id': hybrid_tracks['TrackID'], - #'url': hybrid_tracks['TrackURL'], + # 'url': hybrid_tracks['TrackURL'], 'duration': hybrid_tracks['TrackDuration'] - }) + }) totalTracks += 1 newValueDict['ReleaseID'] = hybrid_release_id newValueDict['Tracks'] = hybrid_track_array @@ -327,10 +348,12 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): # This may end up being called with an empty fullreleaselist try: hybridrelease = getHybridRelease(fullreleaselist) - logger.info('[%s] Packaging %s releases into hybrid title' % (artist['artist_name'], rg['title'])) + logger.info('[%s] Packaging %s releases into hybrid title' % ( + artist['artist_name'], rg['title'])) except Exception as e: errors = True - logger.warn('[%s] Unable to get hybrid release information for %s: %s' % (artist['artist_name'], rg['title'], e)) + logger.warn('[%s] Unable to get hybrid release information for %s: %s' % ( + artist['artist_name'], rg['title'], e)) continue # Use the ReleaseGroupID as the ReleaseID for the hybrid release to differentiate it @@ -345,13 +368,14 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): "AlbumASIN": hybridrelease['AlbumASIN'], "ReleaseDate": hybridrelease['ReleaseDate'], "Type": rg['type'] - } + } myDB.upsert("allalbums", newValueDict, controlValueDict) for track in hybridrelease['Tracks']: - cleanname = helpers.cleanName(artist['artist_name'] + ' ' + rg['title'] + ' ' + track['title']) + cleanname = helpers.cleanName( + artist['artist_name'] + ' ' + rg['title'] + ' ' + track['title']) controlValueDict = {"TrackID": track['id'], "ReleaseID": rg['id']} @@ -365,25 +389,29 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): "TrackDuration": track['duration'], "TrackNumber": track['number'], "CleanName": cleanname - } + } - match = myDB.action('SELECT Location, BitRate, Format from have WHERE CleanName=?', [cleanname]).fetchone() + match = myDB.action('SELECT Location, BitRate, Format from have WHERE CleanName=?', + [cleanname]).fetchone() if not match: - match = myDB.action('SELECT Location, BitRate, Format from have WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?', [artist['artist_name'], rg['title'], track['title']]).fetchone() - #if not match: - #match = myDB.action('SELECT Location, BitRate, Format from have WHERE TrackID=?', [track['id']]).fetchone() + match = myDB.action( + 'SELECT Location, BitRate, Format from have WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?', + [artist['artist_name'], rg['title'], track['title']]).fetchone() + # if not match: + # match = myDB.action('SELECT Location, BitRate, Format from have WHERE TrackID=?', [track['id']]).fetchone() if match: newValueDict['Location'] = match['Location'] newValueDict['BitRate'] = match['BitRate'] newValueDict['Format'] = match['Format'] - #myDB.action('UPDATE have SET Matched="True" WHERE Location=?', [match['Location']]) - myDB.action('UPDATE have SET Matched=? WHERE Location=?', (rg['id'], match['Location'])) + # myDB.action('UPDATE have SET Matched="True" WHERE Location=?', [match['Location']]) + myDB.action('UPDATE have SET Matched=? WHERE Location=?', + (rg['id'], match['Location'])) myDB.upsert("alltracks", newValueDict, controlValueDict) # Delete matched tracks from the have table - #myDB.action('DELETE from have WHERE Matched="True"') + # myDB.action('DELETE from have WHERE Matched="True"') # If there's no release in the main albums tables, add the default (hybrid) # If there is a release, check the ReleaseID against the AlbumID to see if they differ (user updated) @@ -408,7 +436,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): "Type": album['Type'], "ReleaseCountry": album['ReleaseCountry'], "ReleaseFormat": album['ReleaseFormat'] - } + } if rg_exists: newValueDict['DateAdded'] = rg_exists['DateAdded'] @@ -425,14 +453,17 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): newValueDict['Status'] = "Wanted" # Sometimes "new" albums are added to musicbrainz after their release date, so let's try to catch these # The first test just makes sure we have year-month-day - elif helpers.get_age(album['ReleaseDate']) and helpers.get_age(today) - helpers.get_age(album['ReleaseDate']) < 21 and headphones.CONFIG.AUTOWANT_UPCOMING: + elif helpers.get_age(album['ReleaseDate']) and helpers.get_age( + today) - helpers.get_age( + album['ReleaseDate']) < 21 and headphones.CONFIG.AUTOWANT_UPCOMING: newValueDict['Status'] = "Wanted" else: newValueDict['Status'] = "Skipped" myDB.upsert("albums", newValueDict, controlValueDict) - tracks = myDB.action('SELECT * from alltracks WHERE ReleaseID=?', [releaseid]).fetchall() + tracks = myDB.action('SELECT * from alltracks WHERE ReleaseID=?', + [releaseid]).fetchall() # This is used to see how many tracks you have from an album - to # mark it as downloaded. Default is 80%, can be set in config as @@ -441,7 +472,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): if total_track_count == 0: logger.warning("Total track count is zero for Release ID " + - "'%s', skipping.", releaseid) + "'%s', skipping.", releaseid) continue for track in tracks: @@ -449,35 +480,43 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): "AlbumID": rg['id']} newValueDict = {"ArtistID": track['ArtistID'], - "ArtistName": track['ArtistName'], - "AlbumTitle": track['AlbumTitle'], - "AlbumASIN": track['AlbumASIN'], - "ReleaseID": track['ReleaseID'], - "TrackTitle": track['TrackTitle'], - "TrackDuration": track['TrackDuration'], - "TrackNumber": track['TrackNumber'], - "CleanName": track['CleanName'], - "Location": track['Location'], - "Format": track['Format'], - "BitRate": track['BitRate'] - } + "ArtistName": track['ArtistName'], + "AlbumTitle": track['AlbumTitle'], + "AlbumASIN": track['AlbumASIN'], + "ReleaseID": track['ReleaseID'], + "TrackTitle": track['TrackTitle'], + "TrackDuration": track['TrackDuration'], + "TrackNumber": track['TrackNumber'], + "CleanName": track['CleanName'], + "Location": track['Location'], + "Format": track['Format'], + "BitRate": track['BitRate'] + } myDB.upsert("tracks", newValueDict, controlValueDict) # Mark albums as downloaded if they have at least 80% (by default, configurable) of the album - have_track_count = len(myDB.select('SELECT * from tracks WHERE AlbumID=? AND Location IS NOT NULL', [rg['id']])) + have_track_count = len( + myDB.select('SELECT * from tracks WHERE AlbumID=? AND Location IS NOT NULL', + [rg['id']])) marked_as_downloaded = False if rg_exists: - if rg_exists['Status'] == 'Skipped' and ((have_track_count / float(total_track_count)) >= (headphones.CONFIG.ALBUM_COMPLETION_PCT / 100.0)): - myDB.action('UPDATE albums SET Status=? WHERE AlbumID=?', ['Downloaded', rg['id']]) + if rg_exists['Status'] == 'Skipped' and ( + (have_track_count / float(total_track_count)) >= ( + headphones.CONFIG.ALBUM_COMPLETION_PCT / 100.0)): + myDB.action('UPDATE albums SET Status=? WHERE AlbumID=?', + ['Downloaded', rg['id']]) marked_as_downloaded = True else: - if (have_track_count / float(total_track_count)) >= (headphones.CONFIG.ALBUM_COMPLETION_PCT / 100.0): - myDB.action('UPDATE albums SET Status=? WHERE AlbumID=?', ['Downloaded', rg['id']]) + if (have_track_count / float(total_track_count)) >= ( + headphones.CONFIG.ALBUM_COMPLETION_PCT / 100.0): + myDB.action('UPDATE albums SET Status=? WHERE AlbumID=?', + ['Downloaded', rg['id']]) marked_as_downloaded = True - logger.info(u"[%s] Seeing if we need album art for %s" % (artist['artist_name'], rg['title'])) + logger.info( + u"[%s] Seeing if we need album art for %s" % (artist['artist_name'], rg['title'])) cache.getThumb(AlbumID=rg['id']) # Start a search for the album if it's new, hasn't been marked as @@ -487,7 +526,8 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): album_searches.append(rg['id']) else: if skip_log == 0: - logger.info(u"[%s] No new releases, so no changes made to %s" % (artist['artist_name'], rg['title'])) + logger.info(u"[%s] No new releases, so no changes made to %s" % ( + artist['artist_name'], rg['title'])) time.sleep(3) finalize_update(artistid, artist['artist_name'], errors) @@ -499,7 +539,9 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): metacritic.update(artistid, artist['artist_name'], artist['releasegroups']) if errors: - logger.info("[%s] Finished updating artist: %s but with errors, so not marking it as updated in the database" % (artist['artist_name'], artist['artist_name'])) + logger.info( + "[%s] Finished updating artist: %s but with errors, so not marking it as updated in the database" % ( + artist['artist_name'], artist['artist_name'])) else: myDB.action('DELETE FROM newartists WHERE ArtistName = ?', [artist['artist_name']]) logger.info(u"Updating complete for: %s" % artist['artist_name']) @@ -518,10 +560,18 @@ def finalize_update(artistid, artistname, errors=False): myDB = db.DBConnection() - latestalbum = myDB.action('SELECT AlbumTitle, ReleaseDate, AlbumID from albums WHERE ArtistID=? order by ReleaseDate DESC', [artistid]).fetchone() - totaltracks = len(myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=? AND AlbumID IN (SELECT AlbumID FROM albums WHERE Status != "Ignored")', [artistid])) - #havetracks = len(myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL', [artistid])) + len(myDB.select('SELECT TrackTitle from have WHERE ArtistName like ?', [artist['artist_name']])) - havetracks = len(myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL', [artistid])) + len(myDB.select('SELECT TrackTitle from have WHERE ArtistName like ? AND Matched = "Failed"', [artistname])) + latestalbum = myDB.action( + 'SELECT AlbumTitle, ReleaseDate, AlbumID from albums WHERE ArtistID=? order by ReleaseDate DESC', + [artistid]).fetchone() + totaltracks = len(myDB.select( + 'SELECT TrackTitle from tracks WHERE ArtistID=? AND AlbumID IN (SELECT AlbumID FROM albums WHERE Status != "Ignored")', + [artistid])) + # havetracks = len(myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL', [artistid])) + len(myDB.select('SELECT TrackTitle from have WHERE ArtistName like ?', [artist['artist_name']])) + havetracks = len( + myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL', + [artistid])) + len( + myDB.select('SELECT TrackTitle from have WHERE ArtistName like ? AND Matched = "Failed"', + [artistname])) controlValueDict = {"ArtistID": artistid} @@ -544,7 +594,6 @@ def finalize_update(artistid, artistname, errors=False): def addReleaseById(rid, rgid=None): - myDB = db.DBConnection() # Create minimum info upfront if added from searchresults @@ -563,14 +612,18 @@ def addReleaseById(rid, rgid=None): rgid = None artistid = None release_dict = None - results = myDB.select("SELECT albums.ArtistID, releases.ReleaseGroupID from releases, albums WHERE releases.ReleaseID=? and releases.ReleaseGroupID=albums.AlbumID LIMIT 1", [rid]) + results = myDB.select( + "SELECT albums.ArtistID, releases.ReleaseGroupID from releases, albums WHERE releases.ReleaseID=? and releases.ReleaseGroupID=albums.AlbumID LIMIT 1", + [rid]) for result in results: rgid = result['ReleaseGroupID'] artistid = result['ArtistID'] - logger.debug("Found a cached releaseid : releasegroupid relationship: " + rid + " : " + rgid) + logger.debug( + "Found a cached releaseid : releasegroupid relationship: " + rid + " : " + rgid) if not rgid: - #didn't find it in the cache, get the information from MB - logger.debug("Didn't find releaseID " + rid + " in the cache. Looking up its ReleaseGroupID") + # didn't find it in the cache, get the information from MB + logger.debug( + "Didn't find releaseID " + rid + " in the cache. Looking up its ReleaseGroupID") try: release_dict = mb.getRelease(rid) except Exception as e: @@ -587,10 +640,10 @@ def addReleaseById(rid, rgid=None): rgid = release_dict['rgid'] artistid = release_dict['artist_id'] - #we don't want to make more calls to MB here unless we have to, could be happening quite a lot + # we don't want to make more calls to MB here unless we have to, could be happening quite a lot rg_exists = myDB.select("SELECT * from albums WHERE AlbumID=?", [rgid]) - #make sure the artist exists since I don't know what happens later if it doesn't + # make sure the artist exists since I don't know what happens later if it doesn't artist_exists = myDB.select("SELECT * from artists WHERE ArtistID=?", [artistid]) if not artist_exists and release_dict: @@ -599,7 +652,8 @@ def addReleaseById(rid, rgid=None): else: sortname = release_dict['artist_name'] - logger.info(u"Now manually adding: " + release_dict['artist_name'] + " - with status Paused") + logger.info( + u"Now manually adding: " + release_dict['artist_name'] + " - with status Paused") controlValueDict = {"ArtistID": release_dict['artist_id']} newValueDict = {"ArtistName": release_dict['artist_name'], "ArtistSortName": sortname, @@ -624,13 +678,14 @@ def addReleaseById(rid, rgid=None): myDB.upsert("artists", newValueDict, controlValueDict) elif not artist_exists and not release_dict: - logger.error("Artist does not exist in the database and did not get a valid response from MB. Skipping release.") + logger.error( + "Artist does not exist in the database and did not get a valid response from MB. Skipping release.") if status == 'Loading': myDB.action("DELETE FROM albums WHERE AlbumID=?", [rgid]) return - if not rg_exists and release_dict or status == 'Loading' and release_dict: #it should never be the case that we have an rg and not the artist - #but if it is this will fail + if not rg_exists and release_dict or status == 'Loading' and release_dict: # it should never be the case that we have an rg and not the artist + # but if it is this will fail logger.info(u"Now adding-by-id album (" + release_dict['title'] + ") from id: " + rgid) controlValueDict = {"AlbumID": rgid} if status != 'Loading': @@ -639,7 +694,8 @@ def addReleaseById(rid, rgid=None): newValueDict = {"ArtistID": release_dict['artist_id'], "ReleaseID": rgid, "ArtistName": release_dict['artist_name'], - "AlbumTitle": release_dict['title'] if 'title' in release_dict else release_dict['rg_title'], + "AlbumTitle": release_dict['title'] if 'title' in release_dict else + release_dict['rg_title'], "AlbumASIN": release_dict['asin'], "ReleaseDate": release_dict['date'], "DateAdded": helpers.today(), @@ -650,41 +706,48 @@ def addReleaseById(rid, rgid=None): myDB.upsert("albums", newValueDict, controlValueDict) - #keep a local cache of these so that external programs that are adding releasesByID don't hammer MB + # keep a local cache of these so that external programs that are adding releasesByID don't hammer MB myDB.action('INSERT INTO releases VALUES( ?, ?)', [rid, release_dict['rgid']]) for track in release_dict['tracks']: - cleanname = helpers.cleanName(release_dict['artist_name'] + ' ' + release_dict['rg_title'] + ' ' + track['title']) + cleanname = helpers.cleanName( + release_dict['artist_name'] + ' ' + release_dict['rg_title'] + ' ' + track['title']) controlValueDict = {"TrackID": track['id'], "AlbumID": rgid} newValueDict = {"ArtistID": release_dict['artist_id'], - "ArtistName": release_dict['artist_name'], - "AlbumTitle": release_dict['rg_title'], - "AlbumASIN": release_dict['asin'], - "TrackTitle": track['title'], - "TrackDuration": track['duration'], - "TrackNumber": track['number'], - "CleanName": cleanname - } + "ArtistName": release_dict['artist_name'], + "AlbumTitle": release_dict['rg_title'], + "AlbumASIN": release_dict['asin'], + "TrackTitle": track['title'], + "TrackDuration": track['duration'], + "TrackNumber": track['number'], + "CleanName": cleanname + } - match = myDB.action('SELECT Location, BitRate, Format, Matched from have WHERE CleanName=?', [cleanname]).fetchone() + match = myDB.action( + 'SELECT Location, BitRate, Format, Matched from have WHERE CleanName=?', + [cleanname]).fetchone() if not match: - match = myDB.action('SELECT Location, BitRate, Format, Matched from have WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?', [release_dict['artist_name'], release_dict['rg_title'], track['title']]).fetchone() + match = myDB.action( + 'SELECT Location, BitRate, Format, Matched from have WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?', + [release_dict['artist_name'], release_dict['rg_title'], + track['title']]).fetchone() - #if not match: - #match = myDB.action('SELECT Location, BitRate, Format from have WHERE TrackID=?', [track['id']]).fetchone() + # if not match: + # match = myDB.action('SELECT Location, BitRate, Format from have WHERE TrackID=?', [track['id']]).fetchone() if match: newValueDict['Location'] = match['Location'] newValueDict['BitRate'] = match['BitRate'] newValueDict['Format'] = match['Format'] - #myDB.action('DELETE from have WHERE Location=?', [match['Location']]) + # myDB.action('DELETE from have WHERE Location=?', [match['Location']]) # If the album has been scanned before adding the release it will be unmatched, update to matched if match['Matched'] == 'Failed': - myDB.action('UPDATE have SET Matched=? WHERE Location=?', (release_dict['rgid'], match['Location'])) + myDB.action('UPDATE have SET Matched=? WHERE Location=?', + (release_dict['rgid'], match['Location'])) myDB.upsert("tracks", newValueDict, controlValueDict) @@ -703,7 +766,8 @@ def addReleaseById(rid, rgid=None): searcher.searchforalbum(rgid, False) elif not rg_exists and not release_dict: - logger.error("ReleaseGroup does not exist in the database and did not get a valid response from MB. Skipping release.") + logger.error( + "ReleaseGroup does not exist in the database and did not get a valid response from MB. Skipping release.") if status == 'Loading': myDB.action("DELETE FROM albums WHERE AlbumID=?", [rgid]) return @@ -811,11 +875,13 @@ def getHybridRelease(fullreleaselist): sortable_release_list.sort(key=lambda x: getSortableReleaseDate(x['releasedate'])) - average_tracks = sum(x['trackscount'] for x in sortable_release_list) / float(len(sortable_release_list)) + average_tracks = sum(x['trackscount'] for x in sortable_release_list) / float( + len(sortable_release_list)) for item in sortable_release_list: item['trackscount_delta'] = abs(average_tracks - item['trackscount']) - a = helpers.multikeysort(sortable_release_list, ['-hasasin', 'country', 'format', 'trackscount_delta']) + a = helpers.multikeysort(sortable_release_list, + ['-hasasin', 'country', 'format', 'trackscount_delta']) release_dict = {'ReleaseDate': sortable_release_list[0]['releasedate'], 'Tracks': a[0]['tracks'], diff --git a/headphones/lastfm.py b/headphones/lastfm.py index 720c8dd9..89ab7448 100644 --- a/headphones/lastfm.py +++ b/headphones/lastfm.py @@ -14,15 +14,14 @@ # along with Headphones. If not, see . import random -import headphones -import headphones.lock - -from headphones import db, logger, request - from collections import defaultdict -TIMEOUT = 60.0 # seconds -REQUEST_LIMIT = 1.0 / 5 # seconds +import headphones +import headphones.lock +from headphones import db, logger, request + +TIMEOUT = 60.0 # seconds +REQUEST_LIMIT = 1.0 / 5 # seconds ENTRY_POINT = "http://ws.audioscrobbler.com/2.0/" API_KEY = "395e6ec6bb557382fc41fde867bce66f" diff --git a/headphones/librarysync.py b/headphones/librarysync.py index 17381d80..35247442 100644 --- a/headphones/librarysync.py +++ b/headphones/librarysync.py @@ -14,17 +14,16 @@ # along with Headphones. If not, see . import os + import headphones - from beets.mediafile import MediaFile, FileTypeError, UnreadableFileError - from headphones import db, logger, helpers, importer, lastfm + # You can scan a single directory and append it to the current library by # specifying append=True, ArtistID and ArtistName. def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, - cron=False, artistScan=False): - + cron=False, artistScan=False): if cron and not headphones.CONFIG.LIBRARYSCAN: return @@ -40,7 +39,8 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, dir = dir.encode(headphones.SYS_ENCODING) if not os.path.isdir(dir): - logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(headphones.SYS_ENCODING, 'replace')) + logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(headphones.SYS_ENCODING, + 'replace')) return myDB = db.DBConnection() @@ -50,13 +50,16 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, if not append: # Clean up bad filepaths - tracks = myDB.select('SELECT Location from alltracks WHERE Location IS NOT NULL UNION SELECT Location from tracks WHERE Location IS NOT NULL') + tracks = myDB.select( + 'SELECT Location from alltracks WHERE Location IS NOT NULL UNION SELECT Location from tracks WHERE Location IS NOT NULL') for track in tracks: encoded_track_string = track['Location'].encode(headphones.SYS_ENCODING, 'replace') if not os.path.isfile(encoded_track_string): - myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?', [None, None, None, track['Location']]) - myDB.action('UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?', [None, None, None, track['Location']]) + myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?', + [None, None, None, track['Location']]) + myDB.action('UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?', + [None, None, None, track['Location']]) del_have_tracks = myDB.select('SELECT Location, Matched, ArtistName from have') @@ -67,7 +70,9 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, # Make sure deleted files get accounted for when updating artist track counts new_artists.append(track['ArtistName']) myDB.action('DELETE FROM have WHERE Location=?', [track['Location']]) - logger.info('File %s removed from Headphones, as it is no longer on disk' % encoded_track_string.decode(headphones.SYS_ENCODING, 'replace')) + logger.info( + 'File %s removed from Headphones, as it is no longer on disk' % encoded_track_string.decode( + headphones.SYS_ENCODING, 'replace')) bitrates = [] song_list = [] @@ -89,9 +94,14 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, latest_subdirectory.append(subdirectory) if file_count == 0 and r.replace(dir, '') != '': - logger.info("[%s] Now scanning subdirectory %s" % (dir.decode(headphones.SYS_ENCODING, 'replace'), subdirectory.decode(headphones.SYS_ENCODING, 'replace'))) - elif latest_subdirectory[file_count] != latest_subdirectory[file_count - 1] and file_count != 0: - logger.info("[%s] Now scanning subdirectory %s" % (dir.decode(headphones.SYS_ENCODING, 'replace'), subdirectory.decode(headphones.SYS_ENCODING, 'replace'))) + logger.info("[%s] Now scanning subdirectory %s" % ( + dir.decode(headphones.SYS_ENCODING, 'replace'), + subdirectory.decode(headphones.SYS_ENCODING, 'replace'))) + elif latest_subdirectory[file_count] != latest_subdirectory[ + file_count - 1] and file_count != 0: + logger.info("[%s] Now scanning subdirectory %s" % ( + dir.decode(headphones.SYS_ENCODING, 'replace'), + subdirectory.decode(headphones.SYS_ENCODING, 'replace'))) song = os.path.join(r, files) @@ -102,10 +112,13 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, try: f = MediaFile(song) except (FileTypeError, UnreadableFileError): - logger.warning("Cannot read media file '%s', skipping. It may be corrupted or not a media file.", unicode_song_path) + logger.warning( + "Cannot read media file '%s', skipping. It may be corrupted or not a media file.", + unicode_song_path) continue except IOError: - logger.warning("Cannnot read media file '%s', skipping. Does the file exists?", unicode_song_path) + logger.warning("Cannnot read media file '%s', skipping. Does the file exists?", + unicode_song_path) continue # Grab the bitrates for the auto detect bit rate option @@ -131,45 +144,52 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, controlValueDict = {'Location': unicode_song_path} newValueDict = {'TrackID': f.mb_trackid, - #'ReleaseID' : f.mb_albumid, - 'ArtistName': f_artist, - 'AlbumTitle': f.album, - 'TrackNumber': f.track, - 'TrackLength': f.length, - 'Genre': f.genre, - 'Date': f.date, - 'TrackTitle': f.title, - 'BitRate': f.bitrate, - 'Format': f.format, - 'CleanName': CleanName - } + # 'ReleaseID' : f.mb_albumid, + 'ArtistName': f_artist, + 'AlbumTitle': f.album, + 'TrackNumber': f.track, + 'TrackLength': f.length, + 'Genre': f.genre, + 'Date': f.date, + 'TrackTitle': f.title, + 'BitRate': f.bitrate, + 'Format': f.format, + 'CleanName': CleanName + } - #song_list.append(song_dict) - check_exist_song = myDB.action("SELECT * FROM have WHERE Location=?", [unicode_song_path]).fetchone() - #Only attempt to match songs that are new, haven't yet been matched, or metadata has changed. + # song_list.append(song_dict) + check_exist_song = myDB.action("SELECT * FROM have WHERE Location=?", + [unicode_song_path]).fetchone() + # Only attempt to match songs that are new, haven't yet been matched, or metadata has changed. if not check_exist_song: - #This is a new track + # This is a new track if f_artist: new_artists.append(f_artist) myDB.upsert("have", newValueDict, controlValueDict) new_song_count += 1 else: - if check_exist_song['ArtistName'] != f_artist or check_exist_song['AlbumTitle'] != f.album or check_exist_song['TrackTitle'] != f.title: - #Important track metadata has been modified, need to run matcher again + if check_exist_song['ArtistName'] != f_artist or check_exist_song[ + 'AlbumTitle'] != f.album or check_exist_song['TrackTitle'] != f.title: + # Important track metadata has been modified, need to run matcher again if f_artist and f_artist != check_exist_song['ArtistName']: new_artists.append(f_artist) - elif f_artist and f_artist == check_exist_song['ArtistName'] and check_exist_song['Matched'] != "Ignored": + elif f_artist and f_artist == check_exist_song['ArtistName'] and \ + check_exist_song['Matched'] != "Ignored": new_artists.append(f_artist) else: continue newValueDict['Matched'] = None myDB.upsert("have", newValueDict, controlValueDict) - myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?', [None, None, None, unicode_song_path]) - myDB.action('UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?', [None, None, None, unicode_song_path]) + myDB.action( + 'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?', + [None, None, None, unicode_song_path]) + myDB.action( + 'UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?', + [None, None, None, unicode_song_path]) new_song_count += 1 else: - #This track information hasn't changed + # This track information hasn't changed if f_artist and check_exist_song['Matched'] != "Ignored": new_artists.append(f_artist) @@ -177,9 +197,13 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, # Now we start track matching logger.info("%s new/modified songs found and added to the database" % new_song_count) - song_list = myDB.action("SELECT * FROM have WHERE Matched IS NULL AND LOCATION LIKE ?", [dir.decode(headphones.SYS_ENCODING, 'replace') + "%"]) - total_number_of_songs = myDB.action("SELECT COUNT(*) FROM have WHERE Matched IS NULL AND LOCATION LIKE ?", [dir.decode(headphones.SYS_ENCODING, 'replace') + "%"]).fetchone()[0] - logger.info("Found " + str(total_number_of_songs) + " new/modified tracks in: '" + dir.decode(headphones.SYS_ENCODING, 'replace') + "'. Matching tracks to the appropriate releases....") + song_list = myDB.action("SELECT * FROM have WHERE Matched IS NULL AND LOCATION LIKE ?", + [dir.decode(headphones.SYS_ENCODING, 'replace') + "%"]) + total_number_of_songs = \ + myDB.action("SELECT COUNT(*) FROM have WHERE Matched IS NULL AND LOCATION LIKE ?", + [dir.decode(headphones.SYS_ENCODING, 'replace') + "%"]).fetchone()[0] + logger.info("Found " + str(total_number_of_songs) + " new/modified tracks in: '" + dir.decode( + headphones.SYS_ENCODING, 'replace') + "'. Matching tracks to the appropriate releases....") # Sort the song_list by most vague (e.g. no trackid or releaseid) to most specific (both trackid & releaseid) # When we insert into the database, the tracks with the most specific information will overwrite the more general matches @@ -205,22 +229,24 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, if completion_percentage % 10 == 0: logger.info("Track matching is " + str(completion_percentage) + "% complete") - #THE "MORE-SPECIFIC" CLAUSES HERE HAVE ALL BEEN REMOVED. WHEN RUNNING A LIBRARY SCAN, THE ONLY CLAUSES THAT - #EVER GOT HIT WERE [ARTIST/ALBUM/TRACK] OR CLEANNAME. ARTISTID & RELEASEID ARE NEVER PASSED TO THIS FUNCTION, - #ARE NEVER FOUND, AND THE OTHER CLAUSES WERE NEVER HIT. FURTHERMORE, OTHER MATCHING FUNCTIONS IN THIS PROGRAM - #(IMPORTER.PY, MB.PY) SIMPLY DO A [ARTIST/ALBUM/TRACK] OR CLEANNAME MATCH, SO IT'S ALL CONSISTENT. + # THE "MORE-SPECIFIC" CLAUSES HERE HAVE ALL BEEN REMOVED. WHEN RUNNING A LIBRARY SCAN, THE ONLY CLAUSES THAT + # EVER GOT HIT WERE [ARTIST/ALBUM/TRACK] OR CLEANNAME. ARTISTID & RELEASEID ARE NEVER PASSED TO THIS FUNCTION, + # ARE NEVER FOUND, AND THE OTHER CLAUSES WERE NEVER HIT. FURTHERMORE, OTHER MATCHING FUNCTIONS IN THIS PROGRAM + # (IMPORTER.PY, MB.PY) SIMPLY DO A [ARTIST/ALBUM/TRACK] OR CLEANNAME MATCH, SO IT'S ALL CONSISTENT. if song['ArtistName'] and song['AlbumTitle'] and song['TrackTitle']: - track = myDB.action('SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?', [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone() + track = myDB.action( + 'SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from tracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?', + [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone() have_updated = False if track: controlValueDict = {'ArtistName': track['ArtistName'], - 'AlbumTitle': track['AlbumTitle'], - 'TrackTitle': track['TrackTitle']} + 'AlbumTitle': track['AlbumTitle'], + 'TrackTitle': track['TrackTitle']} newValueDict = {'Location': song['Location'], - 'BitRate': song['BitRate'], - 'Format': song['Format']} + 'BitRate': song['BitRate'], + 'Format': song['Format']} myDB.upsert("tracks", newValueDict, controlValueDict) controlValueDict2 = {'Location': song['Location']} @@ -228,12 +254,13 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, myDB.upsert("have", newValueDict2, controlValueDict2) have_updated = True else: - track = myDB.action('SELECT CleanName, AlbumID from tracks WHERE CleanName LIKE ?', [song['CleanName']]).fetchone() + track = myDB.action('SELECT CleanName, AlbumID from tracks WHERE CleanName LIKE ?', + [song['CleanName']]).fetchone() if track: controlValueDict = {'CleanName': track['CleanName']} newValueDict = {'Location': song['Location'], - 'BitRate': song['BitRate'], - 'Format': song['Format']} + 'BitRate': song['BitRate'], + 'Format': song['Format']} myDB.upsert("tracks", newValueDict, controlValueDict) controlValueDict2 = {'Location': song['Location']} @@ -246,26 +273,30 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, myDB.upsert("have", newValueDict2, controlValueDict2) have_updated = True - alltrack = myDB.action('SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from alltracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?', [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone() + alltrack = myDB.action( + 'SELECT ArtistName, AlbumTitle, TrackTitle, AlbumID from alltracks WHERE ArtistName LIKE ? AND AlbumTitle LIKE ? AND TrackTitle LIKE ?', + [song['ArtistName'], song['AlbumTitle'], song['TrackTitle']]).fetchone() if alltrack: controlValueDict = {'ArtistName': alltrack['ArtistName'], - 'AlbumTitle': alltrack['AlbumTitle'], - 'TrackTitle': alltrack['TrackTitle']} + 'AlbumTitle': alltrack['AlbumTitle'], + 'TrackTitle': alltrack['TrackTitle']} newValueDict = {'Location': song['Location'], - 'BitRate': song['BitRate'], - 'Format': song['Format']} + 'BitRate': song['BitRate'], + 'Format': song['Format']} myDB.upsert("alltracks", newValueDict, controlValueDict) controlValueDict2 = {'Location': song['Location']} newValueDict2 = {'Matched': alltrack['AlbumID']} myDB.upsert("have", newValueDict2, controlValueDict2) else: - alltrack = myDB.action('SELECT CleanName, AlbumID from alltracks WHERE CleanName LIKE ?', [song['CleanName']]).fetchone() + alltrack = myDB.action( + 'SELECT CleanName, AlbumID from alltracks WHERE CleanName LIKE ?', + [song['CleanName']]).fetchone() if alltrack: controlValueDict = {'CleanName': alltrack['CleanName']} newValueDict = {'Location': song['Location'], - 'BitRate': song['BitRate'], - 'Format': song['Format']} + 'BitRate': song['BitRate'], + 'Format': song['Format']} myDB.upsert("alltracks", newValueDict, controlValueDict) controlValueDict2 = {'Location': song['Location']} @@ -283,9 +314,10 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, newValueDict2 = {'Matched': "Failed"} myDB.upsert("have", newValueDict2, controlValueDict2) - #######myDB.action('INSERT INTO have (ArtistName, AlbumTitle, TrackNumber, TrackTitle, TrackLength, BitRate, Genre, Date, TrackID, Location, CleanName, Format) VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', [song['ArtistName'], song['AlbumTitle'], song['TrackNumber'], song['TrackTitle'], song['TrackLength'], song['BitRate'], song['Genre'], song['Date'], song['TrackID'], song['Location'], CleanName, song['Format']]) + #######myDB.action('INSERT INTO have (ArtistName, AlbumTitle, TrackNumber, TrackTitle, TrackLength, BitRate, Genre, Date, TrackID, Location, CleanName, Format) VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', [song['ArtistName'], song['AlbumTitle'], song['TrackNumber'], song['TrackTitle'], song['TrackLength'], song['BitRate'], song['Genre'], song['Date'], song['TrackID'], song['Location'], CleanName, song['Format']]) - logger.info('Completed matching tracks from directory: %s' % dir.decode(headphones.SYS_ENCODING, 'replace')) + logger.info('Completed matching tracks from directory: %s' % dir.decode(headphones.SYS_ENCODING, + 'replace')) if not append or artistScan: logger.info('Updating scanned artist track counts') @@ -294,29 +326,32 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, unique_artists = {}.fromkeys(new_artists).keys() current_artists = myDB.select('SELECT ArtistName, ArtistID from artists') - #There was a bug where artists with special characters (-,') would show up in new artists. + # There was a bug where artists with special characters (-,') would show up in new artists. artist_list = [ x for x in unique_artists if helpers.cleanName(x).lower() not in [ helpers.cleanName(y[0]).lower() for y in current_artists + ] ] - ] artists_checked = [ x for x in unique_artists if helpers.cleanName(x).lower() in [ helpers.cleanName(y[0]).lower() for y in current_artists + ] ] - ] # Update track counts for artist in artists_checked: # Have tracks are selected from tracks table and not all tracks because of duplicates # We update the track count upon an album switch to compliment this havetracks = ( - len(myDB.select('SELECT TrackTitle from tracks WHERE ArtistName like ? AND Location IS NOT NULL', [artist])) - + len(myDB.select('SELECT TrackTitle from have WHERE ArtistName like ? AND Matched = "Failed"', [artist])) + len(myDB.select( + 'SELECT TrackTitle from tracks WHERE ArtistName like ? AND Location IS NOT NULL', + [artist])) + len(myDB.select( + 'SELECT TrackTitle from have WHERE ArtistName like ? AND Matched = "Failed"', + [artist])) ) # Note: some people complain about having "artist have tracks" > # of tracks total in artist official releases # (can fix by getting rid of second len statement) @@ -330,7 +365,7 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, importer.artistlist_to_mbids(artist_list) else: logger.info('To add these artists, go to Manage->Manage New Artists') - #myDB.action('DELETE from newartists') + # myDB.action('DELETE from newartists') for artist in artist_list: myDB.action('INSERT OR IGNORE INTO newartists VALUES (?)', [artist]) @@ -341,7 +376,11 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, # If we're appending a new album to the database, update the artists total track counts logger.info('Updating artist track counts') - havetracks = len(myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL', [ArtistID])) + len(myDB.select('SELECT TrackTitle from have WHERE ArtistName like ? AND Matched = "Failed"', [ArtistName])) + havetracks = len( + myDB.select('SELECT TrackTitle from tracks WHERE ArtistID=? AND Location IS NOT NULL', + [ArtistID])) + len(myDB.select( + 'SELECT TrackTitle from have WHERE ArtistName like ? AND Matched = "Failed"', + [ArtistName])) myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?', [havetracks, ArtistID]) if not append: @@ -352,18 +391,21 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, logger.info('Library scan complete') -#ADDED THIS SECTION TO MARK ALBUMS AS DOWNLOADED IF ARTISTS ARE ADDED EN MASSE BEFORE LIBRARY IS SCANNED + +# ADDED THIS SECTION TO MARK ALBUMS AS DOWNLOADED IF ARTISTS ARE ADDED EN MASSE BEFORE LIBRARY IS SCANNED def update_album_status(AlbumID=None): myDB = db.DBConnection() logger.info('Counting matched tracks to mark albums as skipped/downloaded') if AlbumID: - album_status_updater = myDB.action('SELECT AlbumID, AlbumTitle, Status from albums WHERE AlbumID=?', [AlbumID]) + album_status_updater = myDB.action( + 'SELECT AlbumID, AlbumTitle, Status from albums WHERE AlbumID=?', [AlbumID]) else: album_status_updater = myDB.action('SELECT AlbumID, AlbumTitle, Status from albums') for album in album_status_updater: - track_counter = myDB.action('SELECT Location from tracks where AlbumID=?', [album['AlbumID']]) + track_counter = myDB.action('SELECT Location from tracks where AlbumID=?', + [album['AlbumID']]) total_tracks = 0 have_tracks = 0 for track in track_counter: @@ -383,7 +425,7 @@ def update_album_status(AlbumID=None): # I think we can only automatically change Skipped->Downloaded when updating # There was a bug report where this was causing infinite downloads if the album was # recent, but matched to less than 80%. It would go Downloaded->Skipped->Wanted->Downloaded->Skipped->Wanted->etc.... - #else: + # else: # if album['Status'] == "Skipped" or album['Status'] == "Downloaded": # new_album_status = "Skipped" # else: diff --git a/headphones/lock.py b/headphones/lock.py index 1ec3f2c8..8d385738 100644 --- a/headphones/lock.py +++ b/headphones/lock.py @@ -2,11 +2,12 @@ Locking-related classes """ -import headphones.logger import time import threading import Queue +import headphones.logger + class TimedLock(object): """ diff --git a/headphones/logger.py b/headphones/logger.py index 84c08df3..b01e880f 100644 --- a/headphones/logger.py +++ b/headphones/logger.py @@ -13,24 +13,24 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -from headphones import helpers - -from logutils.queue import QueueHandler, QueueListener from logging import handlers - import multiprocessing import contextlib -import headphones import threading import traceback import logging import errno import sys + import os +from headphones import helpers +from logutils.queue import QueueHandler, QueueListener +import headphones + # These settings are for file logging only FILENAME = "headphones.log" -MAX_SIZE = 1000000 # 1 MB +MAX_SIZE = 1000000 # 1 MB MAX_FILES = 5 # Headphones logger @@ -39,6 +39,7 @@ logger = logging.getLogger("headphones") # Global queue for multiprocessing logging queue = None + class LogListHandler(logging.Handler): """ Log handler for Web UI. @@ -71,8 +72,8 @@ def listener(): # http://stackoverflow.com/questions/2009278 for more information. if e.errno == errno.EACCES: logger.warning("Multiprocess logging disabled, because " - "current user cannot map shared memory. You won't see any" \ - "logging generated by the worker processed.") + "current user cannot map shared memory. You won't see any" \ + "logging generated by the worker processed.") # Multiprocess logging may be disabled. if not queue: @@ -149,8 +150,10 @@ def initLogger(console=False, log_dir=False, verbose=False): if log_dir: filename = os.path.join(log_dir, FILENAME) - file_formatter = logging.Formatter('%(asctime)s - %(levelname)-7s :: %(threadName)s : %(message)s', '%d-%b-%Y %H:%M:%S') - file_handler = handlers.RotatingFileHandler(filename, maxBytes=MAX_SIZE, backupCount=MAX_FILES) + file_formatter = logging.Formatter( + '%(asctime)s - %(levelname)-7s :: %(threadName)s : %(message)s', '%d-%b-%Y %H:%M:%S') + file_handler = handlers.RotatingFileHandler(filename, maxBytes=MAX_SIZE, + backupCount=MAX_FILES) file_handler.setLevel(logging.DEBUG) file_handler.setFormatter(file_formatter) @@ -158,7 +161,8 @@ def initLogger(console=False, log_dir=False, verbose=False): # Setup console logger if console: - console_formatter = logging.Formatter('%(asctime)s - %(levelname)s :: %(threadName)s : %(message)s', '%d-%b-%Y %H:%M:%S') + console_formatter = logging.Formatter( + '%(asctime)s - %(levelname)s :: %(threadName)s : %(message)s', '%d-%b-%Y %H:%M:%S') console_handler = logging.StreamHandler() console_handler.setFormatter(console_formatter) console_handler.setLevel(logging.DEBUG) @@ -212,11 +216,13 @@ def initHooks(global_exceptions=True, thread_exceptions=True, pass_original=True raise except: excepthook(*sys.exc_info()) + self.run = new_run # Monkey patch the run() by monkey patching the __init__ method threading.Thread.__init__ = new_init + # Expose logger methods info = logger.info warn = logger.warn diff --git a/headphones/lyrics.py b/headphones/lyrics.py index 066b97c5..c0f45ed5 100644 --- a/headphones/lyrics.py +++ b/headphones/lyrics.py @@ -13,18 +13,17 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -import re import htmlentitydefs +import re from headphones import logger, request def getLyrics(artist, song): - params = {"artist": artist.encode('utf-8'), - "song": song.encode('utf-8'), - "fmt": 'xml' - } + "song": song.encode('utf-8'), + "fmt": 'xml' + } url = 'http://lyrics.wikia.com/api.php' data = request.request_minidom(url, params=params) @@ -46,10 +45,13 @@ def getLyrics(artist, song): logger.warn('Error fetching lyrics from: %s' % lyricsurl) return - m = re.compile('''
.*?
(.*?) %s', downloaded_track.decode(headphones.SYS_ENCODING, 'replace'), new_file_name.decode(headphones.SYS_ENCODING, 'replace')) + logger.debug('Renaming %s ---> %s', + downloaded_track.decode(headphones.SYS_ENCODING, 'replace'), + new_file_name.decode(headphones.SYS_ENCODING, 'replace')) try: os.rename(downloaded_track, new_file) except Exception as e: - logger.error('Error renaming file: %s. Error: %s', downloaded_track.decode(headphones.SYS_ENCODING, 'replace'), e) + logger.error('Error renaming file: %s. Error: %s', + downloaded_track.decode(headphones.SYS_ENCODING, 'replace'), e) continue def updateFilePermissions(albumpaths): - for folder in albumpaths: logger.info("Updating file permissions in %s", folder) for r, d, f in os.walk(folder): @@ -1053,7 +1152,8 @@ def updateFilePermissions(albumpaths): logger.error("Could not change permissions for file: %s", full_path) continue else: - logger.debug("Not changing file permissions, since it is disabled: %s", full_path.decode(headphones.SYS_ENCODING, 'replace')) + logger.debug("Not changing file permissions, since it is disabled: %s", + full_path.decode(headphones.SYS_ENCODING, 'replace')) def renameUnprocessedFolder(path, tag): @@ -1076,7 +1176,6 @@ def renameUnprocessedFolder(path, tag): def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None, keep_original_folder=False): - logger.info('Force checking download folder for completed downloads') ignored = 0 @@ -1089,9 +1188,11 @@ def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None, keep_orig if dir: download_dirs.append(dir.encode(headphones.SYS_ENCODING, 'replace')) if headphones.CONFIG.DOWNLOAD_DIR and not dir: - download_dirs.append(headphones.CONFIG.DOWNLOAD_DIR.encode(headphones.SYS_ENCODING, 'replace')) + download_dirs.append( + headphones.CONFIG.DOWNLOAD_DIR.encode(headphones.SYS_ENCODING, 'replace')) if headphones.CONFIG.DOWNLOAD_TORRENT_DIR and not dir: - download_dirs.append(headphones.CONFIG.DOWNLOAD_TORRENT_DIR.encode(headphones.SYS_ENCODING, 'replace')) + download_dirs.append( + headphones.CONFIG.DOWNLOAD_TORRENT_DIR.encode(headphones.SYS_ENCODING, 'replace')) # If DOWNLOAD_DIR and DOWNLOAD_TORRENT_DIR are the same, remove the duplicate to prevent us from trying to process the same folder twice. download_dirs = list(set(download_dirs)) @@ -1108,7 +1209,8 @@ def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None, keep_orig # Scan for subfolders subfolders = os.listdir(download_dir) ignored += helpers.path_filter_patterns(subfolders, - headphones.CONFIG.IGNORED_FOLDERS, root=download_dir) + headphones.CONFIG.IGNORED_FOLDERS, + root=download_dir) for folder in subfolders: path_to_folder = os.path.join(download_dir, folder) @@ -1125,7 +1227,7 @@ def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None, keep_orig if folders: logger.debug('Expanded post processing folders: %s', folders) logger.info('Found %d folders to process (%d ignored).', - len(folders), ignored) + len(folders), ignored) else: logger.info('Found no folders to process. Aborting.') return @@ -1143,15 +1245,23 @@ def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None, keep_orig # underscores -> dots (this might be hit or miss since it assumes all # spaces/underscores came from sab replacing values logger.debug('Attempting to find album in the snatched table') - snatched = myDB.action('SELECT AlbumID, Title, Kind, Status from snatched WHERE FolderName LIKE ?', [folder_basename]).fetchone() + snatched = myDB.action( + 'SELECT AlbumID, Title, Kind, Status from snatched WHERE FolderName LIKE ?', + [folder_basename]).fetchone() if snatched: - if headphones.CONFIG.KEEP_TORRENT_FILES and snatched['Kind'] == 'torrent' and snatched['Status'] == 'Processed': - logger.info('%s is a torrent folder being preserved for seeding and has already been processed. Skipping.', folder_basename) + if headphones.CONFIG.KEEP_TORRENT_FILES and snatched['Kind'] == 'torrent' and snatched[ + 'Status'] == 'Processed': + logger.info( + '%s is a torrent folder being preserved for seeding and has already been processed. Skipping.', + folder_basename) continue else: - logger.info('Found a match in the database: %s. Verifying to make sure it is the correct album', snatched['Title']) - verify(snatched['AlbumID'], folder, snatched['Kind'], keep_original_folder=keep_original_folder) + logger.info( + 'Found a match in the database: %s. Verifying to make sure it is the correct album', + snatched['Title']) + verify(snatched['AlbumID'], folder, snatched['Kind'], + keep_original_folder=keep_original_folder) continue # Attempt 2: strip release group id from filename @@ -1165,13 +1275,19 @@ def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None, keep_orig if rgid: rgid = possible_rgid - release = myDB.action('SELECT ArtistName, AlbumTitle, AlbumID from albums WHERE AlbumID=?', [rgid]).fetchone() + release = myDB.action( + 'SELECT ArtistName, AlbumTitle, AlbumID from albums WHERE AlbumID=?', + [rgid]).fetchone() if release: - logger.info('Found a match in the database: %s - %s. Verifying to make sure it is the correct album', release['ArtistName'], release['AlbumTitle']) - verify(release['AlbumID'], folder, forced=True, keep_original_folder=keep_original_folder) + logger.info( + 'Found a match in the database: %s - %s. Verifying to make sure it is the correct album', + release['ArtistName'], release['AlbumTitle']) + verify(release['AlbumID'], folder, forced=True, + keep_original_folder=keep_original_folder) continue else: - logger.info('Found a (possibly) valid Musicbrainz release group id in album folder name.') + logger.info( + 'Found a (possibly) valid Musicbrainz release group id in album folder name.') verify(rgid, folder, forced=True) continue @@ -1184,13 +1300,18 @@ def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None, keep_orig name = album = year = None if name and album: - release = myDB.action('SELECT AlbumID, ArtistName, AlbumTitle from albums WHERE ArtistName LIKE ? and AlbumTitle LIKE ?', [name, album]).fetchone() + release = myDB.action( + 'SELECT AlbumID, ArtistName, AlbumTitle from albums WHERE ArtistName LIKE ? and AlbumTitle LIKE ?', + [name, album]).fetchone() if release: - logger.info('Found a match in the database: %s - %s. Verifying to make sure it is the correct album', release['ArtistName'], release['AlbumTitle']) + logger.info( + 'Found a match in the database: %s - %s. Verifying to make sure it is the correct album', + release['ArtistName'], release['AlbumTitle']) verify(release['AlbumID'], folder, keep_original_folder=keep_original_folder) continue else: - logger.info('Querying MusicBrainz for the release group id for: %s - %s', name, album) + logger.info('Querying MusicBrainz for the release group id for: %s - %s', name, + album) try: rgid = mb.findAlbumID(helpers.latinToAscii(name), helpers.latinToAscii(album)) except: @@ -1219,13 +1340,18 @@ def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None, keep_orig name = album = None if name and album: - release = myDB.action('SELECT AlbumID, ArtistName, AlbumTitle from albums WHERE ArtistName LIKE ? and AlbumTitle LIKE ?', [name, album]).fetchone() + release = myDB.action( + 'SELECT AlbumID, ArtistName, AlbumTitle from albums WHERE ArtistName LIKE ? and AlbumTitle LIKE ?', + [name, album]).fetchone() if release: - logger.info('Found a match in the database: %s - %s. Verifying to make sure it is the correct album', release['ArtistName'], release['AlbumTitle']) + logger.info( + 'Found a match in the database: %s - %s. Verifying to make sure it is the correct album', + release['ArtistName'], release['AlbumTitle']) verify(release['AlbumID'], folder, keep_original_folder=keep_original_folder) continue else: - logger.info('Querying MusicBrainz for the release group id for: %s - %s', name, album) + logger.info('Querying MusicBrainz for the release group id for: %s - %s', name, + album) try: rgid = mb.findAlbumID(helpers.latinToAscii(name), helpers.latinToAscii(album)) except: @@ -1243,13 +1369,18 @@ def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None, keep_orig logger.debug('Attempt to extract album name by assuming it is the folder name') if '-' not in folder_basename: - release = myDB.action('SELECT AlbumID, ArtistName, AlbumTitle from albums WHERE AlbumTitle LIKE ?', [folder_basename]).fetchone() + release = myDB.action( + 'SELECT AlbumID, ArtistName, AlbumTitle from albums WHERE AlbumTitle LIKE ?', + [folder_basename]).fetchone() if release: - logger.info('Found a match in the database: %s - %s. Verifying to make sure it is the correct album', release['ArtistName'], release['AlbumTitle']) + logger.info( + 'Found a match in the database: %s - %s. Verifying to make sure it is the correct album', + release['ArtistName'], release['AlbumTitle']) verify(release['AlbumID'], folder, keep_original_folder=keep_original_folder) continue else: - logger.info('Querying MusicBrainz for the release group id for: %s', folder_basename) + logger.info('Querying MusicBrainz for the release group id for: %s', + folder_basename) try: rgid = mb.findAlbumID(album=helpers.latinToAscii(folder_basename)) except: @@ -1264,6 +1395,6 @@ def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None, keep_orig # Fail here logger.info("Couldn't parse '%s' into any valid format. If adding " \ - "albums from another source, they must be in an 'Artist - Album " \ - "[Year]' format, or end with the musicbrainz release group id.", - folder_basename) + "albums from another source, they must be in an 'Artist - Album " \ + "[Year]' format, or end with the musicbrainz release group id.", + folder_basename) diff --git a/headphones/request.py b/headphones/request.py index a3493f79..0fa843f7 100644 --- a/headphones/request.py +++ b/headphones/request.py @@ -13,16 +13,16 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -from headphones import logger - from xml.dom import minidom -from bs4 import BeautifulSoup +import collections +from bs4 import BeautifulSoup import requests +from headphones import logger import feedparser import headphones import headphones.lock -import collections + # Disable SSL certificate warnings. We have our own handling requests.packages.urllib3.disable_warnings() @@ -211,7 +211,7 @@ def server_message(response): # First attempt is to 'read' the response as HTML if response.headers.get("content-type") and \ - "text/html" in response.headers.get("content-type"): + "text/html" in response.headers.get("content-type"): try: soup = BeautifulSoup(response.content, "html5lib") except Exception: diff --git a/headphones/rutracker.py b/headphones/rutracker.py index f106e133..8395fbf3 100644 --- a/headphones/rutracker.py +++ b/headphones/rutracker.py @@ -1,19 +1,17 @@ #!/usr/bin/env python import urllib -import requests as requests -from urlparse import urlparse -from bs4 import BeautifulSoup - -import os import time -import re +from urlparse import urlparse +import re +import requests as requests +from bs4 import BeautifulSoup import headphones from headphones import logger -class Rutracker(object): +class Rutracker(object): def __init__(self): self.session = requests.session() self.timeout = 60 @@ -58,7 +56,8 @@ class Rutracker(object): self.loggedin = True logger.info("Successfully logged in to rutracker") else: - logger.error("Could not login to rutracker, credentials maybe incorrect, site is down or too many attempts. Try again later") + logger.error( + "Could not login to rutracker, credentials maybe incorrect, site is down or too many attempts. Try again later") self.loggedin = False return self.loggedin except Exception as e: @@ -111,7 +110,7 @@ class Rutracker(object): soup = BeautifulSoup(r.content, 'html5lib') # Debug - #logger.debug (soup.prettify()) + # logger.debug (soup.prettify()) # Check if still logged in if not self.still_logged_in(soup): @@ -130,7 +129,8 @@ class Rutracker(object): return None minimumseeders = int(headphones.CONFIG.NUMBEROFSEEDERS) - 1 - for item in zip(i.find_all(class_='hl-tags'),i.find_all(class_='dl-stub'),i.find_all(class_='seedmed')): + for item in zip(i.find_all(class_='hl-tags'), i.find_all(class_='dl-stub'), + i.find_all(class_='seedmed')): title = item[0].get_text() url = item[1].get('href') size_formatted = item[1].get_text()[:-2] @@ -149,12 +149,15 @@ class Rutracker(object): if size < self.maxsize and minimumseeders < int(seeds): logger.info('Found %s. Size: %s' % (title, size_formatted)) - #Torrent topic page - torrent_id = dict([part.split('=') for part in urlparse(url)[4].split('&')])['t'] + # Torrent topic page + torrent_id = dict([part.split('=') for part in urlparse(url)[4].split('&')])[ + 't'] topicurl = 'http://rutracker.org/forum/viewtopic.php?t=' + torrent_id rulist.append((title, size, topicurl, 'rutracker.org', 'torrent', True)) else: - logger.info("%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)" % (title, size, int(seeds))) + logger.info( + "%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)" % ( + title, size, int(seeds))) if not rulist: logger.info("No valid results found from rutracker") @@ -165,7 +168,6 @@ class Rutracker(object): logger.error("An unknown error occurred in the rutracker parser: %s" % e) return None - def get_torrent_data(self, url): """ return the .torrent data @@ -176,14 +178,14 @@ class Rutracker(object): cookie = {'bb_dl': torrent_id} try: headers = {'Referer': url} - r = self.session.post(url=downloadurl, cookies=cookie, headers=headers, timeout=self.timeout) + r = self.session.post(url=downloadurl, cookies=cookie, headers=headers, + timeout=self.timeout) return r.content except Exception as e: logger.error('Error getting torrent: %s', e) return False - - #TODO get this working in utorrent.py + # TODO get this working in utorrent.py def utorrent_add_file(self, data): host = headphones.CONFIG.UTORRENT_HOST @@ -197,7 +199,8 @@ class Rutracker(object): base_url = host url = base_url + '/gui/' - self.session.auth = (headphones.CONFIG.UTORRENT_USERNAME, headphones.CONFIG.UTORRENT_PASSWORD) + self.session.auth = ( + headphones.CONFIG.UTORRENT_USERNAME, headphones.CONFIG.UTORRENT_PASSWORD) try: r = self.session.get(url + 'token.html') @@ -221,4 +224,3 @@ class Rutracker(object): self.session.post(url, params={'action': 'add-file'}, files=files) except Exception as e: logger.exception('Error adding file to utorrent %s', e) - diff --git a/headphones/sab.py b/headphones/sab.py index 79a67a0e..fa7c3309 100644 --- a/headphones/sab.py +++ b/headphones/sab.py @@ -13,26 +13,24 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -##################################### -## Stolen from Sick-Beard's sab.py ## -##################################### +################################### +# Stolen from Sick-Beard's sab.py # +################################### -import MultipartPostHandler -import headphones import cookielib -import httplib +import headphones from headphones.common import USER_AGENT from headphones import logger, helpers, request def sab_api_call(request_type=None, params={}, **kwargs): - if not headphones.CONFIG.SAB_HOST.startswith('http'): headphones.CONFIG.SAB_HOST = 'http://' + headphones.CONFIG.SAB_HOST if headphones.CONFIG.SAB_HOST.endswith('/'): - headphones.CONFIG.SAB_HOST = headphones.CONFIG.SAB_HOST[0:len(headphones.CONFIG.SAB_HOST) - 1] + headphones.CONFIG.SAB_HOST = headphones.CONFIG.SAB_HOST[ + 0:len(headphones.CONFIG.SAB_HOST) - 1] url = headphones.CONFIG.SAB_HOST + "/" + "api?" @@ -42,11 +40,11 @@ def sab_api_call(request_type=None, params={}, **kwargs): params['ma_password'] = headphones.CONFIG.SAB_PASSWORD if headphones.CONFIG.SAB_APIKEY: params['apikey'] = headphones.CONFIG.SAB_APIKEY - - if request_type=='send_nzb' and headphones.CONFIG.SAB_CATEGORY: + + if request_type == 'send_nzb' and headphones.CONFIG.SAB_CATEGORY: params['cat'] = headphones.CONFIG.SAB_CATEGORY - params['output']='json' + params['output'] = 'json' response = request.request_json(url, params=params, **kwargs) @@ -57,8 +55,8 @@ def sab_api_call(request_type=None, params={}, **kwargs): logger.debug("Successfully connected to SABnzbd on url: %s" % headphones.CONFIG.SAB_HOST) return response -def sendNZB(nzb): +def sendNZB(nzb): params = {} # if it's a normal result we just pass SAB the URL if nzb.resultType == "nzb": @@ -87,7 +85,8 @@ def sendNZB(nzb): response = sab_api_call('send_nzb', params=params) elif nzb.resultType == "nzbdata": cookies = cookielib.CookieJar() - response = sab_api_call('send_nzb', params=params, method="post", files=files, cookies=cookies, headers=headers) + response = sab_api_call('send_nzb', params=params, method="post", files=files, + cookies=cookies, headers=headers) if not response: logger.info(u"No data returned from SABnzbd, NZB not sent") @@ -102,15 +101,15 @@ def sendNZB(nzb): def checkConfig(): - params = {'mode': 'get_config', - 'section': 'misc', - } + 'section': 'misc', + } config_options = sab_api_call(params=params) - + if not config_options: - logger.warn("Unable to read SABnzbd config file - cannot determine renaming options (might affect auto & forced post processing)") + logger.warn( + "Unable to read SABnzbd config file - cannot determine renaming options (might affect auto & forced post processing)") return (0, 0) replace_spaces = config_options['config']['misc']['replace_spaces'] diff --git a/headphones/searcher.py b/headphones/searcher.py index 93451107..aa07442b 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -15,35 +15,32 @@ # NZBGet support added by CurlyMo as a part of XBian - XBMC on the Raspberry Pi -import urllib -import urlparse -from pygazelle import api as gazelleapi -from pygazelle import encoding as gazelleencoding -from pygazelle import format as gazelleformat from base64 import b16encode, b32decode from hashlib import sha1 - -import os -import re import string -import shutil import random import urllib import datetime -import headphones import subprocess import unicodedata +import urlparse +import os +import re +from pygazelle import api as gazelleapi +from pygazelle import encoding as gazelleencoding +from pygazelle import format as gazelleformat +import headphones from headphones.common import USER_AGENT from headphones import logger, db, helpers, classes, sab, nzbget, request from headphones import utorrent, transmission, notifiers, rutracker - from bencode import bencode, bdecode + # Magnet to torrent services, for Black hole. Stolen from CouchPotato. TORRENT_TO_MAGNET_SERVICES = [ - #'https://zoink.it/torrent/%s.torrent', - #'http://torrage.com/torrent/%s.torrent', + # 'https://zoink.it/torrent/%s.torrent', + # 'http://torrage.com/torrent/%s.torrent', 'https://torcache.net/torrent/%s.torrent', ] @@ -123,10 +120,10 @@ def read_torrent_name(torrent_file, default_name=None): except KeyError: if default_name: logger.warning("Couldn't get name from torrent file: %s. " \ - "Defaulting to '%s'", e, default_name) + "Defaulting to '%s'", e, default_name) else: logger.warning("Couldn't get name from torrent file: %s. No " \ - "default given", e) + "default given", e) # Return default return default_name @@ -147,7 +144,7 @@ def calculate_torrent_hash(link, data=None): torrent_hash = sha1(bencode(info)).hexdigest() else: raise ValueError("Cannot calculate torrent hash without magnet link " \ - "or data") + "or data") return torrent_hash.upper() @@ -173,7 +170,7 @@ def get_seed_ratio(provider): elif provider == 'Mininova': seed_ratio = headphones.CONFIG.MININOVA_RATIO elif provider == 'Strike': - seed_ratio = headphones.CONFIG.STRIKE_RATIO + seed_ratio = headphones.CONFIG.STRIKE_RATIO else: seed_ratio = None @@ -187,13 +184,13 @@ def get_seed_ratio(provider): def searchforalbum(albumid=None, new=False, losslessOnly=False, - choose_specific_download=False): - + choose_specific_download=False): logger.info('Searching for wanted albums') myDB = db.DBConnection() if not albumid: - results = myDB.select('SELECT * from albums WHERE Status="Wanted" OR Status="Wanted Lossless"') + results = myDB.select( + 'SELECT * from albums WHERE Status="Wanted" OR Status="Wanted Lossless"') for album in results: @@ -205,11 +202,13 @@ def searchforalbum(albumid=None, new=False, losslessOnly=False, try: release_date = datetime.datetime.strptime(album['ReleaseDate'], "%Y-%m-%d") except: - logger.warn("No valid date for: %s. Skipping automatic search" % album['AlbumTitle']) + logger.warn( + "No valid date for: %s. Skipping automatic search" % album['AlbumTitle']) continue if release_date > datetime.datetime.today(): - logger.info("Skipping: %s. Waiting for release date of: %s" % (album['AlbumTitle'], album['ReleaseDate'])) + logger.info("Skipping: %s. Waiting for release date of: %s" % ( + album['AlbumTitle'], album['ReleaseDate'])) continue new = True @@ -217,7 +216,8 @@ def searchforalbum(albumid=None, new=False, losslessOnly=False, if album['Status'] == "Wanted Lossless": losslessOnly = True - logger.info('Searching for "%s - %s" since it is marked as wanted' % (album['ArtistName'], album['AlbumTitle'])) + logger.info('Searching for "%s - %s" since it is marked as wanted' % ( + album['ArtistName'], album['AlbumTitle'])) do_sorted_search(album, new, losslessOnly) elif albumid and choose_specific_download: @@ -228,21 +228,25 @@ def searchforalbum(albumid=None, new=False, losslessOnly=False, else: album = myDB.action('SELECT * from albums WHERE AlbumID=?', [albumid]).fetchone() - logger.info('Searching for "%s - %s" since it was marked as wanted' % (album['ArtistName'], album['AlbumTitle'])) + logger.info('Searching for "%s - %s" since it was marked as wanted' % ( + album['ArtistName'], album['AlbumTitle'])) do_sorted_search(album, new, losslessOnly) logger.info('Search for wanted albums complete') def do_sorted_search(album, new, losslessOnly, choose_specific_download=False): - - NZB_PROVIDERS = (headphones.CONFIG.HEADPHONES_INDEXER or headphones.CONFIG.NEWZNAB or headphones.CONFIG.NZBSORG or headphones.CONFIG.OMGWTFNZBS) - NZB_DOWNLOADERS = (headphones.CONFIG.SAB_HOST or headphones.CONFIG.BLACKHOLE_DIR or headphones.CONFIG.NZBGET_HOST) - TORRENT_PROVIDERS = (headphones.CONFIG.TORZNAB or headphones.CONFIG.KAT or headphones.CONFIG.PIRATEBAY or headphones.CONFIG.OLDPIRATEBAY or headphones.CONFIG.MININOVA or headphones.CONFIG.WAFFLES or headphones.CONFIG.RUTRACKER or headphones.CONFIG.WHATCD or headphones.CONFIG.STRIKE) + NZB_PROVIDERS = ( + headphones.CONFIG.HEADPHONES_INDEXER or headphones.CONFIG.NEWZNAB or headphones.CONFIG.NZBSORG or headphones.CONFIG.OMGWTFNZBS) + NZB_DOWNLOADERS = ( + headphones.CONFIG.SAB_HOST or headphones.CONFIG.BLACKHOLE_DIR or headphones.CONFIG.NZBGET_HOST) + TORRENT_PROVIDERS = ( + headphones.CONFIG.TORZNAB or headphones.CONFIG.KAT or headphones.CONFIG.PIRATEBAY or headphones.CONFIG.OLDPIRATEBAY or headphones.CONFIG.MININOVA or headphones.CONFIG.WAFFLES or headphones.CONFIG.RUTRACKER or headphones.CONFIG.WHATCD or headphones.CONFIG.STRIKE) results = [] myDB = db.DBConnection() - albumlength = myDB.select('SELECT sum(TrackDuration) from tracks WHERE AlbumID=?', [album['AlbumID']])[0][0] + albumlength = \ + myDB.select('SELECT sum(TrackDuration) from tracks WHERE AlbumID=?', [album['AlbumID']])[0][0] if headphones.CONFIG.PREFER_TORRENTS == 0 and not choose_specific_download: @@ -269,7 +273,8 @@ def do_sorted_search(album, new, losslessOnly, choose_specific_download=False): nzb_results = searchNZB(album, new, losslessOnly, albumlength, choose_specific_download) if TORRENT_PROVIDERS: - torrent_results = searchTorrent(album, new, losslessOnly, albumlength, choose_specific_download) + torrent_results = searchTorrent(album, new, losslessOnly, albumlength, + choose_specific_download) if not nzb_results: nzb_results = [] @@ -283,7 +288,7 @@ def do_sorted_search(album, new, losslessOnly, choose_specific_download=False): return results # Filter all results that do not comply - results = [ result for result in results if result[5] ] + results = [result for result in results if result[5]] # Sort the remaining results sorted_search_results = sort_search_results(results, album, new, albumlength) @@ -305,14 +310,14 @@ def removeDisallowedFilenameChars(filename): def more_filtering(results, album, albumlength, new): - low_size_limit = None high_size_limit = None allow_lossless = False myDB = db.DBConnection() # Lossless - ignore results if target size outside bitrate range - if headphones.CONFIG.PREFERRED_QUALITY == 3 and albumlength and (headphones.CONFIG.LOSSLESS_BITRATE_FROM or headphones.CONFIG.LOSSLESS_BITRATE_TO): + if headphones.CONFIG.PREFERRED_QUALITY == 3 and albumlength and ( + headphones.CONFIG.LOSSLESS_BITRATE_FROM or headphones.CONFIG.LOSSLESS_BITRATE_TO): if headphones.CONFIG.LOSSLESS_BITRATE_FROM: low_size_limit = albumlength / 1000 * int(headphones.CONFIG.LOSSLESS_BITRATE_FROM) * 128 if headphones.CONFIG.LOSSLESS_BITRATE_TO: @@ -325,9 +330,11 @@ def more_filtering(results, album, albumlength, new): targetsize = albumlength / 1000 * int(headphones.CONFIG.PREFERRED_BITRATE) * 128 logger.info('Target size: %s' % helpers.bytes_to_mb(targetsize)) if headphones.CONFIG.PREFERRED_BITRATE_LOW_BUFFER: - low_size_limit = targetsize * int(headphones.CONFIG.PREFERRED_BITRATE_LOW_BUFFER) / 100 + low_size_limit = targetsize * int( + headphones.CONFIG.PREFERRED_BITRATE_LOW_BUFFER) / 100 if headphones.CONFIG.PREFERRED_BITRATE_HIGH_BUFFER: - high_size_limit = targetsize * int(headphones.CONFIG.PREFERRED_BITRATE_HIGH_BUFFER) / 100 + high_size_limit = targetsize * int( + headphones.CONFIG.PREFERRED_BITRATE_HIGH_BUFFER) / 100 if headphones.CONFIG.PREFERRED_BITRATE_ALLOW_LOSSLESS: allow_lossless = True @@ -335,22 +342,18 @@ def more_filtering(results, album, albumlength, new): for result in results: - normalizedAlbumArtist = removeDisallowedFilenameChars(album['ArtistName']) - normalizedAlbumTitle = removeDisallowedFilenameChars(album['AlbumTitle']) - normalizedResultTitle = removeDisallowedFilenameChars(result[0]) - artistTitleCount = normalizedResultTitle.count(normalizedAlbumArtist) - - # WHAT DOES THIS DO? - #if normalizedAlbumArtist in normalizedAlbumTitle and artistTitleCount < 2: - # logger.info("Removing %s from %s" % (result[0], result[3])) - # continue - if low_size_limit and (int(result[1]) < low_size_limit): - logger.info("%s from %s is too small for this album - not considering it. (Size: %s, Minsize: %s)", result[0], result[3], helpers.bytes_to_mb(result[1]), helpers.bytes_to_mb(low_size_limit)) + logger.info( + "%s from %s is too small for this album - not considering it. (Size: %s, Minsize: %s)", + result[0], result[3], helpers.bytes_to_mb(result[1]), + helpers.bytes_to_mb(low_size_limit)) continue if high_size_limit and (int(result[1]) > high_size_limit): - logger.info("%s from %s is too large for this album - not considering it. (Size: %s, Maxsize: %s)", result[0], result[3], helpers.bytes_to_mb(result[1]), helpers.bytes_to_mb(high_size_limit)) + logger.info( + "%s from %s is too large for this album - not considering it. (Size: %s, Maxsize: %s)", + result[0], result[3], helpers.bytes_to_mb(result[1]), + helpers.bytes_to_mb(high_size_limit)) # Keep lossless results if there are no good lossy matches if not (allow_lossless and 'flac' in result[0].lower()): @@ -360,7 +363,8 @@ def more_filtering(results, album, albumlength, new): alreadydownloaded = myDB.select('SELECT * from snatched WHERE URL=?', [result[2]]) if len(alreadydownloaded): - logger.info('%s has already been downloaded from %s. Skipping.' % (result[0], result[3])) + logger.info( + '%s has already been downloaded from %s. Skipping.' % (result[0], result[3])) continue newlist.append(result) @@ -371,9 +375,9 @@ def more_filtering(results, album, albumlength, new): def sort_search_results(resultlist, album, new, albumlength): - if new and not len(resultlist): - logger.info('No more results found for: %s - %s' % (album['ArtistName'], album['AlbumTitle'])) + logger.info( + 'No more results found for: %s - %s' % (album['ArtistName'], album['AlbumTitle'])) return None # Add a priority if it has any of the preferred words @@ -387,7 +391,8 @@ def sort_search_results(resultlist, album, new, albumlength): if any(word.lower() in result[0].lower() for word in preferred_words): priority = 1 # add a search provider priority (weighted based on position) - i = next((i for i, word in enumerate(preferred_words) if word in result[3].lower()), None) + i = next((i for i, word in enumerate(preferred_words) if word in result[3].lower()), + None) if i is not None: priority += round((len(preferred_words) - i) / float(len(preferred_words)), 2) @@ -401,8 +406,10 @@ def sort_search_results(resultlist, album, new, albumlength): targetsize = albumlength / 1000 * int(headphones.CONFIG.PREFERRED_BITRATE) * 128 if not targetsize: - logger.info('No track information for %s - %s. Defaulting to highest quality' % (album['ArtistName'], album['AlbumTitle'])) - finallist = sorted(resultlist, key=lambda title: (title[5], int(title[1])), reverse=True) + logger.info('No track information for %s - %s. Defaulting to highest quality' % ( + album['ArtistName'], album['AlbumTitle'])) + finallist = sorted(resultlist, key=lambda title: (title[5], int(title[1])), + reverse=True) else: newlist = [] @@ -412,36 +419,43 @@ def sort_search_results(resultlist, album, new, albumlength): # Add lossless results to the "flac list" which we can use if there are no good lossy matches if 'flac' in result[0].lower(): - flac_list.append((result[0], result[1], result[2], result[3], result[4], result[5])) + flac_list.append( + (result[0], result[1], result[2], result[3], result[4], result[5])) continue delta = abs(targetsize - int(result[1])) - newlist.append((result[0], result[1], result[2], result[3], result[4], result[5], delta)) + newlist.append( + (result[0], result[1], result[2], result[3], result[4], result[5], delta)) finallist = sorted(newlist, key=lambda title: (-title[5], title[6])) - if not len(finallist) and len(flac_list) and headphones.CONFIG.PREFERRED_BITRATE_ALLOW_LOSSLESS: - logger.info("Since there were no appropriate lossy matches (and at least one lossless match), going to use lossless instead") - finallist = sorted(flac_list, key=lambda title: (title[5], int(title[1])), reverse=True) + if not len(finallist) and len( + flac_list) and headphones.CONFIG.PREFERRED_BITRATE_ALLOW_LOSSLESS: + logger.info( + "Since there were no appropriate lossy matches (and at least one lossless match), going to use lossless instead") + finallist = sorted(flac_list, key=lambda title: (title[5], int(title[1])), + reverse=True) except Exception: logger.exception('Unhandled exception') - logger.info('No track information for %s - %s. Defaulting to highest quality', album['ArtistName'], album['AlbumTitle']) + logger.info('No track information for %s - %s. Defaulting to highest quality', + album['ArtistName'], album['AlbumTitle']) - finallist = sorted(resultlist, key=lambda title: (title[5], int(title[1])), reverse=True) + finallist = sorted(resultlist, key=lambda title: (title[5], int(title[1])), + reverse=True) else: finallist = sorted(resultlist, key=lambda title: (title[5], int(title[1])), reverse=True) if not len(finallist): - logger.info('No appropriate matches found for %s - %s', album['ArtistName'], album['AlbumTitle']) + logger.info('No appropriate matches found for %s - %s', album['ArtistName'], + album['AlbumTitle']) return None return finallist def get_year_from_release_date(release_date): - try: year = release_date[:4] except TypeError: @@ -450,11 +464,13 @@ def get_year_from_release_date(release_date): return year -def searchNZB(album, new=False, losslessOnly=False, albumlength=None, choose_specific_download=False): +def searchNZB(album, new=False, losslessOnly=False, albumlength=None, + choose_specific_download=False): reldate = album['ReleaseDate'] year = get_year_from_release_date(reldate) - dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', '.': '', ':': ''} + dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', + '*': '', '.': '', ':': ''} cleanalbum = helpers.latinToAscii(helpers.replace_all(album['AlbumTitle'], dic)).strip() cleanartist = helpers.latinToAscii(helpers.replace_all(album['ArtistName'], dic)).strip() @@ -468,7 +484,8 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, choose_spe # FLAC usually doesn't have a year for some reason so leave it out. # Various Artist albums might be listed as VA, so I'll leave that out too # Only use the year if the term could return a bunch of different albums, i.e. self-titled albums - if album['ArtistName'] in album['AlbumTitle'] or len(album['ArtistName']) < 4 or len(album['AlbumTitle']) < 4: + if album['ArtistName'] in album['AlbumTitle'] or len(album['ArtistName']) < 4 or len( + album['AlbumTitle']) < 4: term = cleanartist + ' ' + cleanalbum + ' ' + year elif album['ArtistName'] == 'Various Artists': term = cleanalbum + ' ' + year @@ -542,7 +559,8 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, choose_spe newznab_hosts = [] if headphones.CONFIG.NEWZNAB_HOST and headphones.CONFIG.NEWZNAB_ENABLED: - newznab_hosts.append((headphones.CONFIG.NEWZNAB_HOST, headphones.CONFIG.NEWZNAB_APIKEY, headphones.CONFIG.NEWZNAB_ENABLED)) + newznab_hosts.append((headphones.CONFIG.NEWZNAB_HOST, headphones.CONFIG.NEWZNAB_APIKEY, + headphones.CONFIG.NEWZNAB_ENABLED)) for newznab_host in headphones.CONFIG.get_extra_newznabs(): if newznab_host[2] == '1' or newznab_host[2] == 1: @@ -575,7 +593,7 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, choose_spe categories = categories + ",4050" # Request results - logger.info('Parsing results from %s using search term: %s' % (newznab_host[0],term)) + logger.info('Parsing results from %s using search term: %s' % (newznab_host[0], term)) headers = {'User-Agent': USER_AGENT} params = { @@ -602,13 +620,15 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, choose_spe title = item.title size = int(item.links[1]['length']) if all(word.lower() in title.lower() for word in term.split()): - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + logger.info( + 'Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) resultlist.append((title, size, url, provider, 'nzb', True)) else: logger.info('Skipping %s, not all search term words found' % title) except Exception as e: - logger.exception("An unknown error occurred trying to parse the feed: %s" % e) + logger.exception( + "An unknown error occurred trying to parse the feed: %s" % e) if headphones.CONFIG.NZBSORG: provider = "nzbsorg" @@ -663,7 +683,7 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, choose_spe elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless: categories = "22,7" else: - categories = "7" + categories = "7" if album['Type'] == 'Other': categories = "29" @@ -708,7 +728,8 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, choose_spe # # Also will filter flac & remix albums if not specifically looking for it # This code also checks the ignored words and required words - results = [result for result in resultlist if verifyresult(result[0], artistterm, term, losslessOnly)] + results = [result for result in resultlist if + verifyresult(result[0], artistterm, term, losslessOnly)] # Additional filtering for size etc if results and not choose_specific_download: @@ -718,8 +739,8 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, choose_spe def send_to_downloader(data, bestqual, album): - - logger.info(u'Found best result from %s: %s - %s', bestqual[3], bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1])) + logger.info(u'Found best result from %s: %s - %s', bestqual[3], bestqual[2], + bestqual[0], helpers.bytes_to_mb(bestqual[1])) # Get rid of any dodgy chars here so we can prevent sab from renaming our downloads kind = bestqual[4] seed_ratio = None @@ -768,7 +789,10 @@ def send_to_downloader(data, bestqual, album): logger.error('Couldn\'t write NZB file: %s', e) return else: - folder_name = '%s - %s [%s]' % (helpers.latinToAscii(album['ArtistName']).encode('UTF-8').replace('/', '_'), helpers.latinToAscii(album['AlbumTitle']).encode('UTF-8').replace('/', '_'), get_year_from_release_date(album['ReleaseDate'])) + folder_name = '%s - %s [%s]' % ( + helpers.latinToAscii(album['ArtistName']).encode('UTF-8').replace('/', '_'), + helpers.latinToAscii(album['AlbumTitle']).encode('UTF-8').replace('/', '_'), + get_year_from_release_date(album['ReleaseDate'])) # Blackhole if headphones.CONFIG.TORRENT_DOWNLOADER == 0: @@ -783,9 +807,11 @@ def send_to_downloader(data, bestqual, album): if headphones.SYS_PLATFORM == 'win32': os.startfile(bestqual[2]) elif headphones.SYS_PLATFORM == 'darwin': - subprocess.Popen(["open", bestqual[2]], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + subprocess.Popen(["open", bestqual[2]], stdout=subprocess.PIPE, + stderr=subprocess.PIPE) else: - subprocess.Popen(["xdg-open", bestqual[2]], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + subprocess.Popen(["xdg-open", bestqual[2]], stdout=subprocess.PIPE, + stderr=subprocess.PIPE) # Gonna just take a guess at this..... Is there a better way to find this out? folder_name = bestqual[0] @@ -810,20 +836,20 @@ def send_to_downloader(data, bestqual, album): return # Extract folder name from torrent folder_name = read_torrent_name(download_path, - bestqual[0]) + bestqual[0]) # Break for loop break else: # No service succeeded logger.warning("Unable to convert magnet with hash " \ - "'%s' into a torrent file.", torrent_hash) + "'%s' into a torrent file.", torrent_hash) return else: logger.error("Cannot save magnet link in blackhole. " \ - "Please switch your torrent downloader to " \ - "Transmission or uTorrent, or allow Headphones " \ - "to open or convert magnet links") + "Please switch your torrent downloader to " \ + "Transmission or uTorrent, or allow Headphones " \ + "to open or convert magnet links") return else: @@ -860,7 +886,7 @@ def send_to_downloader(data, bestqual, album): if seed_ratio is not None: transmission.setSeedRatio(torrentid, seed_ratio) - else:# if headphones.CONFIG.TORRENT_DOWNLOADER == 2: + else: # if headphones.CONFIG.TORRENT_DOWNLOADER == 2: logger.info("Sending torrent to uTorrent") # Add torrent @@ -894,11 +920,16 @@ def send_to_downloader(data, bestqual, album): myDB = db.DBConnection() myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [album['AlbumID']]) - myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?, ?)', [album['AlbumID'], bestqual[0], bestqual[1], bestqual[2], "Snatched", folder_name, kind]) + myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?, ?)', + [album['AlbumID'], bestqual[0], bestqual[1], bestqual[2], "Snatched", folder_name, + kind]) # Store the torrent id so we can check later if it's finished seeding and can be removed if seed_ratio is not None and seed_ratio != 0 and torrentid: - myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?, ?)', [album['AlbumID'], bestqual[0], bestqual[1], bestqual[2], "Seed_Snatched", torrentid, kind]) + myDB.action( + 'INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?, ?)', + [album['AlbumID'], bestqual[0], bestqual[1], bestqual[2], "Seed_Snatched", torrentid, + kind]) # notify artist = album[1] @@ -959,11 +990,11 @@ def send_to_downloader(data, bestqual, album): message = 'Snatched from ' + provider + '. ' + name email.notify(title, message) -def verifyresult(title, artistterm, term, lossless): +def verifyresult(title, artistterm, term, lossless): title = re.sub('[\.\-\/\_]', ' ', title) - #if artistterm != 'Various Artists': + # if artistterm != 'Various Artists': # # if not re.search('^' + re.escape(artistterm), title, re.IGNORECASE): # #logger.info("Removed from results: " + title + " (artist not at string start).") @@ -975,22 +1006,28 @@ def verifyresult(title, artistterm, term, lossless): # logger.info("Removed from results: " + title + " (pre substring result).") # return False - #another attempt to weed out substrings. We don't want "Vol III" when we were looking for "Vol II" + # another attempt to weed out substrings. We don't want "Vol III" when we were looking for "Vol II" # Filter out remix search results (if we're not looking for it) if 'remix' not in term.lower() and 'remix' in title.lower(): - logger.info("Removed %s from results because it's a remix album and we're not looking for a remix album right now.", title) + logger.info( + "Removed %s from results because it's a remix album and we're not looking for a remix album right now.", + title) return False # Filter out FLAC if we're not specifically looking for it - if headphones.CONFIG.PREFERRED_QUALITY == (0 or '0') and 'flac' in title.lower() and not lossless: - logger.info("Removed %s from results because it's a lossless album and we're not looking for a lossless album right now.", title) + if headphones.CONFIG.PREFERRED_QUALITY == ( + 0 or '0') and 'flac' in title.lower() and not lossless: + logger.info( + "Removed %s from results because it's a lossless album and we're not looking for a lossless album right now.", + title) return False if headphones.CONFIG.IGNORED_WORDS: for each_word in helpers.split_string(headphones.CONFIG.IGNORED_WORDS): if each_word.lower() in title.lower(): - logger.info("Removed '%s' from results because it contains ignored word: '%s'", title, each_word) + logger.info("Removed '%s' from results because it contains ignored word: '%s'", + title, each_word) return False if headphones.CONFIG.REQUIRED_WORDS: @@ -1000,17 +1037,22 @@ def verifyresult(title, artistterm, term, lossless): if any(word.lower() in title.lower() for word in or_words): continue else: - logger.info("Removed '%s' from results because it doesn't contain any of the required words in: '%s'", title, str(or_words)) + logger.info( + "Removed '%s' from results because it doesn't contain any of the required words in: '%s'", + title, str(or_words)) return False if each_word.lower() not in title.lower(): - logger.info("Removed '%s' from results because it doesn't contain required word: '%s'", title, each_word) + logger.info( + "Removed '%s' from results because it doesn't contain required word: '%s'", + title, each_word) return False if headphones.CONFIG.IGNORE_CLEAN_RELEASES: - for each_word in ['clean','edited','censored']: + for each_word in ['clean', 'edited', 'censored']: logger.debug("Checking if '%s' is in search result: '%s'", each_word, title) if each_word.lower() in title.lower() and each_word.lower() not in term.lower(): - logger.info("Removed '%s' from results because it contains clean album word: '%s'", title, each_word) + logger.info("Removed '%s' from results because it contains clean album word: '%s'", + title, each_word) return False tokens = re.split('\W', term, re.IGNORECASE | re.UNICODE) @@ -1022,27 +1064,31 @@ def verifyresult(title, artistterm, term, lossless): continue if not re.search('(?:\W|^)+' + token + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE): cleantoken = ''.join(c for c in token if c not in string.punctuation) - if not not re.search('(?:\W|^)+' + cleantoken + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE): + if not not re.search('(?:\W|^)+' + cleantoken + '(?:\W|$)+', title, + re.IGNORECASE | re.UNICODE): dic = {'!': 'i', '$': 's'} dumbtoken = helpers.replace_all(token, dic) - if not not re.search('(?:\W|^)+' + dumbtoken + '(?:\W|$)+', title, re.IGNORECASE | re.UNICODE): - logger.info("Removed from results: %s (missing tokens: %s and %s)", title, token, cleantoken) + if not not re.search('(?:\W|^)+' + dumbtoken + '(?:\W|$)+', title, + re.IGNORECASE | re.UNICODE): + logger.info("Removed from results: %s (missing tokens: %s and %s)", title, + token, cleantoken) return False return True -def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose_specific_download=False): +def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, + choose_specific_download=False): global gazelle # persistent what.cd api object to reduce number of login attempts - global ruobj # and rutracker + global ruobj # and rutracker - albumid = album['AlbumID'] reldate = album['ReleaseDate'] year = get_year_from_release_date(reldate) # MERGE THIS WITH THE TERM CLEANUP FROM searchNZB - dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': ' ', '*': ''} + dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': ' ', + '*': ''} semi_cleanalbum = helpers.replace_all(album['AlbumTitle'], dic) cleanalbum = helpers.latinToAscii(semi_cleanalbum) @@ -1059,7 +1105,8 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose # FLAC usually doesn't have a year for some reason so I'll leave it out # Various Artist albums might be listed as VA, so I'll leave that out too # Only use the year if the term could return a bunch of different albums, i.e. self-titled albums - if album['ArtistName'] in album['AlbumTitle'] or len(album['ArtistName']) < 4 or len(album['AlbumTitle']) < 4: + if album['ArtistName'] in album['AlbumTitle'] or len(album['ArtistName']) < 4 or len( + album['AlbumTitle']) < 4: term = cleanartist + ' ' + cleanalbum + ' ' + year elif album['ArtistName'] == 'Various Artists': term = cleanalbum + ' ' + year @@ -1103,7 +1150,8 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose torznab_hosts = [] if headphones.CONFIG.TORZNAB_HOST and headphones.CONFIG.TORZNAB_ENABLED: - torznab_hosts.append((headphones.CONFIG.TORZNAB_HOST, headphones.CONFIG.TORZNAB_APIKEY, headphones.CONFIG.TORZNAB_ENABLED)) + torznab_hosts.append((headphones.CONFIG.TORZNAB_HOST, headphones.CONFIG.TORZNAB_APIKEY, + headphones.CONFIG.TORZNAB_ENABLED)) for torznab_host in headphones.CONFIG.get_extra_torznabs(): if torznab_host[2] == '1' or torznab_host[2] == 1: @@ -1125,7 +1173,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose provider = torznab_host[0] # Request results - logger.info('Parsing results from %s using search term: %s' % (torznab_host[0],term)) + logger.info('Parsing results from %s using search term: %s' % (torznab_host[0], term)) headers = {'User-Agent': USER_AGENT} params = { @@ -1152,13 +1200,15 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose title = item.title size = int(item.links[1]['length']) if all(word.lower() in title.lower() for word in term.split()): - logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + logger.info( + 'Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) resultlist.append((title, size, url, provider, 'torrent', True)) else: logger.info('Skipping %s, not all search term words found' % title) except Exception as e: - logger.exception("An unknown error occurred trying to parse the feed: %s" % e) + logger.exception( + "An unknown error occurred trying to parse the feed: %s" % e) if headphones.CONFIG.KAT: provider = "Kick Ass Torrents" @@ -1185,7 +1235,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose providerurl += " category:music/" # Requesting content - logger.info("Searching %s using term: %s" % (provider,ka_term)) + logger.info("Searching %s using term: %s" % (provider, ka_term)) params = { "field": "seeders", @@ -1194,7 +1244,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose } data = request.request_feed(url=providerurl, params=params, - whitelist_status_code=[404]) + whitelist_status_code=[404]) # Process feed if data: @@ -1214,7 +1264,9 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose resultlist.append((title, size, url, provider, 'torrent', True)) logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) else: - logger.info('%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %d)', title, size, int(seeders)) + logger.info( + '%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %d)', + title, size, int(seeders)) except Exception as e: logger.exception("Unhandled exception in the KAT parser") @@ -1243,7 +1295,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose query_items.extend(['format:(%s)' % format, 'size:[0 TO %d]' % maxsize, - '-seeders:0']) # cut out dead torrents + '-seeders:0']) # cut out dead torrents if bitrate: query_items.append('bitrate:"%s"' % bitrate) @@ -1256,8 +1308,8 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose "passkey": headphones.CONFIG.WAFFLES_PASSKEY, "rss": "1", "c0": "1", - "s": "seeders", # sort by - "d": "desc", # direction + "s": "seeders", # sort by + "d": "desc", # direction "q": " ".join(query_items) } @@ -1281,7 +1333,9 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose resultlist.append((title, size, url, provider, 'torrent', True)) logger.info('Found %s. Size: %s', title, helpers.bytes_to_mb(size)) except Exception as e: - logger.error(u"An error occurred while trying to parse the response from Waffles.fm: %s", e) + logger.error( + u"An error occurred while trying to parse the response from Waffles.fm: %s", + e) # rutracker.org if headphones.CONFIG.RUTRACKER: @@ -1341,7 +1395,8 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose if re.search(bitrate, encoding_string, flags=re.I): bitrate_string = encoding_string if bitrate_string not in gazelleencoding.ALL_ENCODINGS: - logger.info(u"Your preferred bitrate is not one of the available What.cd filters, so not using it as a search parameter.") + logger.info( + u"Your preferred bitrate is not one of the available What.cd filters, so not using it as a search parameter.") maxsize = 10000000000 elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless: # Highest quality including lossless search_formats = [gazelleformat.FLAC, gazelleformat.MP3] @@ -1353,28 +1408,35 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose if not gazelle or not gazelle.logged_in(): try: logger.info(u"Attempting to log in to What.cd...") - gazelle = gazelleapi.GazelleAPI(headphones.CONFIG.WHATCD_USERNAME, headphones.CONFIG.WHATCD_PASSWORD) + gazelle = gazelleapi.GazelleAPI(headphones.CONFIG.WHATCD_USERNAME, + headphones.CONFIG.WHATCD_PASSWORD) gazelle._login() except Exception as e: gazelle = None - logger.error(u"What.cd credentials incorrect or site is down. Error: %s %s" % (e.__class__.__name__, str(e))) + logger.error(u"What.cd credentials incorrect or site is down. Error: %s %s" % ( + e.__class__.__name__, str(e))) if gazelle and gazelle.logged_in(): logger.info(u"Searching %s..." % provider) all_torrents = [] for search_format in search_formats: if usersearchterm: - all_torrents.extend(gazelle.search_torrents(searchstr=usersearchterm, format=search_format, encoding=bitrate_string)['results']) + all_torrents.extend( + gazelle.search_torrents(searchstr=usersearchterm, format=search_format, + encoding=bitrate_string)['results']) else: all_torrents.extend(gazelle.search_torrents(artistname=semi_clean_artist_term, - groupname=semi_clean_album_term, - format=search_format, encoding=bitrate_string)['results']) + groupname=semi_clean_album_term, + format=search_format, + encoding=bitrate_string)['results']) # filter on format, size, and num seeders logger.info(u"Filtering torrents by format, maximum size, and minimum seeders...") - match_torrents = [t for t in all_torrents if t.size <= maxsize and t.seeders >= minimumseeders] + match_torrents = [t for t in all_torrents if + t.size <= maxsize and t.seeders >= minimumseeders] - logger.info(u"Remaining torrents: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) + logger.info( + u"Remaining torrents: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) # sort by times d/l'd if not len(match_torrents): @@ -1382,21 +1444,25 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose elif len(match_torrents) > 1: logger.info(u"Found %d matching releases from %s for %s - %s after filtering" % (len(match_torrents), provider, artistterm, albumterm)) - logger.info("Sorting torrents by times snatched and preferred bitrate %s..." % bitrate_string) + logger.info( + "Sorting torrents by times snatched and preferred bitrate %s..." % bitrate_string) match_torrents.sort(key=lambda x: int(x.snatched), reverse=True) if gazelleformat.MP3 in search_formats: # sort by size after rounding to nearest 10MB...hacky, but will favor highest quality - match_torrents.sort(key=lambda x: int(10 * round(x.size / 1024. / 1024. / 10.)), reverse=True) + match_torrents.sort(key=lambda x: int(10 * round(x.size / 1024. / 1024. / 10.)), + reverse=True) if search_formats and None not in search_formats: - match_torrents.sort(key=lambda x: int(search_formats.index(x.format))) # prefer lossless -# if bitrate: -# match_torrents.sort(key=lambda x: re.match("mp3", x.getTorrentDetails(), flags=re.I), reverse=True) -# match_torrents.sort(key=lambda x: str(bitrate) in x.getTorrentFolderName(), reverse=True) - logger.info(u"New order: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) + match_torrents.sort( + key=lambda x: int(search_formats.index(x.format))) # prefer lossless + # if bitrate: + # match_torrents.sort(key=lambda x: re.match("mp3", x.getTorrentDetails(), flags=re.I), reverse=True) + # match_torrents.sort(key=lambda x: str(bitrate) in x.getTorrentFolderName(), reverse=True) + logger.info( + u"New order: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) for torrent in match_torrents: if not torrent.file_path: - torrent.group.update_group_data() # will load the file_path for the individual torrents + torrent.group.update_group_data() # will load the file_path for the individual torrents resultlist.append((torrent.file_path, torrent.size, gazelle.generate_torrent_link(torrent.id), @@ -1415,23 +1481,24 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose providerurl = fix_url("https://thepiratebay.se") # Build URL - providerurl = providerurl + "/search/" + tpb_term + "/0/7/" # 7 is sort by seeders + providerurl = providerurl + "/search/" + tpb_term + "/0/7/" # 7 is sort by seeders # Pick category for torrents if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly: - category = '104' # FLAC + category = '104' # FLAC maxsize = 10000000000 elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless: - category = '100' # General audio category + category = '100' # General audio category maxsize = 10000000000 else: - category = '101' # MP3 only + category = '101' # MP3 only maxsize = 300000000 # Request content logger.info("Searching The Pirate Bay using term: %s", tpb_term) - headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2243.2 Safari/537.36'} + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2243.2 Safari/537.36'} data = request.request_soup(url=providerurl + category, headers=headers) # Process content @@ -1463,7 +1530,8 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose if url.lower().startswith("//"): url = "http:" + url - formatted_size = re.search('Size (.*),', unicode(item)).group(1).replace(u'\xa0', ' ') + formatted_size = re.search('Size (.*),', unicode(item)).group(1).replace( + u'\xa0', ' ') size = helpers.piratesize(formatted_size) if size < maxsize and minimumseeders < seeds and url is not None: @@ -1471,7 +1539,9 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose logger.info('Found %s. Size: %s' % (title, formatted_size)) else: match = False - logger.info('%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % (title, size, int(seeds))) + logger.info( + '%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % ( + title, size, int(seeds))) resultlist.append((title, size, url, provider, "torrent", match)) except Exception as e: @@ -1493,9 +1563,10 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose # Requesting content logger.info("Parsing results from Old Pirate Bay") - headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2243.2 Safari/537.36'} + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2243.2 Safari/537.36'} provider_url = fix_url(headphones.CONFIG.OLDPIRATEBAY_URL) + \ - "/search.php?" + urllib.urlencode({"q": tpb_term, "iht": 6}) + "/search.php?" + urllib.urlencode({"q": tpb_term, "iht": 6}) data = request.request_soup(url=provider_url, headers=headers) @@ -1513,7 +1584,8 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose rightformat = True title = links[1].text seeds = int(item.select("td.seeders-row")[0].text) - url = links[0]["href"] # Magnet link. The actual download link is not based on the URL + url = links[0][ + "href"] # Magnet link. The actual download link is not based on the URL formatted_size = item.select("td.size-row")[0].text size = helpers.piratesize(formatted_size) @@ -1523,11 +1595,14 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose logger.info('Found %s. Size: %s' % (title, formatted_size)) else: match = False - logger.info('%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % (title, size, int(seeds))) + logger.info( + '%s is larger than the maxsize or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % ( + title, size, int(seeds))) resultlist.append((title, size, url, provider, "torrent", match)) except Exception as e: - logger.error(u"An unknown error occurred in the Old Pirate Bay parser: %s" % e) + logger.error( + u"An unknown error occurred in the Old Pirate Bay parser: %s" % e) # Strike if headphones.CONFIG.STRIKE: @@ -1547,7 +1622,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose logger.info("Searching %s using term: %s" % (provider, s_term)) data = request.request_json(url=providerurl, - whitelist_status_code=[404]) + whitelist_status_code=[404]) if not data or not data.get('torrents'): logger.info("No results found on %s using search term: %s" % (provider, s_term)) @@ -1558,7 +1633,6 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose seeders = item['seeds'] url = item['magnet_uri'] size = int(item['size']) - subcategory = item['sub_category'] if size < maxsize and minimumseeders < int(seeders): resultlist.append((title, size, url, provider, 'torrent', True)) @@ -1577,15 +1651,15 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly: # categories = "7" #music - format = "2" #flac + format = "2" # flac maxsize = 10000000000 elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless: # categories = "7" #music - format = "10" #mp3+flac + format = "10" # mp3+flac maxsize = 10000000000 else: # categories = "7" #music - format = "8" #mp3 + format = "8" # mp3 maxsize = 300000000 # Requesting content @@ -1614,7 +1688,8 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose size = int(item.links[1]['length']) if format == "2": torrent = request.request_content(url) - if not torrent or (int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1): + if not torrent or (int(torrent.find(".mp3")) > 0 and int( + torrent.find(".flac")) < 1): rightformat = False if rightformat and size < maxsize and minimumseeders < seeds: @@ -1622,16 +1697,19 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) else: match = False - logger.info('%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % (title, size, int(seeds), rightformat)) + logger.info( + '%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i, Format: %s)' % ( + title, size, int(seeds), rightformat)) resultlist.append((title, size, url, provider, 'torrent', match)) except Exception as e: logger.exception("Unhandled exception in Mininova Parser") - #attempt to verify that this isn't a substring result - #when looking for "Foo - Foo" we don't want "Foobar" - #this should be less of an issue when it isn't a self-titled album so we'll only check vs artist - results = [result for result in resultlist if verifyresult(result[0], artistterm, term, losslessOnly)] + # attempt to verify that this isn't a substring result + # when looking for "Foo - Foo" we don't want "Foobar" + # this should be less of an issue when it isn't a self-titled album so we'll only check vs artist + results = [result for result in resultlist if + verifyresult(result[0], artistterm, term, losslessOnly)] # Additional filtering for size etc if results and not choose_specific_download: @@ -1639,11 +1717,11 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose return results + # THIS IS KIND OF A MESS AND PROBABLY NEEDS TO BE CLEANED UP def preprocess(resultlist): - for result in resultlist: if result[4] == 'torrent': @@ -1651,7 +1729,7 @@ def preprocess(resultlist): if result[3] == 'rutracker.org': return ruobj.get_torrent_data(result[2]), result - #Get out of here if we're using Transmission + # Get out of here if we're using Transmission if headphones.CONFIG.TORRENT_DOWNLOADER == 1: ## if not a magnet link still need the .torrent to generate hash... uTorrent support labeling return True, result # Get out of here if it's a magnet link @@ -1667,13 +1745,15 @@ def preprocess(resultlist): elif result[3] == 'What.cd': headers['User-Agent'] = 'Headphones' elif result[3] == "The Pirate Bay" or result[3] == "Old Pirate Bay": - headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2243.2 Safari/537.36' + headers[ + 'User-Agent'] = 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2243.2 Safari/537.36' return request.request_content(url=result[2], headers=headers), result else: headers = {'User-Agent': USER_AGENT} if result[3] == 'headphones': - return request.request_content(url=result[2], headers=headers, auth=(headphones.CONFIG.HPUSER, headphones.CONFIG.HPPASS)), result + return request.request_content(url=result[2], headers=headers, auth=( + headphones.CONFIG.HPUSER, headphones.CONFIG.HPPASS)), result else: return request.request_content(url=result[2], headers=headers), result diff --git a/headphones/torrentfinished.py b/headphones/torrentfinished.py index 24e9a4b0..bc622291 100644 --- a/headphones/torrentfinished.py +++ b/headphones/torrentfinished.py @@ -13,13 +13,14 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -from headphones import db, utorrent, transmission, logger - import threading + +from headphones import db, utorrent, transmission, logger import headphones postprocessor_lock = threading.Lock() + def checkTorrentFinished(): """ Remove Torrent + data if Post Processed and finished Seeding @@ -41,6 +42,7 @@ def checkTorrentFinished(): torrent_removed = utorrent.removeTorrent(hash, True) if torrent_removed: - myDB.action('DELETE from snatched WHERE status = "Seed_Processed" and AlbumID=?', [albumid]) + myDB.action('DELETE from snatched WHERE status = "Seed_Processed" and AlbumID=?', + [albumid]) logger.info("Checking finished torrents completed") diff --git a/headphones/transmission.py b/headphones/transmission.py index ab5ae6c9..3ad0327c 100644 --- a/headphones/transmission.py +++ b/headphones/transmission.py @@ -13,14 +13,15 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -from headphones import logger, request - import time import json import base64 import urlparse + +from headphones import logger, request import headphones + # This is just a simple script to send torrents to transmission. The # intention is to turn this into a class where we can check the state # of the download, set the download dir, etc. @@ -96,7 +97,6 @@ def setSeedRatio(torrentid, ratio): def removeTorrent(torrentid, remove_data=False): - method = 'torrent-get' arguments = {'ids': torrentid, 'fields': ['isFinished', 'name']} @@ -118,7 +118,8 @@ def removeTorrent(torrentid, remove_data=False): response = torrentAction(method, arguments) return True else: - logger.info('%s has not finished seeding yet, torrent will not be removed, will try again on next run' % name) + logger.info( + '%s has not finished seeding yet, torrent will not be removed, will try again on next run' % name) except: return False @@ -126,7 +127,6 @@ def removeTorrent(torrentid, remove_data=False): def torrentAction(method, arguments): - host = headphones.CONFIG.TRANSMISSION_HOST username = headphones.CONFIG.TRANSMISSION_USERNAME password = headphones.CONFIG.TRANSMISSION_PASSWORD @@ -152,7 +152,7 @@ def torrentAction(method, arguments): # Retrieve session id auth = (username, password) if username and password else None response = request.request_response(host, auth=auth, - whitelist_status_code=[401, 409]) + whitelist_status_code=[401, 409]) if response is None: logger.error("Error gettings Transmission session ID") @@ -162,7 +162,7 @@ def torrentAction(method, arguments): if response.status_code == 401: if auth: logger.error("Username and/or password not accepted by " \ - "Transmission") + "Transmission") else: logger.error("Transmission authorization required") @@ -179,7 +179,7 @@ def torrentAction(method, arguments): data = {'method': method, 'arguments': arguments} response = request.request_json(host, method="POST", data=json.dumps(data), - headers=headers, auth=auth) + headers=headers, auth=auth) print response diff --git a/headphones/updater.py b/headphones/updater.py index b750c013..0dc49e52 100644 --- a/headphones/updater.py +++ b/headphones/updater.py @@ -17,10 +17,10 @@ from headphones import logger, db, importer def dbUpdate(forcefull=False): - myDB = db.DBConnection() - active_artists = myDB.select('SELECT ArtistID, ArtistName from artists WHERE Status="Active" or Status="Loading" order by LastUpdated ASC') + active_artists = myDB.select( + 'SELECT ArtistID, ArtistName from artists WHERE Status="Active" or Status="Loading" order by LastUpdated ASC') logger.info('Starting update for %i active artists', len(active_artists)) for artist in active_artists: diff --git a/headphones/utorrent.py b/headphones/utorrent.py index a826e928..43befbd2 100644 --- a/headphones/utorrent.py +++ b/headphones/utorrent.py @@ -14,26 +14,24 @@ # along with Headphones. If not, see . import urllib +import json +import time +from collections import namedtuple import urllib2 import urlparse import cookielib -import json + import re import os -import time - import headphones - from headphones import logger -from collections import namedtuple class utorrentclient(object): - TOKEN_REGEX = "" UTSetting = namedtuple("UTSetting", ["name", "int", "str", "access"]) - def __init__(self, base_url=None, username=None, password=None,): + def __init__(self, base_url=None, username=None, password=None, ): host = headphones.CONFIG.UTORRENT_HOST if not host.startswith('http'): @@ -50,7 +48,7 @@ class utorrentclient(object): self.password = headphones.CONFIG.UTORRENT_PASSWORD self.opener = self._make_opener('uTorrent', self.base_url, self.username, self.password) self.token = self._get_token() - #TODO refresh token, when necessary + # TODO refresh token, when necessary def _make_opener(self, realm, base_url, username, password): """uTorrent API need HTTP Basic Auth and cookie support for token verify.""" @@ -83,7 +81,7 @@ class utorrentclient(object): return self._action(params) def add_url(self, url): - #can receive magnet or normal .torrent link + # can receive magnet or normal .torrent link params = [('action', 'add-url'), ('s', url)] return self._action(params) @@ -187,7 +185,9 @@ def removeTorrent(hash, remove_data=False): uTorrentClient.remove(hash, remove_data) return True else: - logger.info('%s has not finished seeding yet, torrent will not be removed, will try again on next run' % torrent[2]) + logger.info( + '%s has not finished seeding yet, torrent will not be removed, will try again on next run' % + torrent[2]) return False return False @@ -203,7 +203,6 @@ def setSeedRatio(hash, ratio): def dirTorrent(hash, cacheid=None, return_name=None): - uTorrentClient = utorrentclient() if not cacheid: @@ -228,19 +227,20 @@ def dirTorrent(hash, cacheid=None, return_name=None): return None, None + def addTorrent(link): uTorrentClient = utorrentclient() uTorrentClient.add_url(link) def getFolder(hash): - uTorrentClient = utorrentclient() # Get Active Directory from settings active_dir, completed_dir = getSettingsDirectories() if not active_dir: - logger.error('Could not get "Put new downloads in:" directory from uTorrent settings, please ensure it is set') + logger.error( + 'Could not get "Put new downloads in:" directory from uTorrent settings, please ensure it is set') return None # Get Torrent Folder Name diff --git a/headphones/versioncheck.py b/headphones/versioncheck.py index ea2a8e3f..30098886 100644 --- a/headphones/versioncheck.py +++ b/headphones/versioncheck.py @@ -13,18 +13,17 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -import re -import os import tarfile import platform -import headphones import subprocess +import re +import os +import headphones from headphones import logger, version, request def runGit(args): - if headphones.CONFIG.GIT_PATH: git_locations = ['"' + headphones.CONFIG.GIT_PATH + '"'] else: @@ -40,7 +39,8 @@ def runGit(args): try: logger.debug('Trying to execute: "' + cmd + '" with shell in ' + headphones.PROG_DIR) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, cwd=headphones.PROG_DIR) + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, + cwd=headphones.PROG_DIR) output, err = p.communicate() output = output.strip() @@ -62,7 +62,6 @@ def runGit(args): def getVersion(): - if version.HEADPHONES_VERSION.startswith('win32build'): headphones.INSTALL_TYPE = 'win' @@ -92,7 +91,8 @@ def getVersion(): branch_name = branch_name if not branch_name and headphones.CONFIG.GIT_BRANCH: - logger.error('Could not retrieve branch name from git. Falling back to %s' % headphones.CONFIG.GIT_BRANCH) + logger.error( + 'Could not retrieve branch name from git. Falling back to %s' % headphones.CONFIG.GIT_BRANCH) branch_name = headphones.CONFIG.GIT_BRANCH if not branch_name: logger.error('Could not retrieve branch name from git. Defaulting to master') @@ -123,11 +123,13 @@ def checkGithub(): # Get the latest version available from github logger.info('Retrieving latest version information from GitHub') - url = 'https://api.github.com/repos/%s/headphones/commits/%s' % (headphones.CONFIG.GIT_USER, headphones.CONFIG.GIT_BRANCH) + url = 'https://api.github.com/repos/%s/headphones/commits/%s' % ( + headphones.CONFIG.GIT_USER, headphones.CONFIG.GIT_BRANCH) version = request.request_json(url, timeout=20, validator=lambda x: type(x) == dict) if version is None: - logger.warn('Could not get the latest version from GitHub. Are you running a local development version?') + logger.warn( + 'Could not get the latest version from GitHub. Are you running a local development version?') return headphones.CURRENT_VERSION headphones.LATEST_VERSION = version['sha'] @@ -135,7 +137,8 @@ def checkGithub(): # See how many commits behind we are if not headphones.CURRENT_VERSION: - logger.info('You are running an unknown version of Headphones. Run the updater to identify your version') + logger.info( + 'You are running an unknown version of Headphones. Run the updater to identify your version') return headphones.LATEST_VERSION if headphones.LATEST_VERSION == headphones.CURRENT_VERSION: @@ -143,8 +146,10 @@ def checkGithub(): return headphones.LATEST_VERSION logger.info('Comparing currently installed version with latest GitHub version') - url = 'https://api.github.com/repos/%s/headphones/compare/%s...%s' % (headphones.CONFIG.GIT_USER, headphones.LATEST_VERSION, headphones.CURRENT_VERSION) - commits = request.request_json(url, timeout=20, whitelist_status_code=404, validator=lambda x: type(x) == dict) + url = 'https://api.github.com/repos/%s/headphones/compare/%s...%s' % ( + headphones.CONFIG.GIT_USER, headphones.LATEST_VERSION, headphones.CURRENT_VERSION) + commits = request.request_json(url, timeout=20, whitelist_status_code=404, + validator=lambda x: type(x) == dict) if commits is None: logger.warn('Could not get commits behind from GitHub.') @@ -158,7 +163,8 @@ def checkGithub(): headphones.COMMITS_BEHIND = 0 if headphones.COMMITS_BEHIND > 0: - logger.info('New version is available. You are %s commits behind' % headphones.COMMITS_BEHIND) + logger.info( + 'New version is available. You are %s commits behind' % headphones.COMMITS_BEHIND) elif headphones.COMMITS_BEHIND == 0: logger.info('Headphones is up to date') @@ -185,7 +191,8 @@ def update(): logger.info('Output: ' + str(output)) else: - tar_download_url = 'https://github.com/%s/headphones/tarball/%s' % (headphones.CONFIG.GIT_USER, headphones.CONFIG.GIT_BRANCH) + tar_download_url = 'https://github.com/%s/headphones/tarball/%s' % ( + headphones.CONFIG.GIT_USER, headphones.CONFIG.GIT_BRANCH) update_dir = os.path.join(headphones.PROG_DIR, 'update') version_path = os.path.join(headphones.PROG_DIR, 'version.txt') @@ -214,7 +221,8 @@ def update(): os.remove(tar_download_path) # Find update dir name - update_dir_contents = [x for x in os.listdir(update_dir) if os.path.isdir(os.path.join(update_dir, x))] + update_dir_contents = [x for x in os.listdir(update_dir) if + os.path.isdir(os.path.join(update_dir, x))] if len(update_dir_contents) != 1: logger.error("Invalid update data, update failed: " + str(update_dir_contents)) return diff --git a/headphones/webserve.py b/headphones/webserve.py index 815c5e9f..cdc503a0 100644 --- a/headphones/webserve.py +++ b/headphones/webserve.py @@ -15,18 +15,8 @@ # NZBGet support added by CurlyMo as a part of XBian - XBMC on the Raspberry Pi -from headphones import logger, searcher, db, importer, mb, lastfm, librarysync, helpers, notifiers -from headphones.helpers import checked, radio, today, cleanName - -from mako.lookup import TemplateLookup -from mako import exceptions - from operator import itemgetter - -import headphones import threading -import cherrypy -import urllib2 import hashlib import random import urllib @@ -34,8 +24,16 @@ import json import time import cgi import sys +import urllib2 + import os import re +from headphones import logger, searcher, db, importer, mb, lastfm, librarysync, helpers, notifiers +from headphones.helpers import checked, radio, today, cleanName +from mako.lookup import TemplateLookup +from mako import exceptions +import headphones +import cherrypy try: # pylint:disable=E0611 @@ -48,7 +46,6 @@ except ImportError: def serve_template(templatename, **kwargs): - interface_dir = os.path.join(str(headphones.PROG_DIR), 'data/interfaces/') template_dir = os.path.join(str(interface_dir), headphones.CONFIG.INTERFACE) @@ -62,7 +59,6 @@ def serve_template(templatename, **kwargs): class WebInterface(object): - @cherrypy.expose def index(self): raise cherrypy.HTTPRedirect("home") @@ -90,7 +86,8 @@ class WebInterface(object): if not artist: raise cherrypy.HTTPRedirect("home") - albums = myDB.select('SELECT * from albums WHERE ArtistID=? order by ReleaseDate DESC', [ArtistID]) + albums = myDB.select('SELECT * from albums WHERE ArtistID=? order by ReleaseDate DESC', + [ArtistID]) # Serve the extras up as a dict to make things easier for new templates (append new extras to the end) extras_list = headphones.POSSIBLE_EXTRAS @@ -109,7 +106,8 @@ class WebInterface(object): extras_dict[extra] = "" i += 1 - return serve_template(templatename="artist.html", title=artist['ArtistName'], artist=artist, albums=albums, extras=extras_dict) + return serve_template(templatename="artist.html", title=artist['ArtistName'], artist=artist, + albums=albums, extras=extras_dict) @cherrypy.expose def albumPage(self, AlbumID): @@ -128,8 +126,10 @@ class WebInterface(object): if not album: raise cherrypy.HTTPRedirect("home") - tracks = myDB.select('SELECT * from tracks WHERE AlbumID=? ORDER BY CAST(TrackNumber AS INTEGER)', [AlbumID]) - description = myDB.action('SELECT * from descriptions WHERE ReleaseGroupID=?', [AlbumID]).fetchone() + tracks = myDB.select( + 'SELECT * from tracks WHERE AlbumID=? ORDER BY CAST(TrackNumber AS INTEGER)', [AlbumID]) + description = myDB.action('SELECT * from descriptions WHERE ReleaseGroupID=?', + [AlbumID]).fetchone() if not album['ArtistName']: title = ' - ' @@ -139,7 +139,8 @@ class WebInterface(object): title = title + "" else: title = title + album['AlbumTitle'] - return serve_template(templatename="album.html", title=title, album=album, tracks=tracks, description=description) + return serve_template(templatename="album.html", title=title, album=album, tracks=tracks, + description=description) @cherrypy.expose def search(self, name, type): @@ -151,7 +152,9 @@ class WebInterface(object): searchresults = mb.findRelease(name, limit=100) else: searchresults = mb.findSeries(name, limit=100) - return serve_template(templatename="searchresults.html", title='Search Results for: "' + cgi.escape(name) + '"', searchresults=searchresults, name=cgi.escape(name), type=type) + return serve_template(templatename="searchresults.html", + title='Search Results for: "' + cgi.escape(name) + '"', + searchresults=searchresults, name=cgi.escape(name), type=type) @cherrypy.expose def addArtist(self, artistid): @@ -162,7 +165,8 @@ class WebInterface(object): @cherrypy.expose def addSeries(self, seriesid): - thread = threading.Thread(target=importer.addArtisttoDB, args=[seriesid, False, False, "series"]) + thread = threading.Thread(target=importer.addArtisttoDB, + args=[seriesid, False, False, "series"]) thread.start() thread.join(1) raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % seriesid) @@ -200,12 +204,18 @@ class WebInterface(object): controlValueDict = {'ArtistID': ArtistID} newValueDict = {'IncludeExtras': 0} myDB.upsert("artists", newValueDict, controlValueDict) - extraalbums = myDB.select('SELECT AlbumID from albums WHERE ArtistID=? AND Status="Skipped" AND Type!="Album"', [ArtistID]) + extraalbums = myDB.select( + 'SELECT AlbumID from albums WHERE ArtistID=? AND Status="Skipped" AND Type!="Album"', + [ArtistID]) for album in extraalbums: - myDB.action('DELETE from tracks WHERE ArtistID=? AND AlbumID=?', [ArtistID, album['AlbumID']]) - myDB.action('DELETE from albums WHERE ArtistID=? AND AlbumID=?', [ArtistID, album['AlbumID']]) - myDB.action('DELETE from allalbums WHERE ArtistID=? AND AlbumID=?', [ArtistID, album['AlbumID']]) - myDB.action('DELETE from alltracks WHERE ArtistID=? AND AlbumID=?', [ArtistID, album['AlbumID']]) + myDB.action('DELETE from tracks WHERE ArtistID=? AND AlbumID=?', + [ArtistID, album['AlbumID']]) + myDB.action('DELETE from albums WHERE ArtistID=? AND AlbumID=?', + [ArtistID, album['AlbumID']]) + myDB.action('DELETE from allalbums WHERE ArtistID=? AND AlbumID=?', + [ArtistID, album['AlbumID']]) + myDB.action('DELETE from alltracks WHERE ArtistID=? AND AlbumID=?', + [ArtistID, album['AlbumID']]) myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [album['AlbumID']]) from headphones import cache c = cache.Cache() @@ -242,7 +252,9 @@ class WebInterface(object): from headphones import cache c = cache.Cache() - rgids = myDB.select('SELECT AlbumID FROM albums WHERE ArtistID=? UNION SELECT AlbumID FROM allalbums WHERE ArtistID=?', [ArtistID, ArtistID]) + rgids = myDB.select( + 'SELECT AlbumID FROM albums WHERE ArtistID=? UNION SELECT AlbumID FROM allalbums WHERE ArtistID=?', + [ArtistID, ArtistID]) for rgid in rgids: albumid = rgid['AlbumID'] myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [albumid]) @@ -269,17 +281,19 @@ class WebInterface(object): def scanArtist(self, ArtistID): myDB = db.DBConnection() - artist_name = myDB.select('SELECT DISTINCT ArtistName FROM artists WHERE ArtistID=?', [ArtistID])[0][0] + artist_name = \ + myDB.select('SELECT DISTINCT ArtistName FROM artists WHERE ArtistID=?', [ArtistID])[0][0] logger.info(u"Scanning artist: %s", artist_name) full_folder_format = headphones.CONFIG.FOLDER_FORMAT folder_format = re.findall(r'(.*?[Aa]rtist?)\.*', full_folder_format)[0] - acceptable_formats = ["$artist","$sortartist","$first/$artist","$first/$sortartist"] + acceptable_formats = ["$artist", "$sortartist", "$first/$artist", "$first/$sortartist"] if not folder_format.lower() in acceptable_formats: - logger.info("Can't determine the artist folder from the configured folder_format. Not scanning") + logger.info( + "Can't determine the artist folder from the configured folder_format. Not scanning") return # Format the folder to match the settings @@ -299,12 +313,12 @@ class WebInterface(object): firstchar = sortname[0] values = {'$Artist': artist, - '$SortArtist': sortname, - '$First': firstchar.upper(), - '$artist': artist.lower(), - '$sortartist': sortname.lower(), - '$first': firstchar.lower(), - } + '$SortArtist': sortname, + '$First': firstchar.upper(), + '$artist': artist.lower(), + '$sortartist': sortname.lower(), + '$first': firstchar.lower(), + } folder = helpers.replace_all(folder_format.strip(), values, normalize=True) @@ -332,14 +346,17 @@ class WebInterface(object): if not os.path.isdir(artistfolder): logger.debug("Cannot find directory: " + artistfolder) continue - threading.Thread(target=librarysync.libraryScan, kwargs={"dir":artistfolder, "artistScan":True, "ArtistID":ArtistID, "ArtistName":artist_name}).start() + threading.Thread(target=librarysync.libraryScan, + kwargs={"dir": artistfolder, "artistScan": True, "ArtistID": ArtistID, + "ArtistName": artist_name}).start() raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % ArtistID) @cherrypy.expose def deleteEmptyArtists(self): logger.info(u"Deleting all empty artists") myDB = db.DBConnection() - emptyArtistIDs = [row['ArtistID'] for row in myDB.select("SELECT ArtistID FROM artists WHERE LatestAlbum IS NULL")] + emptyArtistIDs = [row['ArtistID'] for row in + myDB.select("SELECT ArtistID FROM artists WHERE LatestAlbum IS NULL")] for ArtistID in emptyArtistIDs: self.removeArtist(ArtistID) @@ -371,8 +388,11 @@ class WebInterface(object): if ArtistID: ArtistIDT = ArtistID else: - ArtistIDT = myDB.action('SELECT ArtistID FROM albums WHERE AlbumID=?', [mbid]).fetchone()[0] - myDB.action('UPDATE artists SET TotalTracks=(SELECT COUNT(*) FROM tracks WHERE ArtistID = ? AND AlbumTitle IN (SELECT AlbumTitle FROM albums WHERE Status != "Ignored")) WHERE ArtistID = ?', [ArtistIDT, ArtistIDT]) + ArtistIDT = \ + myDB.action('SELECT ArtistID FROM albums WHERE AlbumID=?', [mbid]).fetchone()[0] + myDB.action( + 'UPDATE artists SET TotalTracks=(SELECT COUNT(*) FROM tracks WHERE ArtistID = ? AND AlbumTitle IN (SELECT AlbumTitle FROM albums WHERE Status != "Ignored")) WHERE ArtistID = ?', + [ArtistIDT, ArtistIDT]) if ArtistID: raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % ArtistID) else: @@ -385,8 +405,10 @@ class WebInterface(object): if action == "ignore": myDB = db.DBConnection() for artist in args: - myDB.action('DELETE FROM newartists WHERE ArtistName=?', [artist.decode(headphones.SYS_ENCODING, 'replace')]) - myDB.action('UPDATE have SET Matched="Ignored" WHERE ArtistName=?', [artist.decode(headphones.SYS_ENCODING, 'replace')]) + myDB.action('DELETE FROM newartists WHERE ArtistName=?', + [artist.decode(headphones.SYS_ENCODING, 'replace')]) + myDB.action('UPDATE have SET Matched="Ignored" WHERE ArtistName=?', + [artist.decode(headphones.SYS_ENCODING, 'replace')]) logger.info("Artist %s removed from new artist list and set to ignored" % artist) raise cherrypy.HTTPRedirect("home") @@ -440,12 +462,12 @@ class WebInterface(object): (data, bestqual) = searcher.preprocess(result) if data and bestqual: - myDB = db.DBConnection() - album = myDB.action('SELECT * from albums WHERE AlbumID=?', [AlbumID]).fetchone() - searcher.send_to_downloader(data, bestqual, album) - return json.dumps({'result':'success'}) + myDB = db.DBConnection() + album = myDB.action('SELECT * from albums WHERE AlbumID=?', [AlbumID]).fetchone() + searcher.send_to_downloader(data, bestqual, album) + return json.dumps({'result': 'success'}) else: - return json.dumps({'result':'failure'}) + return json.dumps({'result': 'failure'}) @cherrypy.expose def unqueueAlbum(self, AlbumID, ArtistID): @@ -462,10 +484,12 @@ class WebInterface(object): myDB = db.DBConnection() myDB.action('DELETE from have WHERE Matched=?', [AlbumID]) - album = myDB.action('SELECT ArtistID, ArtistName, AlbumTitle from albums where AlbumID=?', [AlbumID]).fetchone() + album = myDB.action('SELECT ArtistID, ArtistName, AlbumTitle from albums where AlbumID=?', + [AlbumID]).fetchone() if album: ArtistID = album['ArtistID'] - myDB.action('DELETE from have WHERE ArtistName=? AND AlbumTitle=?', [album['ArtistName'], album['AlbumTitle']]) + myDB.action('DELETE from have WHERE ArtistName=? AND AlbumTitle=?', + [album['ArtistName'], album['AlbumTitle']]) myDB.action('DELETE from albums WHERE AlbumID=?', [AlbumID]) myDB.action('DELETE from tracks WHERE AlbumID=?', [AlbumID]) @@ -505,9 +529,11 @@ class WebInterface(object): @cherrypy.expose def upcoming(self): myDB = db.DBConnection() - upcoming = myDB.select("SELECT * from albums WHERE ReleaseDate > date('now') order by ReleaseDate ASC") + upcoming = myDB.select( + "SELECT * from albums WHERE ReleaseDate > date('now') order by ReleaseDate ASC") wanted = myDB.select("SELECT * from albums WHERE Status='Wanted'") - return serve_template(templatename="upcoming.html", title="Upcoming", upcoming=upcoming, wanted=wanted) + return serve_template(templatename="upcoming.html", title="Upcoming", upcoming=upcoming, + wanted=wanted) @cherrypy.expose def manage(self): @@ -519,7 +545,8 @@ class WebInterface(object): def manageArtists(self): myDB = db.DBConnection() artists = myDB.select('SELECT * from artists order by ArtistSortName COLLATE NOCASE') - return serve_template(templatename="manageartists.html", title="Manage Artists", artists=artists) + return serve_template(templatename="manageartists.html", title="Manage Artists", + artists=artists) @cherrypy.expose def manageAlbums(self, Status=None): @@ -530,87 +557,115 @@ class WebInterface(object): albums = myDB.select('SELECT * from albums WHERE Status=?', [Status]) else: albums = myDB.select('SELECT * from albums') - return serve_template(templatename="managealbums.html", title="Manage Albums", albums=albums) + return serve_template(templatename="managealbums.html", title="Manage Albums", + albums=albums) @cherrypy.expose def manageNew(self): myDB = db.DBConnection() newartists = myDB.select('SELECT * from newartists') - return serve_template(templatename="managenew.html", title="Manage New Artists", newartists=newartists) + return serve_template(templatename="managenew.html", title="Manage New Artists", + newartists=newartists) @cherrypy.expose def manageUnmatched(self): myDB = db.DBConnection() have_album_dictionary = [] headphones_album_dictionary = [] - have_albums = myDB.select('SELECT ArtistName, AlbumTitle, TrackTitle, CleanName from have WHERE Matched = "Failed" GROUP BY AlbumTitle ORDER BY ArtistName') + have_albums = myDB.select( + 'SELECT ArtistName, AlbumTitle, TrackTitle, CleanName from have WHERE Matched = "Failed" GROUP BY AlbumTitle ORDER BY ArtistName') for albums in have_albums: - #Have to skip over manually matched tracks + # Have to skip over manually matched tracks if albums['ArtistName'] and albums['AlbumTitle'] and albums['TrackTitle']: - original_clean = helpers.cleanName(albums['ArtistName'] + " " + albums['AlbumTitle'] + " " + albums['TrackTitle']) - # else: - # original_clean = None + original_clean = helpers.cleanName( + albums['ArtistName'] + " " + albums['AlbumTitle'] + " " + albums['TrackTitle']) + # else: + # original_clean = None if original_clean == albums['CleanName']: - have_dict = {'ArtistName': albums['ArtistName'], 'AlbumTitle': albums['AlbumTitle']} + have_dict = {'ArtistName': albums['ArtistName'], + 'AlbumTitle': albums['AlbumTitle']} have_album_dictionary.append(have_dict) - headphones_albums = myDB.select('SELECT ArtistName, AlbumTitle from albums ORDER BY ArtistName') + headphones_albums = myDB.select( + 'SELECT ArtistName, AlbumTitle from albums ORDER BY ArtistName') for albums in headphones_albums: if albums['ArtistName'] and albums['AlbumTitle']: - headphones_dict = {'ArtistName': albums['ArtistName'], 'AlbumTitle': albums['AlbumTitle']} + headphones_dict = {'ArtistName': albums['ArtistName'], + 'AlbumTitle': albums['AlbumTitle']} headphones_album_dictionary.append(headphones_dict) - #unmatchedalbums = [f for f in have_album_dictionary if f not in [x for x in headphones_album_dictionary]] + # unmatchedalbums = [f for f in have_album_dictionary if f not in [x for x in headphones_album_dictionary]] - check = set([(cleanName(d['ArtistName']).lower(), cleanName(d['AlbumTitle']).lower()) for d in headphones_album_dictionary]) - unmatchedalbums = [d for d in have_album_dictionary if (cleanName(d['ArtistName']).lower(), cleanName(d['AlbumTitle']).lower()) not in check] + check = set( + [(cleanName(d['ArtistName']).lower(), cleanName(d['AlbumTitle']).lower()) for d in + headphones_album_dictionary]) + unmatchedalbums = [d for d in have_album_dictionary if ( + cleanName(d['ArtistName']).lower(), cleanName(d['AlbumTitle']).lower()) not in check] - return serve_template(templatename="manageunmatched.html", title="Manage Unmatched Items", unmatchedalbums=unmatchedalbums) + return serve_template(templatename="manageunmatched.html", title="Manage Unmatched Items", + unmatchedalbums=unmatchedalbums) @cherrypy.expose - def markUnmatched(self, action=None, existing_artist=None, existing_album=None, new_artist=None, new_album=None): + def markUnmatched(self, action=None, existing_artist=None, existing_album=None, new_artist=None, + new_album=None): myDB = db.DBConnection() if action == "ignoreArtist": artist = existing_artist - myDB.action('UPDATE have SET Matched="Ignored" WHERE ArtistName=? AND Matched = "Failed"', [artist]) + myDB.action( + 'UPDATE have SET Matched="Ignored" WHERE ArtistName=? AND Matched = "Failed"', + [artist]) elif action == "ignoreAlbum": artist = existing_artist album = existing_album - myDB.action('UPDATE have SET Matched="Ignored" WHERE ArtistName=? AND AlbumTitle=? AND Matched = "Failed"', (artist, album)) + myDB.action( + 'UPDATE have SET Matched="Ignored" WHERE ArtistName=? AND AlbumTitle=? AND Matched = "Failed"', + (artist, album)) elif action == "matchArtist": existing_artist_clean = helpers.cleanName(existing_artist).lower() new_artist_clean = helpers.cleanName(new_artist).lower() if new_artist_clean != existing_artist_clean: - have_tracks = myDB.action('SELECT Matched, CleanName, Location, BitRate, Format FROM have WHERE ArtistName=?', [existing_artist]) + have_tracks = myDB.action( + 'SELECT Matched, CleanName, Location, BitRate, Format FROM have WHERE ArtistName=?', + [existing_artist]) update_count = 0 for entry in have_tracks: old_clean_filename = entry['CleanName'] if old_clean_filename.startswith(existing_artist_clean): - new_clean_filename = old_clean_filename.replace(existing_artist_clean, new_artist_clean, 1) - myDB.action('UPDATE have SET CleanName=? WHERE ArtistName=? AND CleanName=?', [new_clean_filename, existing_artist, old_clean_filename]) + new_clean_filename = old_clean_filename.replace(existing_artist_clean, + new_artist_clean, 1) + myDB.action( + 'UPDATE have SET CleanName=? WHERE ArtistName=? AND CleanName=?', + [new_clean_filename, existing_artist, old_clean_filename]) controlValueDict = {"CleanName": new_clean_filename} newValueDict = {"Location": entry['Location'], "BitRate": entry['BitRate'], "Format": entry['Format'] } - #Attempt to match tracks with new CleanName - match_alltracks = myDB.action('SELECT CleanName from alltracks WHERE CleanName=?', [new_clean_filename]).fetchone() + # Attempt to match tracks with new CleanName + match_alltracks = myDB.action( + 'SELECT CleanName from alltracks WHERE CleanName=?', + [new_clean_filename]).fetchone() if match_alltracks: myDB.upsert("alltracks", newValueDict, controlValueDict) - match_tracks = myDB.action('SELECT CleanName, AlbumID from tracks WHERE CleanName=?', [new_clean_filename]).fetchone() + match_tracks = myDB.action( + 'SELECT CleanName, AlbumID from tracks WHERE CleanName=?', + [new_clean_filename]).fetchone() if match_tracks: myDB.upsert("tracks", newValueDict, controlValueDict) - myDB.action('UPDATE have SET Matched="Manual" WHERE CleanName=?', [new_clean_filename]) + myDB.action('UPDATE have SET Matched="Manual" WHERE CleanName=?', + [new_clean_filename]) update_count += 1 - #This was throwing errors and I don't know why, but it seems to be working fine. - #else: - #logger.info("There was an error modifying Artist %s. This should not have happened" % existing_artist) - logger.info("Manual matching yielded %s new matches for Artist: %s" % (update_count, new_artist)) + # This was throwing errors and I don't know why, but it seems to be working fine. + # else: + # logger.info("There was an error modifying Artist %s. This should not have happened" % existing_artist) + logger.info("Manual matching yielded %s new matches for Artist: %s" % ( + update_count, new_artist)) if update_count > 0: librarysync.update_album_status() else: - logger.info("Artist %s already named appropriately; nothing to modify" % existing_artist) + logger.info( + "Artist %s already named appropriately; nothing to modify" % existing_artist) elif action == "matchAlbum": existing_artist_clean = helpers.cleanName(existing_artist).lower() @@ -620,83 +675,115 @@ class WebInterface(object): existing_clean_string = existing_artist_clean + " " + existing_album_clean new_clean_string = new_artist_clean + " " + new_album_clean if existing_clean_string != new_clean_string: - have_tracks = myDB.action('SELECT Matched, CleanName, Location, BitRate, Format FROM have WHERE ArtistName=? AND AlbumTitle=?', (existing_artist, existing_album)) + have_tracks = myDB.action( + 'SELECT Matched, CleanName, Location, BitRate, Format FROM have WHERE ArtistName=? AND AlbumTitle=?', + (existing_artist, existing_album)) update_count = 0 for entry in have_tracks: old_clean_filename = entry['CleanName'] if old_clean_filename.startswith(existing_clean_string): - new_clean_filename = old_clean_filename.replace(existing_clean_string, new_clean_string, 1) - myDB.action('UPDATE have SET CleanName=? WHERE ArtistName=? AND AlbumTitle=? AND CleanName=?', [new_clean_filename, existing_artist, existing_album, old_clean_filename]) + new_clean_filename = old_clean_filename.replace(existing_clean_string, + new_clean_string, 1) + myDB.action( + 'UPDATE have SET CleanName=? WHERE ArtistName=? AND AlbumTitle=? AND CleanName=?', + [new_clean_filename, existing_artist, existing_album, + old_clean_filename]) controlValueDict = {"CleanName": new_clean_filename} newValueDict = {"Location": entry['Location'], "BitRate": entry['BitRate'], "Format": entry['Format'] } - #Attempt to match tracks with new CleanName - match_alltracks = myDB.action('SELECT CleanName from alltracks WHERE CleanName=?', [new_clean_filename]).fetchone() + # Attempt to match tracks with new CleanName + match_alltracks = myDB.action( + 'SELECT CleanName from alltracks WHERE CleanName=?', + [new_clean_filename]).fetchone() if match_alltracks: myDB.upsert("alltracks", newValueDict, controlValueDict) - match_tracks = myDB.action('SELECT CleanName, AlbumID from tracks WHERE CleanName=?', [new_clean_filename]).fetchone() + match_tracks = myDB.action( + 'SELECT CleanName, AlbumID from tracks WHERE CleanName=?', + [new_clean_filename]).fetchone() if match_tracks: myDB.upsert("tracks", newValueDict, controlValueDict) - myDB.action('UPDATE have SET Matched="Manual" WHERE CleanName=?', [new_clean_filename]) + myDB.action('UPDATE have SET Matched="Manual" WHERE CleanName=?', + [new_clean_filename]) album_id = match_tracks['AlbumID'] update_count += 1 - #This was throwing errors and I don't know why, but it seems to be working fine. - #else: - #logger.info("There was an error modifying Artist %s / Album %s with clean name %s" % (existing_artist, existing_album, existing_clean_string)) - logger.info("Manual matching yielded %s new matches for Artist: %s / Album: %s" % (update_count, new_artist, new_album)) + # This was throwing errors and I don't know why, but it seems to be working fine. + # else: + # logger.info("There was an error modifying Artist %s / Album %s with clean name %s" % (existing_artist, existing_album, existing_clean_string)) + logger.info("Manual matching yielded %s new matches for Artist: %s / Album: %s" % ( + update_count, new_artist, new_album)) if update_count > 0: librarysync.update_album_status(album_id) else: - logger.info("Artist %s / Album %s already named appropriately; nothing to modify" % (existing_artist, existing_album)) + logger.info( + "Artist %s / Album %s already named appropriately; nothing to modify" % ( + existing_artist, existing_album)) @cherrypy.expose def manageManual(self): myDB = db.DBConnection() manual_albums = [] - manualalbums = myDB.select('SELECT ArtistName, AlbumTitle, TrackTitle, CleanName, Matched from have') + manualalbums = myDB.select( + 'SELECT ArtistName, AlbumTitle, TrackTitle, CleanName, Matched from have') for albums in manualalbums: if albums['ArtistName'] and albums['AlbumTitle'] and albums['TrackTitle']: - original_clean = helpers.cleanName(albums['ArtistName'] + " " + albums['AlbumTitle'] + " " + albums['TrackTitle']) - if albums['Matched'] == "Ignored" or albums['Matched'] == "Manual" or albums['CleanName'] != original_clean: + original_clean = helpers.cleanName( + albums['ArtistName'] + " " + albums['AlbumTitle'] + " " + albums['TrackTitle']) + if albums['Matched'] == "Ignored" or albums['Matched'] == "Manual" or albums[ + 'CleanName'] != original_clean: if albums['Matched'] == "Ignored": album_status = "Ignored" elif albums['Matched'] == "Manual" or albums['CleanName'] != original_clean: album_status = "Matched" - manual_dict = {'ArtistName': albums['ArtistName'], 'AlbumTitle': albums['AlbumTitle'], 'AlbumStatus': album_status} + manual_dict = {'ArtistName': albums['ArtistName'], + 'AlbumTitle': albums['AlbumTitle'], 'AlbumStatus': album_status} if manual_dict not in manual_albums: manual_albums.append(manual_dict) manual_albums_sorted = sorted(manual_albums, key=itemgetter('ArtistName', 'AlbumTitle')) - return serve_template(templatename="managemanual.html", title="Manage Manual Items", manualalbums=manual_albums_sorted) + return serve_template(templatename="managemanual.html", title="Manage Manual Items", + manualalbums=manual_albums_sorted) @cherrypy.expose def markManual(self, action=None, existing_artist=None, existing_album=None): myDB = db.DBConnection() if action == "unignoreArtist": artist = existing_artist - myDB.action('UPDATE have SET Matched="Failed" WHERE ArtistName=? AND Matched="Ignored"', [artist]) + myDB.action('UPDATE have SET Matched="Failed" WHERE ArtistName=? AND Matched="Ignored"', + [artist]) logger.info("Artist: %s successfully restored to unmatched list" % artist) elif action == "unignoreAlbum": artist = existing_artist album = existing_album - myDB.action('UPDATE have SET Matched="Failed" WHERE ArtistName=? AND AlbumTitle=? AND Matched="Ignored"', (artist, album)) + myDB.action( + 'UPDATE have SET Matched="Failed" WHERE ArtistName=? AND AlbumTitle=? AND Matched="Ignored"', + (artist, album)) logger.info("Album: %s successfully restored to unmatched list" % album) elif action == "unmatchArtist": artist = existing_artist - update_clean = myDB.select('SELECT ArtistName, AlbumTitle, TrackTitle, CleanName, Matched from have WHERE ArtistName=?', [artist]) + update_clean = myDB.select( + 'SELECT ArtistName, AlbumTitle, TrackTitle, CleanName, Matched from have WHERE ArtistName=?', + [artist]) update_count = 0 for tracks in update_clean: - original_clean = helpers.cleanName(tracks['ArtistName'] + " " + tracks['AlbumTitle'] + " " + tracks['TrackTitle']).lower() + original_clean = helpers.cleanName( + tracks['ArtistName'] + " " + tracks['AlbumTitle'] + " " + tracks[ + 'TrackTitle']).lower() album = tracks['AlbumTitle'] track_title = tracks['TrackTitle'] if tracks['CleanName'] != original_clean: - myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE CleanName=?', [None, None, None, tracks['CleanName']]) - myDB.action('UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE CleanName=?', [None, None, None, tracks['CleanName']]) - myDB.action('UPDATE have SET CleanName=?, Matched="Failed" WHERE ArtistName=? AND AlbumTitle=? AND TrackTitle=?', (original_clean, artist, album, track_title)) + myDB.action( + 'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE CleanName=?', + [None, None, None, tracks['CleanName']]) + myDB.action( + 'UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE CleanName=?', + [None, None, None, tracks['CleanName']]) + myDB.action( + 'UPDATE have SET CleanName=?, Matched="Failed" WHERE ArtistName=? AND AlbumTitle=? AND TrackTitle=?', + (original_clean, artist, album, track_title)) update_count += 1 if update_count > 0: librarysync.update_album_status() @@ -705,18 +792,29 @@ class WebInterface(object): elif action == "unmatchAlbum": artist = existing_artist album = existing_album - update_clean = myDB.select('SELECT ArtistName, AlbumTitle, TrackTitle, CleanName, Matched from have WHERE ArtistName=? AND AlbumTitle=?', (artist, album)) + update_clean = myDB.select( + 'SELECT ArtistName, AlbumTitle, TrackTitle, CleanName, Matched from have WHERE ArtistName=? AND AlbumTitle=?', + (artist, album)) update_count = 0 for tracks in update_clean: - original_clean = helpers.cleanName(tracks['ArtistName'] + " " + tracks['AlbumTitle'] + " " + tracks['TrackTitle']).lower() + original_clean = helpers.cleanName( + tracks['ArtistName'] + " " + tracks['AlbumTitle'] + " " + tracks[ + 'TrackTitle']).lower() track_title = tracks['TrackTitle'] if tracks['CleanName'] != original_clean: - album_id_check = myDB.action('SELECT AlbumID from tracks WHERE CleanName=?', [tracks['CleanName']]).fetchone() + album_id_check = myDB.action('SELECT AlbumID from tracks WHERE CleanName=?', + [tracks['CleanName']]).fetchone() if album_id_check: album_id = album_id_check[0] - myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE CleanName=?', [None, None, None, tracks['CleanName']]) - myDB.action('UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE CleanName=?', [None, None, None, tracks['CleanName']]) - myDB.action('UPDATE have SET CleanName=?, Matched="Failed" WHERE ArtistName=? AND AlbumTitle=? AND TrackTitle=?', (original_clean, artist, album, track_title)) + myDB.action( + 'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE CleanName=?', + [None, None, None, tracks['CleanName']]) + myDB.action( + 'UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE CleanName=?', + [None, None, None, tracks['CleanName']]) + myDB.action( + 'UPDATE have SET CleanName=?, Matched="Failed" WHERE ArtistName=? AND AlbumTitle=? AND TrackTitle=?', + (original_clean, artist, album, track_title)) update_count += 1 if update_count > 0: librarysync.update_album_status(album_id) @@ -802,7 +900,9 @@ class WebInterface(object): @cherrypy.expose def forcePostProcess(self, dir=None, album_dir=None, keep_original_folder=False): from headphones import postprocessor - threading.Thread(target=postprocessor.forcePostProcess, kwargs={'dir': dir, 'album_dir': album_dir, 'keep_original_folder':keep_original_folder == 'True'}).start() + threading.Thread(target=postprocessor.forcePostProcess, + kwargs={'dir': dir, 'album_dir': album_dir, + 'keep_original_folder': keep_original_folder == 'True'}).start() raise cherrypy.HTTPRedirect("home") @cherrypy.expose @@ -814,7 +914,8 @@ class WebInterface(object): @cherrypy.expose def history(self): myDB = db.DBConnection() - history = myDB.select('''SELECT AlbumID, Title, Size, URL, DateAdded, Status, Kind, ifnull(FolderName, '?') FolderName FROM snatched WHERE Status NOT LIKE "Seed%" ORDER BY DateAdded DESC''') + history = myDB.select( + '''SELECT AlbumID, Title, Size, URL, DateAdded, Status, Kind, ifnull(FolderName, '?') FolderName FROM snatched WHERE Status NOT LIKE "Seed%" ORDER BY DateAdded DESC''') return serve_template(templatename="history.html", title="History", history=history) @cherrypy.expose @@ -831,13 +932,14 @@ class WebInterface(object): def toggleVerbose(self): headphones.VERBOSE = not headphones.VERBOSE logger.initLogger(console=not headphones.QUIET, - log_dir=headphones.CONFIG.LOG_DIR, verbose=headphones.VERBOSE) + log_dir=headphones.CONFIG.LOG_DIR, verbose=headphones.VERBOSE) logger.info("Verbose toggled, set to %s", headphones.VERBOSE) logger.debug("If you read this message, debug logging is available") raise cherrypy.HTTPRedirect("logs") @cherrypy.expose - def getLog(self, iDisplayStart=0, iDisplayLength=100, iSortCol_0=0, sSortDir_0="desc", sSearch="", **kwargs): + def getLog(self, iDisplayStart=0, iDisplayLength=100, iSortCol_0=0, sSortDir_0="desc", + sSearch="", **kwargs): iDisplayStart = int(iDisplayStart) iDisplayLength = int(iDisplayLength) @@ -845,7 +947,8 @@ class WebInterface(object): if sSearch == "": filtered = headphones.LOG_LIST[::] else: - filtered = [row for row in headphones.LOG_LIST for column in row if sSearch.lower() in column.lower()] + filtered = [row for row in headphones.LOG_LIST for column in row if + sSearch.lower() in column.lower()] sortcolumn = 0 if iSortCol_0 == '1': @@ -864,7 +967,8 @@ class WebInterface(object): }) @cherrypy.expose - def getArtists_json(self, iDisplayStart=0, iDisplayLength=100, sSearch="", iSortCol_0='0', sSortDir_0='asc', **kwargs): + def getArtists_json(self, iDisplayStart=0, iDisplayLength=100, sSearch="", iSortCol_0='0', + sSortDir_0='asc', **kwargs): iDisplayStart = int(iDisplayStart) iDisplayLength = int(iDisplayLength) filtered = [] @@ -885,15 +989,18 @@ class WebInterface(object): filtered = myDB.select(query) totalcount = len(filtered) else: - query = 'SELECT * from artists WHERE ArtistSortName LIKE "%' + sSearch + '%" OR LatestAlbum LIKE "%' + sSearch + '%"' + 'ORDER BY %s COLLATE NOCASE %s' % (sortcolumn, sSortDir_0) + query = 'SELECT * from artists WHERE ArtistSortName LIKE "%' + sSearch + '%" OR LatestAlbum LIKE "%' + sSearch + '%"' + 'ORDER BY %s COLLATE NOCASE %s' % ( + sortcolumn, sSortDir_0) filtered = myDB.select(query) totalcount = myDB.select('SELECT COUNT(*) from artists')[0][0] if sortbyhavepercent: - filtered.sort(key=lambda x: (float(x['HaveTracks']) / x['TotalTracks'] if x['TotalTracks'] > 0 else 0.0, x['HaveTracks'] if x['HaveTracks'] else 0.0), reverse=sSortDir_0 == "asc") + filtered.sort(key=lambda x: ( + float(x['HaveTracks']) / x['TotalTracks'] if x['TotalTracks'] > 0 else 0.0, + x['HaveTracks'] if x['HaveTracks'] else 0.0), reverse=sSortDir_0 == "asc") - #can't figure out how to change the datatables default sorting order when its using an ajax datasource so ill - #just reverse it here and the first click on the "Latest Album" header will sort by descending release date + # can't figure out how to change the datatables default sorting order when its using an ajax datasource so ill + # just reverse it here and the first click on the "Latest Album" header will sort by descending release date if sortcolumn == 'ReleaseDate': filtered.reverse() @@ -901,16 +1008,16 @@ class WebInterface(object): rows = [] for artist in artists: row = {"ArtistID": artist['ArtistID'], - "ArtistName": artist["ArtistName"], - "ArtistSortName": artist["ArtistSortName"], - "Status": artist["Status"], - "TotalTracks": artist["TotalTracks"], - "HaveTracks": artist["HaveTracks"], - "LatestAlbum": "", - "ReleaseDate": "", - "ReleaseInFuture": "False", - "AlbumID": "", - } + "ArtistName": artist["ArtistName"], + "ArtistSortName": artist["ArtistSortName"], + "Status": artist["Status"], + "TotalTracks": artist["TotalTracks"], + "HaveTracks": artist["HaveTracks"], + "LatestAlbum": "", + "ReleaseDate": "", + "ReleaseInFuture": "False", + "AlbumID": "", + } if not row['HaveTracks']: row['HaveTracks'] = 0 @@ -954,9 +1061,9 @@ class WebInterface(object): myDB = db.DBConnection() artist = myDB.action('SELECT * FROM artists WHERE ArtistID=?', [ArtistID]).fetchone() artist_json = json.dumps({ - 'ArtistName': artist['ArtistName'], - 'Status': artist['Status'] - }) + 'ArtistName': artist['ArtistName'], + 'Status': artist['Status'] + }) return artist_json @cherrypy.expose @@ -964,9 +1071,9 @@ class WebInterface(object): myDB = db.DBConnection() album = myDB.action('SELECT * from albums WHERE AlbumID=?', [AlbumID]).fetchone() album_json = json.dumps({ - 'AlbumTitle': album['AlbumTitle'], - 'ArtistName': album['ArtistName'], - 'Status': album['Status'] + 'AlbumTitle': album['AlbumTitle'], + 'ArtistName': album['ArtistName'], + 'Status': album['Status'] }) return album_json @@ -982,7 +1089,9 @@ class WebInterface(object): myDB.action('DELETE from snatched WHERE Status=?', [type]) else: logger.info(u"Deleting '%s' from history" % title) - myDB.action('DELETE from snatched WHERE Status NOT LIKE "Seed%" AND Title=? AND DateAdded=?', [title, date_added]) + myDB.action( + 'DELETE from snatched WHERE Status NOT LIKE "Seed%" AND Title=? AND DateAdded=?', + [title, date_added]) raise cherrypy.HTTPRedirect("history") @cherrypy.expose @@ -995,7 +1104,7 @@ class WebInterface(object): def forceScan(self, keepmatched=None): myDB = db.DBConnection() ######################################### - #NEED TO MOVE THIS INTO A SEPARATE FUNCTION BEFORE RELEASE + # NEED TO MOVE THIS INTO A SEPARATE FUNCTION BEFORE RELEASE myDB.select('DELETE from Have') logger.info('Removed all entries in local library database') myDB.select('UPDATE alltracks SET Location=NULL, BitRate=NULL, Format=NULL') @@ -1003,7 +1112,8 @@ class WebInterface(object): logger.info('All tracks in library unmatched') myDB.action('UPDATE artists SET HaveTracks=NULL') logger.info('Reset track counts for all artists') - myDB.action('UPDATE albums SET Status="Skipped" WHERE Status="Skipped" OR Status="Downloaded"') + myDB.action( + 'UPDATE albums SET Status="Skipped" WHERE Status="Skipped" OR Status="Downloaded"') logger.info('Marking all unwanted albums as Skipped') try: threading.Thread(target=librarysync.libraryScan).start() @@ -1014,7 +1124,8 @@ class WebInterface(object): @cherrypy.expose def config(self): interface_dir = os.path.join(headphones.PROG_DIR, 'data/interfaces/') - interface_list = [name for name in os.listdir(interface_dir) if os.path.isdir(os.path.join(interface_dir, name))] + interface_list = [name for name in os.listdir(interface_dir) if + os.path.isdir(os.path.join(interface_dir, name))] config = { "http_host": headphones.CONFIG.HTTP_HOST, @@ -1115,7 +1226,8 @@ class WebInterface(object): "preferred_bitrate": headphones.CONFIG.PREFERRED_BITRATE, "preferred_bitrate_high": headphones.CONFIG.PREFERRED_BITRATE_HIGH_BUFFER, "preferred_bitrate_low": headphones.CONFIG.PREFERRED_BITRATE_LOW_BUFFER, - "preferred_bitrate_allow_lossless": checked(headphones.CONFIG.PREFERRED_BITRATE_ALLOW_LOSSLESS), + "preferred_bitrate_allow_lossless": checked( + headphones.CONFIG.PREFERRED_BITRATE_ALLOW_LOSSLESS), "detect_bitrate": checked(headphones.CONFIG.DETECT_BITRATE), "lossless_bitrate_from": headphones.CONFIG.LOSSLESS_BITRATE_FROM, "lossless_bitrate_to": headphones.CONFIG.LOSSLESS_BITRATE_TO, @@ -1133,7 +1245,7 @@ class WebInterface(object): "embed_album_art": checked(headphones.CONFIG.EMBED_ALBUM_ART), "embed_lyrics": checked(headphones.CONFIG.EMBED_LYRICS), "replace_existing_folders": checked(headphones.CONFIG.REPLACE_EXISTING_FOLDERS), - "keep_original_folder" : checked(headphones.CONFIG.KEEP_ORIGINAL_FOLDER), + "keep_original_folder": checked(headphones.CONFIG.KEEP_ORIGINAL_FOLDER), "destination_dir": headphones.CONFIG.DESTINATION_DIR, "lossless_destination_dir": headphones.CONFIG.LOSSLESS_DESTINATION_DIR, "folder_format": headphones.CONFIG.FOLDER_FORMAT, @@ -1286,18 +1398,31 @@ class WebInterface(object): # Handle the variable config options. Note - keys with False values aren't getting passed checked_configs = [ - "launch_browser", "enable_https", "api_enabled", "use_blackhole", "headphones_indexer", "use_newznab", "newznab_enabled", "use_torznab", "torznab_enabled", - "use_nzbsorg", "use_omgwtfnzbs", "use_kat", "use_piratebay", "use_oldpiratebay", "use_mininova", "use_waffles", "use_rutracker", - "use_whatcd", "use_strike", "preferred_bitrate_allow_lossless", "detect_bitrate", "ignore_clean_releases", "freeze_db", "cue_split", "move_files", - "rename_files", "correct_metadata", "cleanup_files", "keep_nfo", "add_album_art", "embed_album_art", "embed_lyrics", - "replace_existing_folders", "keep_original_folder", "file_underscores", "include_extras", "official_releases_only", - "wait_until_release_date", "autowant_upcoming", "autowant_all", "autowant_manually_added", "do_not_process_unmatched", "keep_torrent_files", "music_encoder", - "encoderlossless", "encoder_multicore", "delete_lossless_files", "growl_enabled", "growl_onsnatch", "prowl_enabled", - "prowl_onsnatch", "xbmc_enabled", "xbmc_update", "xbmc_notify", "lms_enabled", "plex_enabled", "plex_update", "plex_notify", - "nma_enabled", "nma_onsnatch", "pushalot_enabled", "pushalot_onsnatch", "synoindex_enabled", "pushover_enabled", - "pushover_onsnatch", "pushbullet_enabled", "pushbullet_onsnatch", "subsonic_enabled", "twitter_enabled", "twitter_onsnatch", - "osx_notify_enabled", "osx_notify_onsnatch", "boxcar_enabled", "boxcar_onsnatch", "songkick_enabled", "songkick_filter_enabled", - "mpc_enabled", "email_enabled", "email_ssl", "email_tls", "email_onsnatch", "customauth", "idtag" + "launch_browser", "enable_https", "api_enabled", "use_blackhole", "headphones_indexer", + "use_newznab", "newznab_enabled", "use_torznab", "torznab_enabled", + "use_nzbsorg", "use_omgwtfnzbs", "use_kat", "use_piratebay", "use_oldpiratebay", + "use_mininova", "use_waffles", "use_rutracker", + "use_whatcd", "use_strike", "preferred_bitrate_allow_lossless", "detect_bitrate", + "ignore_clean_releases", "freeze_db", "cue_split", "move_files", + "rename_files", "correct_metadata", "cleanup_files", "keep_nfo", "add_album_art", + "embed_album_art", "embed_lyrics", + "replace_existing_folders", "keep_original_folder", "file_underscores", + "include_extras", "official_releases_only", + "wait_until_release_date", "autowant_upcoming", "autowant_all", + "autowant_manually_added", "do_not_process_unmatched", "keep_torrent_files", + "music_encoder", + "encoderlossless", "encoder_multicore", "delete_lossless_files", "growl_enabled", + "growl_onsnatch", "prowl_enabled", + "prowl_onsnatch", "xbmc_enabled", "xbmc_update", "xbmc_notify", "lms_enabled", + "plex_enabled", "plex_update", "plex_notify", + "nma_enabled", "nma_onsnatch", "pushalot_enabled", "pushalot_onsnatch", + "synoindex_enabled", "pushover_enabled", + "pushover_onsnatch", "pushbullet_enabled", "pushbullet_onsnatch", "subsonic_enabled", + "twitter_enabled", "twitter_onsnatch", + "osx_notify_enabled", "osx_notify_onsnatch", "boxcar_enabled", "boxcar_onsnatch", + "songkick_enabled", "songkick_filter_enabled", + "mpc_enabled", "email_enabled", "email_ssl", "email_tls", "email_onsnatch", + "customauth", "idtag" ] for checked_config in checked_configs: if checked_config not in kwargs: @@ -1473,9 +1598,11 @@ class WebInterface(object): image_dict = {'artwork': image_url, 'thumbnail': thumb_url} elif AlbumID and (not image_dict['artwork'] or not image_dict['thumbnail']): if not image_dict['artwork']: - image_dict['artwork'] = "http://coverartarchive.org/release/%s/front-500.jpg" % AlbumID + image_dict[ + 'artwork'] = "http://coverartarchive.org/release/%s/front-500.jpg" % AlbumID if not image_dict['thumbnail']: - image_dict['thumbnail'] = "http://coverartarchive.org/release/%s/front-250.jpg" % AlbumID + image_dict[ + 'thumbnail'] = "http://coverartarchive.org/release/%s/front-250.jpg" % AlbumID return json.dumps(image_dict) @@ -1514,7 +1641,8 @@ class WebInterface(object): if result: osx_notify = notifiers.OSX_NOTIFY() osx_notify.notify('Registered', result, 'Success :-)') - logger.info('Registered %s, to re-register a different app, delete this app first' % result) + logger.info( + 'Registered %s, to re-register a different app, delete this app first' % result) else: logger.warn(msg) return msg @@ -1536,7 +1664,8 @@ class WebInterface(object): def testPushbullet(self): logger.info("Testing Pushbullet notifications") pushbullet = notifiers.PUSHBULLET() - pushbullet.notify("it works!") + pushbullet.notify("it works!", "Test message") + class Artwork(object): @cherrypy.expose @@ -1605,4 +1734,6 @@ class Artwork(object): return fp.read() thumbs = Thumbs() + + WebInterface.artwork = Artwork() diff --git a/headphones/webstart.py b/headphones/webstart.py index cf2f4a77..a4ab7113 100644 --- a/headphones/webstart.py +++ b/headphones/webstart.py @@ -13,18 +13,17 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -import os import sys + +import os import cherrypy import headphones - from headphones import logger from headphones.webserve import WebInterface from headphones.helpers import create_https_certificates def initialize(options): - # HTTPS stuff stolen from sickbeard enable_https = options['enable_https'] https_cert = options['https_cert'] @@ -33,15 +32,16 @@ def initialize(options): if enable_https: # If either the HTTPS certificate or key do not exist, try to make # self-signed ones. - if not (https_cert and os.path.exists(https_cert)) or not (https_key and os.path.exists(https_key)): + if not (https_cert and os.path.exists(https_cert)) or not ( + https_key and os.path.exists(https_key)): if not create_https_certificates(https_cert, https_key): logger.warn("Unable to create certificate and key. Disabling " \ - "HTTPS") + "HTTPS") enable_https = False if not (os.path.exists(https_cert) and os.path.exists(https_key)): logger.warn("Disabled HTTPS because of missing certificate and " \ - "key.") + "key.") enable_https = False options_dict = { @@ -63,7 +63,7 @@ def initialize(options): protocol = "http" logger.info("Starting Headphones web server on %s://%s:%d/", protocol, - options['http_host'], options['http_port']) + options['http_host'], options['http_port']) cherrypy.config.update(options_dict) conf = { @@ -99,7 +99,8 @@ def initialize(options): } if options['http_password']: - logger.info("Web server authentication is enabled, username is '%s'", options['http_username']) + logger.info("Web server authentication is enabled, username is '%s'", + options['http_username']) conf['/'].update({ 'tools.auth_basic.on': True, @@ -118,7 +119,8 @@ def initialize(options): cherrypy.process.servers.check_port(str(options['http_host']), options['http_port']) cherrypy.server.start() except IOError: - sys.stderr.write('Failed to start on port: %i. Is something else running?\n' % (options['http_port'])) + sys.stderr.write( + 'Failed to start on port: %i. Is something else running?\n' % (options['http_port'])) sys.exit(1) cherrypy.server.wait() From 74e071b1d740464a086ab825e1a3492fc9289c4c Mon Sep 17 00:00:00 2001 From: Jamie Magee Date: Fri, 27 Nov 2015 14:10:08 +0100 Subject: [PATCH 29/44] Add Python, Windows, OS X, and Linux to .gitignore --- .gitignore | 135 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 120 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index a3c65e2c..2c038baf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,67 @@ +# Python # +########## +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class -# Compiled source # -################### -*.pyc -*.py~ -*.pyproj -*.sln +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ # Headphones files # -###################### +#################### *.log *.db* *.db-journal @@ -22,16 +76,68 @@ cache/* *.key *.csr -# OS generated files # -###################### -.DS_Store? -.DS_Store -ehthumbs.db -Icon? +# Windows # +########## +# Windows image file caches Thumbs.db +ehthumbs.db + +# Folder config file +Desktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msm +*.msp + +# Windows shortcuts +*.lnk + +# OS X # +######## +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# Linux # +######### +*~ + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* #Ignore files generated by PyCharm -.idea/* +.idea/ +*.iml #Ignore files generated by vi *.swp @@ -53,7 +159,6 @@ Thumbs.db *.bak *.cache *.ilk -*.log [Bb]in [Dd]ebug*/ *.lib From 982594a4a5865f4fe7e1342e829d4c11e18a033a Mon Sep 17 00:00:00 2001 From: Jamie Magee Date: Wed, 25 Nov 2015 19:52:26 +0100 Subject: [PATCH 30/44] Migrate as much as possible to pip and requirements.txt --- .gitignore | 4 + .travis.yml | 33 +- {lib => headphones}/bencode.py | 36 +- {lib => headphones}/certgen.py | 7 +- lib/MultipartPostHandler.py | 88 - lib/apscheduler/__init__.py | 5 - lib/apscheduler/events.py | 73 - lib/apscheduler/executors/__init__.py | 0 lib/apscheduler/executors/asyncio.py | 28 - lib/apscheduler/executors/base.py | 119 - lib/apscheduler/executors/debug.py | 19 - lib/apscheduler/executors/gevent.py | 29 - lib/apscheduler/executors/pool.py | 54 - lib/apscheduler/executors/twisted.py | 25 - lib/apscheduler/job.py | 252 - lib/apscheduler/jobstores/__init__.py | 0 lib/apscheduler/jobstores/base.py | 127 - lib/apscheduler/jobstores/memory.py | 107 - lib/apscheduler/jobstores/mongodb.py | 124 - lib/apscheduler/jobstores/redis.py | 138 - lib/apscheduler/jobstores/sqlalchemy.py | 137 - lib/apscheduler/schedulers/__init__.py | 12 - lib/apscheduler/schedulers/asyncio.py | 68 - lib/apscheduler/schedulers/background.py | 39 - lib/apscheduler/schedulers/base.py | 845 --- lib/apscheduler/schedulers/blocking.py | 32 - lib/apscheduler/schedulers/gevent.py | 35 - lib/apscheduler/schedulers/qt.py | 46 - lib/apscheduler/schedulers/tornado.py | 60 - lib/apscheduler/schedulers/twisted.py | 65 - lib/apscheduler/triggers/__init__.py | 0 lib/apscheduler/triggers/base.py | 16 - lib/apscheduler/triggers/cron/__init__.py | 176 - lib/apscheduler/triggers/cron/expressions.py | 188 - lib/apscheduler/triggers/cron/fields.py | 97 - lib/apscheduler/triggers/date.py | 30 - lib/apscheduler/triggers/interval.py | 65 - lib/apscheduler/util.py | 385 -- lib/argparse.py | 2386 -------- lib/beets/LICENSE | 21 - lib/beets/README.rst | 94 - lib/beets/__init__.py | 28 - lib/beets/autotag/__init__.py | 136 - lib/beets/autotag/hooks.py | 579 -- lib/beets/autotag/match.py | 492 -- lib/beets/autotag/mb.py | 407 -- lib/beets/config_default.yaml | 109 - lib/beets/dbcore/__init__.py | 25 - lib/beets/dbcore/db.py | 820 --- lib/beets/dbcore/query.py | 654 --- lib/beets/dbcore/queryparse.py | 180 - lib/beets/dbcore/types.py | 208 - lib/beets/importer.py | 1428 ----- lib/beets/library.py | 1299 ----- lib/beets/mediafile.py | 1929 ------- lib/beets/plugins.py | 435 -- lib/beets/ui/__init__.py | 970 ---- lib/beets/ui/commands.py | 1646 ------ lib/beets/ui/completion_base.sh | 162 - lib/beets/util/__init__.py | 686 --- lib/beets/util/artresizer.py | 211 - lib/beets/util/bluelet.py | 647 --- lib/beets/util/confit.py | 1281 ----- lib/beets/util/enumeration.py | 40 - lib/beets/util/functemplate.py | 571 -- lib/beets/util/pipeline.py | 517 -- lib/beets/vfs.py | 50 - lib/beetsplug/__init__.py | 19 - lib/beetsplug/embedart.py | 280 - lib/beetsplug/fetchart.py | 396 -- lib/beetsplug/lyrics.py | 544 -- lib/biplist/__init__.py | 803 --- lib/bs4/__init__.py | 468 -- lib/bs4/builder/__init__.py | 324 -- lib/bs4/builder/_html5lib.py | 329 -- lib/bs4/builder/_htmlparser.py | 262 - lib/bs4/builder/_lxml.py | 248 - lib/bs4/dammit.py | 839 --- lib/bs4/diagnose.py | 213 - lib/bs4/element.py | 1713 ------ lib/bs4/testing.py | 680 --- lib/cherrypy/LICENSE.txt | 25 - lib/cherrypy/__init__.py | 652 --- lib/cherrypy/_cpchecker.py | 332 -- lib/cherrypy/_cpcompat.py | 383 -- lib/cherrypy/_cpcompat_subprocess.py | 1544 ----- lib/cherrypy/_cpconfig.py | 317 -- lib/cherrypy/_cpdispatch.py | 686 --- lib/cherrypy/_cperror.py | 609 -- lib/cherrypy/_cplogging.py | 459 -- lib/cherrypy/_cpmodpy.py | 353 -- lib/cherrypy/_cpnative_server.py | 154 - lib/cherrypy/_cpreqbody.py | 1013 ---- lib/cherrypy/_cprequest.py | 973 ---- lib/cherrypy/_cpserver.py | 226 - lib/cherrypy/_cpthreadinglocal.py | 241 - lib/cherrypy/_cptools.py | 529 -- lib/cherrypy/_cptree.py | 299 - lib/cherrypy/_cpwsgi.py | 438 -- lib/cherrypy/_cpwsgi_server.py | 70 - lib/cherrypy/cherryd | 110 - lib/cherrypy/favicon.ico | Bin 1406 -> 0 bytes lib/cherrypy/lib/__init__.py | 85 - lib/cherrypy/lib/auth.py | 97 - lib/cherrypy/lib/auth_basic.py | 90 - lib/cherrypy/lib/auth_digest.py | 390 -- lib/cherrypy/lib/caching.py | 470 -- lib/cherrypy/lib/covercp.py | 387 -- lib/cherrypy/lib/cpstats.py | 687 --- lib/cherrypy/lib/cptools.py | 630 --- lib/cherrypy/lib/encoding.py | 421 -- lib/cherrypy/lib/gctools.py | 217 - lib/cherrypy/lib/http.py | 6 - lib/cherrypy/lib/httpauth.py | 371 -- lib/cherrypy/lib/httputil.py | 536 -- lib/cherrypy/lib/jsontools.py | 96 - lib/cherrypy/lib/lockfile.py | 147 - lib/cherrypy/lib/locking.py | 47 - lib/cherrypy/lib/profiler.py | 216 - lib/cherrypy/lib/reprconf.py | 503 -- lib/cherrypy/lib/sessions.py | 968 ---- lib/cherrypy/lib/static.py | 378 -- lib/cherrypy/lib/xmlrpcutil.py | 57 - lib/cherrypy/process/__init__.py | 14 - lib/cherrypy/process/plugins.py | 717 --- lib/cherrypy/process/servers.py | 465 -- lib/cherrypy/process/win32.py | 180 - lib/cherrypy/process/wspbus.py | 448 -- lib/cherrypy/scaffold/__init__.py | 61 - lib/cherrypy/scaffold/apache-fcgi.conf | 22 - lib/cherrypy/scaffold/example.conf | 3 - lib/cherrypy/scaffold/site.conf | 14 - .../static/made_with_cherrypy_small.png | Bin 7455 -> 0 bytes lib/cherrypy/wsgiserver/__init__.py | 14 - lib/cherrypy/wsgiserver/ssl_builtin.py | 92 - lib/cherrypy/wsgiserver/ssl_pyopenssl.py | 253 - lib/cherrypy/wsgiserver/wsgiserver2.py | 2481 -------- lib/cherrypy/wsgiserver/wsgiserver3.py | 2176 ------- lib/concurrent/LICENSE | 21 - lib/concurrent/__init__.py | 3 - lib/concurrent/futures/__init__.py | 23 - lib/concurrent/futures/_base.py | 605 -- lib/concurrent/futures/_compat.py | 111 - lib/concurrent/futures/process.py | 363 -- lib/concurrent/futures/thread.py | 138 - lib/configobj.py | 2468 -------- lib/enum/LICENSE | 32 - lib/enum/README | 2 - lib/enum/__init__.py | 790 --- lib/enum/doc/enum.rst | 725 --- lib/enum/enum.py | 790 --- lib/enum/test_enum.py | 1690 ------ lib/feedparser.py | 3909 ------------- lib/gntp/LICENSE | 20 - lib/gntp/__init__.py | 0 lib/gntp/cli.py | 141 - lib/gntp/config.py | 77 - lib/gntp/core.py | 511 -- lib/gntp/errors.py | 25 - lib/gntp/notifier.py | 265 - lib/gntp/shim.py | 45 - lib/gntp/version.py | 4 - lib/html5lib/__init__.py | 25 - lib/html5lib/constants.py | 3102 ---------- lib/html5lib/filters/__init__.py | 0 lib/html5lib/filters/_base.py | 12 - .../filters/alphabeticalattributes.py | 20 - lib/html5lib/filters/inject_meta_charset.py | 65 - lib/html5lib/filters/lint.py | 90 - lib/html5lib/filters/optionaltags.py | 205 - lib/html5lib/filters/sanitizer.py | 12 - lib/html5lib/filters/whitespace.py | 38 - lib/html5lib/html5parser.py | 2724 --------- lib/html5lib/ihatexml.py | 285 - lib/html5lib/inputstream.py | 903 --- lib/html5lib/sanitizer.py | 296 - lib/html5lib/serializer/__init__.py | 16 - lib/html5lib/serializer/htmlserializer.py | 317 -- lib/html5lib/tokenizer.py | 1731 ------ lib/html5lib/treeadapters/__init__.py | 0 lib/html5lib/treeadapters/sax.py | 44 - lib/html5lib/treebuilders/__init__.py | 76 - lib/html5lib/treebuilders/_base.py | 377 -- lib/html5lib/treebuilders/dom.py | 227 - lib/html5lib/treebuilders/etree.py | 337 -- lib/html5lib/treebuilders/etree_lxml.py | 369 -- lib/html5lib/treewalkers/__init__.py | 147 - lib/html5lib/treewalkers/_base.py | 200 - lib/html5lib/treewalkers/dom.py | 43 - lib/html5lib/treewalkers/etree.py | 136 - lib/html5lib/treewalkers/genshistream.py | 69 - lib/html5lib/treewalkers/lxmletree.py | 201 - lib/html5lib/treewalkers/pulldom.py | 63 - lib/html5lib/trie/__init__.py | 12 - lib/html5lib/trie/_base.py | 37 - lib/html5lib/trie/datrie.py | 44 - lib/html5lib/trie/py.py | 67 - lib/html5lib/utils.py | 103 - lib/httplib2/__init__.py | 1215 ---- lib/httplib2/iri2uri.py | 110 - lib/logutils/__init__.py | 195 - lib/logutils/adapter.py | 116 - lib/logutils/colorize.py | 194 - lib/logutils/dictconfig.py | 573 -- lib/logutils/http.py | 90 - lib/logutils/queue.py | 225 - lib/logutils/redis.py | 75 - lib/logutils/testing.py | 156 - lib/mako/LICENSE | 20 - lib/mako/README.rst | 52 - lib/mako/__init__.py | 8 - lib/mako/_ast_util.py | 845 --- lib/mako/ast.py | 178 - lib/mako/cache.py | 238 - lib/mako/cmd.py | 62 - lib/mako/codegen.py | 1237 ---- lib/mako/compat.py | 174 - lib/mako/exceptions.py | 373 -- lib/mako/ext/__init__.py | 0 lib/mako/ext/autohandler.py | 65 - lib/mako/ext/babelplugin.py | 49 - lib/mako/ext/beaker_cache.py | 75 - lib/mako/ext/extract.py | 101 - lib/mako/ext/linguaplugin.py | 38 - lib/mako/ext/preprocessors.py | 20 - lib/mako/ext/pygmentplugin.py | 122 - lib/mako/ext/turbogears.py | 58 - lib/mako/filters.py | 201 - lib/mako/lexer.py | 441 -- lib/mako/lookup.py | 359 -- lib/mako/parsetree.py | 594 -- lib/mako/pygen.py | 299 - lib/mako/pyparser.py | 232 - lib/mako/runtime.py | 878 --- lib/mako/template.py | 705 --- lib/mako/util.py | 360 -- lib/munkres.py | 786 --- lib/musicbrainzngs/__init__.py | 2 - lib/musicbrainzngs/caa.py | 177 - lib/musicbrainzngs/compat.py | 62 - lib/musicbrainzngs/mbxml.py | 722 --- lib/musicbrainzngs/musicbrainz.py | 1255 ---- lib/musicbrainzngs/util.py | 44 - lib/mutagen/README.rst | 58 - lib/mutagen/__init__.py | 41 - lib/mutagen/_compat.py | 84 - lib/mutagen/_constants.py | 199 - lib/mutagen/_file.py | 237 - lib/mutagen/_tags.py | 31 - lib/mutagen/_util.py | 603 -- lib/mutagen/_vorbis.py | 327 -- lib/mutagen/aac.py | 407 -- lib/mutagen/aiff.py | 362 -- lib/mutagen/apev2.py | 714 --- lib/mutagen/asf.py | 862 --- lib/mutagen/easyid3.py | 534 -- lib/mutagen/easymp4.py | 284 - lib/mutagen/flac.py | 838 --- lib/mutagen/id3/__init__.py | 1034 ---- lib/mutagen/id3/_frames.py | 1916 ------- lib/mutagen/id3/_specs.py | 543 -- lib/mutagen/id3/_util.py | 165 - lib/mutagen/m4a.py | 545 -- lib/mutagen/monkeysaudio.py | 86 - lib/mutagen/mp3.py | 289 - lib/mutagen/mp4/__init__.py | 965 ---- lib/mutagen/mp4/_as_entry.py | 541 -- lib/mutagen/mp4/_atom.py | 190 - lib/mutagen/mp4/_util.py | 21 - lib/mutagen/musepack.py | 270 - lib/mutagen/ogg.py | 508 -- lib/mutagen/oggflac.py | 147 - lib/mutagen/oggopus.py | 138 - lib/mutagen/oggspeex.py | 138 - lib/mutagen/oggtheora.py | 131 - lib/mutagen/oggvorbis.py | 139 - lib/mutagen/optimfrog.py | 74 - lib/mutagen/trueaudio.py | 84 - lib/mutagen/wavpack.py | 124 - lib/oauth2/__init__.py | 704 --- lib/ordereddict.py | 127 - lib/pkg_resources.py | 2895 ---------- lib/pygazelle/__init__.py | 0 lib/pygazelle/api.py | 419 -- lib/pygazelle/artist.py | 85 - lib/pygazelle/category.py | 13 - lib/pygazelle/encoding.py | 13 - lib/pygazelle/format.py | 8 - lib/pygazelle/inbox.py | 107 - lib/pygazelle/media.py | 11 - lib/pygazelle/release_type.py | 19 - lib/pygazelle/request.py | 29 - lib/pygazelle/tag.py | 17 - lib/pygazelle/torrent.py | 183 - lib/pygazelle/torrent_group.py | 139 - lib/pygazelle/user.py | 108 - lib/pynma/__init__.py | 4 - lib/pynma/pynma.py | 148 - lib/pythontwitter/__init__.py | 4651 --------------- lib/pytz/LICENSE.txt | 19 - lib/pytz/README.txt | 575 -- lib/pytz/__init__.py | 1513 ----- lib/pytz/exceptions.py | 48 - lib/pytz/lazy.py | 168 - lib/pytz/reference.py | 127 - lib/pytz/tzfile.py | 137 - lib/pytz/tzinfo.py | 564 -- lib/pytz/zoneinfo/Africa/Abidjan | Bin 170 -> 0 bytes lib/pytz/zoneinfo/Africa/Accra | Bin 840 -> 0 bytes lib/pytz/zoneinfo/Africa/Addis_Ababa | Bin 283 -> 0 bytes lib/pytz/zoneinfo/Africa/Algiers | Bin 760 -> 0 bytes lib/pytz/zoneinfo/Africa/Asmara | Bin 283 -> 0 bytes lib/pytz/zoneinfo/Africa/Asmera | Bin 283 -> 0 bytes lib/pytz/zoneinfo/Africa/Bamako | Bin 170 -> 0 bytes lib/pytz/zoneinfo/Africa/Bangui | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Banjul | Bin 170 -> 0 bytes lib/pytz/zoneinfo/Africa/Bissau | Bin 208 -> 0 bytes lib/pytz/zoneinfo/Africa/Blantyre | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Brazzaville | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Bujumbura | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Cairo | Bin 2779 -> 0 bytes lib/pytz/zoneinfo/Africa/Casablanca | Bin 1657 -> 0 bytes lib/pytz/zoneinfo/Africa/Ceuta | Bin 2075 -> 0 bytes lib/pytz/zoneinfo/Africa/Conakry | Bin 170 -> 0 bytes lib/pytz/zoneinfo/Africa/Dakar | Bin 170 -> 0 bytes lib/pytz/zoneinfo/Africa/Dar_es_Salaam | Bin 283 -> 0 bytes lib/pytz/zoneinfo/Africa/Djibouti | Bin 283 -> 0 bytes lib/pytz/zoneinfo/Africa/Douala | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/El_Aaiun | Bin 1487 -> 0 bytes lib/pytz/zoneinfo/Africa/Freetown | Bin 170 -> 0 bytes lib/pytz/zoneinfo/Africa/Gaborone | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Harare | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Johannesburg | Bin 271 -> 0 bytes lib/pytz/zoneinfo/Africa/Juba | Bin 683 -> 0 bytes lib/pytz/zoneinfo/Africa/Kampala | Bin 283 -> 0 bytes lib/pytz/zoneinfo/Africa/Khartoum | Bin 683 -> 0 bytes lib/pytz/zoneinfo/Africa/Kigali | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Kinshasa | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Lagos | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Libreville | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Lome | Bin 170 -> 0 bytes lib/pytz/zoneinfo/Africa/Luanda | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Lubumbashi | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Lusaka | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Malabo | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Maputo | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Maseru | Bin 271 -> 0 bytes lib/pytz/zoneinfo/Africa/Mbabane | Bin 271 -> 0 bytes lib/pytz/zoneinfo/Africa/Mogadishu | Bin 283 -> 0 bytes lib/pytz/zoneinfo/Africa/Monrovia | Bin 241 -> 0 bytes lib/pytz/zoneinfo/Africa/Nairobi | Bin 283 -> 0 bytes lib/pytz/zoneinfo/Africa/Ndjamena | Bin 225 -> 0 bytes lib/pytz/zoneinfo/Africa/Niamey | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Nouakchott | Bin 170 -> 0 bytes lib/pytz/zoneinfo/Africa/Ouagadougou | Bin 170 -> 0 bytes lib/pytz/zoneinfo/Africa/Porto-Novo | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Africa/Sao_Tome | Bin 170 -> 0 bytes lib/pytz/zoneinfo/Africa/Timbuktu | Bin 170 -> 0 bytes lib/pytz/zoneinfo/Africa/Tripoli | Bin 655 -> 0 bytes lib/pytz/zoneinfo/Africa/Tunis | Bin 710 -> 0 bytes lib/pytz/zoneinfo/Africa/Windhoek | Bin 1582 -> 0 bytes lib/pytz/zoneinfo/America/Adak | Bin 2379 -> 0 bytes lib/pytz/zoneinfo/America/Anchorage | Bin 2384 -> 0 bytes lib/pytz/zoneinfo/America/Anguilla | Bin 170 -> 0 bytes lib/pytz/zoneinfo/America/Antigua | Bin 208 -> 0 bytes lib/pytz/zoneinfo/America/Araguaina | Bin 896 -> 0 bytes .../zoneinfo/America/Argentina/Buenos_Aires | Bin 1087 -> 0 bytes lib/pytz/zoneinfo/America/Argentina/Catamarca | Bin 1129 -> 0 bytes .../zoneinfo/America/Argentina/ComodRivadavia | Bin 1129 -> 0 bytes lib/pytz/zoneinfo/America/Argentina/Cordoba | Bin 1129 -> 0 bytes lib/pytz/zoneinfo/America/Argentina/Jujuy | Bin 1145 -> 0 bytes lib/pytz/zoneinfo/America/Argentina/La_Rioja | Bin 1143 -> 0 bytes lib/pytz/zoneinfo/America/Argentina/Mendoza | Bin 1173 -> 0 bytes .../zoneinfo/America/Argentina/Rio_Gallegos | Bin 1129 -> 0 bytes lib/pytz/zoneinfo/America/Argentina/Salta | Bin 1101 -> 0 bytes lib/pytz/zoneinfo/America/Argentina/San_Juan | Bin 1143 -> 0 bytes lib/pytz/zoneinfo/America/Argentina/San_Luis | Bin 1171 -> 0 bytes lib/pytz/zoneinfo/America/Argentina/Tucuman | Bin 1157 -> 0 bytes lib/pytz/zoneinfo/America/Argentina/Ushuaia | Bin 1129 -> 0 bytes lib/pytz/zoneinfo/America/Aruba | Bin 208 -> 0 bytes lib/pytz/zoneinfo/America/Asuncion | Bin 2062 -> 0 bytes lib/pytz/zoneinfo/America/Atikokan | Bin 345 -> 0 bytes lib/pytz/zoneinfo/America/Atka | Bin 2379 -> 0 bytes lib/pytz/zoneinfo/America/Bahia | Bin 1036 -> 0 bytes lib/pytz/zoneinfo/America/Bahia_Banderas | Bin 1588 -> 0 bytes lib/pytz/zoneinfo/America/Barbados | Bin 344 -> 0 bytes lib/pytz/zoneinfo/America/Belem | Bin 588 -> 0 bytes lib/pytz/zoneinfo/America/Belize | Bin 976 -> 0 bytes lib/pytz/zoneinfo/America/Blanc-Sablon | Bin 307 -> 0 bytes lib/pytz/zoneinfo/America/Boa_Vista | Bin 644 -> 0 bytes lib/pytz/zoneinfo/America/Bogota | Bin 257 -> 0 bytes lib/pytz/zoneinfo/America/Boise | Bin 2403 -> 0 bytes lib/pytz/zoneinfo/America/Buenos_Aires | Bin 1087 -> 0 bytes lib/pytz/zoneinfo/America/Cambridge_Bay | Bin 2098 -> 0 bytes lib/pytz/zoneinfo/America/Campo_Grande | Bin 2015 -> 0 bytes lib/pytz/zoneinfo/America/Cancun | Bin 1480 -> 0 bytes lib/pytz/zoneinfo/America/Caracas | Bin 266 -> 0 bytes lib/pytz/zoneinfo/America/Catamarca | Bin 1129 -> 0 bytes lib/pytz/zoneinfo/America/Cayenne | Bin 200 -> 0 bytes lib/pytz/zoneinfo/America/Cayman | Bin 203 -> 0 bytes lib/pytz/zoneinfo/America/Chicago | Bin 3585 -> 0 bytes lib/pytz/zoneinfo/America/Chihuahua | Bin 1522 -> 0 bytes lib/pytz/zoneinfo/America/Coral_Harbour | Bin 345 -> 0 bytes lib/pytz/zoneinfo/America/Cordoba | Bin 1129 -> 0 bytes lib/pytz/zoneinfo/America/Costa_Rica | Bin 341 -> 0 bytes lib/pytz/zoneinfo/America/Creston | Bin 233 -> 0 bytes lib/pytz/zoneinfo/America/Cuiaba | Bin 1987 -> 0 bytes lib/pytz/zoneinfo/America/Curacao | Bin 208 -> 0 bytes lib/pytz/zoneinfo/America/Danmarkshavn | Bin 714 -> 0 bytes lib/pytz/zoneinfo/America/Dawson | Bin 2093 -> 0 bytes lib/pytz/zoneinfo/America/Dawson_Creek | Bin 1059 -> 0 bytes lib/pytz/zoneinfo/America/Denver | Bin 2453 -> 0 bytes lib/pytz/zoneinfo/America/Detroit | Bin 2216 -> 0 bytes lib/pytz/zoneinfo/America/Dominica | Bin 170 -> 0 bytes lib/pytz/zoneinfo/America/Edmonton | Bin 2402 -> 0 bytes lib/pytz/zoneinfo/America/Eirunepe | Bin 684 -> 0 bytes lib/pytz/zoneinfo/America/El_Salvador | Bin 250 -> 0 bytes lib/pytz/zoneinfo/America/Ensenada | Bin 2356 -> 0 bytes lib/pytz/zoneinfo/America/Fort_Wayne | Bin 1675 -> 0 bytes lib/pytz/zoneinfo/America/Fortaleza | Bin 728 -> 0 bytes lib/pytz/zoneinfo/America/Glace_Bay | Bin 2206 -> 0 bytes lib/pytz/zoneinfo/America/Godthab | Bin 1877 -> 0 bytes lib/pytz/zoneinfo/America/Goose_Bay | Bin 3219 -> 0 bytes lib/pytz/zoneinfo/America/Grand_Turk | Bin 1287 -> 0 bytes lib/pytz/zoneinfo/America/Grenada | Bin 170 -> 0 bytes lib/pytz/zoneinfo/America/Guadeloupe | Bin 170 -> 0 bytes lib/pytz/zoneinfo/America/Guatemala | Bin 306 -> 0 bytes lib/pytz/zoneinfo/America/Guayaquil | Bin 203 -> 0 bytes lib/pytz/zoneinfo/America/Guyana | Bin 270 -> 0 bytes lib/pytz/zoneinfo/America/Halifax | Bin 3438 -> 0 bytes lib/pytz/zoneinfo/America/Havana | Bin 2437 -> 0 bytes lib/pytz/zoneinfo/America/Hermosillo | Bin 454 -> 0 bytes .../zoneinfo/America/Indiana/Indianapolis | Bin 1675 -> 0 bytes lib/pytz/zoneinfo/America/Indiana/Knox | Bin 2437 -> 0 bytes lib/pytz/zoneinfo/America/Indiana/Marengo | Bin 1731 -> 0 bytes lib/pytz/zoneinfo/America/Indiana/Petersburg | Bin 1913 -> 0 bytes lib/pytz/zoneinfo/America/Indiana/Tell_City | Bin 1735 -> 0 bytes lib/pytz/zoneinfo/America/Indiana/Vevay | Bin 1423 -> 0 bytes lib/pytz/zoneinfo/America/Indiana/Vincennes | Bin 1703 -> 0 bytes lib/pytz/zoneinfo/America/Indiana/Winamac | Bin 1787 -> 0 bytes lib/pytz/zoneinfo/America/Indianapolis | Bin 1675 -> 0 bytes lib/pytz/zoneinfo/America/Inuvik | Bin 1928 -> 0 bytes lib/pytz/zoneinfo/America/Iqaluit | Bin 2046 -> 0 bytes lib/pytz/zoneinfo/America/Jamaica | Bin 507 -> 0 bytes lib/pytz/zoneinfo/America/Jujuy | Bin 1145 -> 0 bytes lib/pytz/zoneinfo/America/Juneau | Bin 2362 -> 0 bytes lib/pytz/zoneinfo/America/Kentucky/Louisville | Bin 2781 -> 0 bytes lib/pytz/zoneinfo/America/Kentucky/Monticello | Bin 2361 -> 0 bytes lib/pytz/zoneinfo/America/Knox_IN | Bin 2437 -> 0 bytes lib/pytz/zoneinfo/America/Kralendijk | Bin 208 -> 0 bytes lib/pytz/zoneinfo/America/La_Paz | Bin 243 -> 0 bytes lib/pytz/zoneinfo/America/Lima | Bin 417 -> 0 bytes lib/pytz/zoneinfo/America/Los_Angeles | Bin 2845 -> 0 bytes lib/pytz/zoneinfo/America/Louisville | Bin 2781 -> 0 bytes lib/pytz/zoneinfo/America/Lower_Princes | Bin 208 -> 0 bytes lib/pytz/zoneinfo/America/Maceio | Bin 756 -> 0 bytes lib/pytz/zoneinfo/America/Managua | Bin 463 -> 0 bytes lib/pytz/zoneinfo/America/Manaus | Bin 616 -> 0 bytes lib/pytz/zoneinfo/America/Marigot | Bin 170 -> 0 bytes lib/pytz/zoneinfo/America/Martinique | Bin 257 -> 0 bytes lib/pytz/zoneinfo/America/Matamoros | Bin 1416 -> 0 bytes lib/pytz/zoneinfo/America/Mazatlan | Bin 1564 -> 0 bytes lib/pytz/zoneinfo/America/Mendoza | Bin 1173 -> 0 bytes lib/pytz/zoneinfo/America/Menominee | Bin 2283 -> 0 bytes lib/pytz/zoneinfo/America/Merida | Bin 1456 -> 0 bytes lib/pytz/zoneinfo/America/Metlakatla | Bin 716 -> 0 bytes lib/pytz/zoneinfo/America/Mexico_City | Bin 1618 -> 0 bytes lib/pytz/zoneinfo/America/Miquelon | Bin 1684 -> 0 bytes lib/pytz/zoneinfo/America/Moncton | Bin 3163 -> 0 bytes lib/pytz/zoneinfo/America/Monterrey | Bin 1416 -> 0 bytes lib/pytz/zoneinfo/America/Montevideo | Bin 2160 -> 0 bytes lib/pytz/zoneinfo/America/Montreal | Bin 3503 -> 0 bytes lib/pytz/zoneinfo/America/Montserrat | Bin 170 -> 0 bytes lib/pytz/zoneinfo/America/Nassau | Bin 2284 -> 0 bytes lib/pytz/zoneinfo/America/New_York | Bin 3545 -> 0 bytes lib/pytz/zoneinfo/America/Nipigon | Bin 2131 -> 0 bytes lib/pytz/zoneinfo/America/Nome | Bin 2376 -> 0 bytes lib/pytz/zoneinfo/America/Noronha | Bin 728 -> 0 bytes lib/pytz/zoneinfo/America/North_Dakota/Beulah | Bin 2389 -> 0 bytes lib/pytz/zoneinfo/America/North_Dakota/Center | Bin 2389 -> 0 bytes .../zoneinfo/America/North_Dakota/New_Salem | Bin 2389 -> 0 bytes lib/pytz/zoneinfo/America/Ojinaga | Bin 1522 -> 0 bytes lib/pytz/zoneinfo/America/Panama | Bin 203 -> 0 bytes lib/pytz/zoneinfo/America/Pangnirtung | Bin 2108 -> 0 bytes lib/pytz/zoneinfo/America/Paramaribo | Bin 308 -> 0 bytes lib/pytz/zoneinfo/America/Phoenix | Bin 353 -> 0 bytes lib/pytz/zoneinfo/America/Port-au-Prince | Bin 1483 -> 0 bytes lib/pytz/zoneinfo/America/Port_of_Spain | Bin 170 -> 0 bytes lib/pytz/zoneinfo/America/Porto_Acre | Bin 656 -> 0 bytes lib/pytz/zoneinfo/America/Porto_Velho | Bin 588 -> 0 bytes lib/pytz/zoneinfo/America/Puerto_Rico | Bin 255 -> 0 bytes lib/pytz/zoneinfo/America/Rainy_River | Bin 2131 -> 0 bytes lib/pytz/zoneinfo/America/Rankin_Inlet | Bin 1930 -> 0 bytes lib/pytz/zoneinfo/America/Recife | Bin 728 -> 0 bytes lib/pytz/zoneinfo/America/Regina | Bin 994 -> 0 bytes lib/pytz/zoneinfo/America/Resolute | Bin 1930 -> 0 bytes lib/pytz/zoneinfo/America/Rio_Branco | Bin 656 -> 0 bytes lib/pytz/zoneinfo/America/Rosario | Bin 1129 -> 0 bytes lib/pytz/zoneinfo/America/Santa_Isabel | Bin 2356 -> 0 bytes lib/pytz/zoneinfo/America/Santarem | Bin 626 -> 0 bytes lib/pytz/zoneinfo/America/Santiago | Bin 2531 -> 0 bytes lib/pytz/zoneinfo/America/Santo_Domingo | Bin 489 -> 0 bytes lib/pytz/zoneinfo/America/Sao_Paulo | Bin 2015 -> 0 bytes lib/pytz/zoneinfo/America/Scoresbysund | Bin 1925 -> 0 bytes lib/pytz/zoneinfo/America/Shiprock | Bin 2453 -> 0 bytes lib/pytz/zoneinfo/America/Sitka | Bin 2350 -> 0 bytes lib/pytz/zoneinfo/America/St_Barthelemy | Bin 170 -> 0 bytes lib/pytz/zoneinfo/America/St_Johns | Bin 3664 -> 0 bytes lib/pytz/zoneinfo/America/St_Kitts | Bin 170 -> 0 bytes lib/pytz/zoneinfo/America/St_Lucia | Bin 170 -> 0 bytes lib/pytz/zoneinfo/America/St_Thomas | Bin 170 -> 0 bytes lib/pytz/zoneinfo/America/St_Vincent | Bin 170 -> 0 bytes lib/pytz/zoneinfo/America/Swift_Current | Bin 574 -> 0 bytes lib/pytz/zoneinfo/America/Tegucigalpa | Bin 278 -> 0 bytes lib/pytz/zoneinfo/America/Thule | Bin 1528 -> 0 bytes lib/pytz/zoneinfo/America/Thunder_Bay | Bin 2211 -> 0 bytes lib/pytz/zoneinfo/America/Tijuana | Bin 2356 -> 0 bytes lib/pytz/zoneinfo/America/Toronto | Bin 3503 -> 0 bytes lib/pytz/zoneinfo/America/Tortola | Bin 170 -> 0 bytes lib/pytz/zoneinfo/America/Vancouver | Bin 2901 -> 0 bytes lib/pytz/zoneinfo/America/Virgin | Bin 170 -> 0 bytes lib/pytz/zoneinfo/America/Whitehorse | Bin 2093 -> 0 bytes lib/pytz/zoneinfo/America/Winnipeg | Bin 2891 -> 0 bytes lib/pytz/zoneinfo/America/Yakutat | Bin 2314 -> 0 bytes lib/pytz/zoneinfo/America/Yellowknife | Bin 1980 -> 0 bytes lib/pytz/zoneinfo/Antarctica/Casey | Bin 272 -> 0 bytes lib/pytz/zoneinfo/Antarctica/Davis | Bin 290 -> 0 bytes lib/pytz/zoneinfo/Antarctica/DumontDUrville | Bin 227 -> 0 bytes lib/pytz/zoneinfo/Antarctica/Macquarie | Bin 1530 -> 0 bytes lib/pytz/zoneinfo/Antarctica/Mawson | Bin 204 -> 0 bytes lib/pytz/zoneinfo/Antarctica/McMurdo | Bin 2460 -> 0 bytes lib/pytz/zoneinfo/Antarctica/Palmer | Bin 2054 -> 0 bytes lib/pytz/zoneinfo/Antarctica/Rothera | Bin 173 -> 0 bytes lib/pytz/zoneinfo/Antarctica/South_Pole | Bin 2460 -> 0 bytes lib/pytz/zoneinfo/Antarctica/Syowa | Bin 174 -> 0 bytes lib/pytz/zoneinfo/Antarctica/Troll | Bin 1161 -> 0 bytes lib/pytz/zoneinfo/Antarctica/Vostok | Bin 174 -> 0 bytes lib/pytz/zoneinfo/Arctic/Longyearbyen | Bin 2251 -> 0 bytes lib/pytz/zoneinfo/Asia/Aden | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Asia/Almaty | Bin 936 -> 0 bytes lib/pytz/zoneinfo/Asia/Amman | Bin 1877 -> 0 bytes lib/pytz/zoneinfo/Asia/Anadyr | Bin 1197 -> 0 bytes lib/pytz/zoneinfo/Asia/Aqtau | Bin 1142 -> 0 bytes lib/pytz/zoneinfo/Asia/Aqtobe | Bin 1052 -> 0 bytes lib/pytz/zoneinfo/Asia/Ashgabat | Bin 671 -> 0 bytes lib/pytz/zoneinfo/Asia/Ashkhabad | Bin 671 -> 0 bytes lib/pytz/zoneinfo/Asia/Baghdad | Bin 988 -> 0 bytes lib/pytz/zoneinfo/Asia/Bahrain | Bin 209 -> 0 bytes lib/pytz/zoneinfo/Asia/Baku | Bin 1956 -> 0 bytes lib/pytz/zoneinfo/Asia/Bangkok | Bin 204 -> 0 bytes lib/pytz/zoneinfo/Asia/Beirut | Bin 2175 -> 0 bytes lib/pytz/zoneinfo/Asia/Bishkek | Bin 1061 -> 0 bytes lib/pytz/zoneinfo/Asia/Brunei | Bin 201 -> 0 bytes lib/pytz/zoneinfo/Asia/Calcutta | Bin 291 -> 0 bytes lib/pytz/zoneinfo/Asia/Chita | Bin 1236 -> 0 bytes lib/pytz/zoneinfo/Asia/Choibalsan | Bin 904 -> 0 bytes lib/pytz/zoneinfo/Asia/Chongqing | Bin 414 -> 0 bytes lib/pytz/zoneinfo/Asia/Chungking | Bin 414 -> 0 bytes lib/pytz/zoneinfo/Asia/Colombo | Bin 389 -> 0 bytes lib/pytz/zoneinfo/Asia/Dacca | Bin 390 -> 0 bytes lib/pytz/zoneinfo/Asia/Damascus | Bin 2320 -> 0 bytes lib/pytz/zoneinfo/Asia/Dhaka | Bin 390 -> 0 bytes lib/pytz/zoneinfo/Asia/Dili | Bin 309 -> 0 bytes lib/pytz/zoneinfo/Asia/Dubai | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Asia/Dushanbe | Bin 611 -> 0 bytes lib/pytz/zoneinfo/Asia/Gaza | Bin 2313 -> 0 bytes lib/pytz/zoneinfo/Asia/Harbin | Bin 414 -> 0 bytes lib/pytz/zoneinfo/Asia/Hebron | Bin 2341 -> 0 bytes lib/pytz/zoneinfo/Asia/Ho_Chi_Minh | Bin 373 -> 0 bytes lib/pytz/zoneinfo/Asia/Hong_Kong | Bin 1189 -> 0 bytes lib/pytz/zoneinfo/Asia/Hovd | Bin 848 -> 0 bytes lib/pytz/zoneinfo/Asia/Irkutsk | Bin 1259 -> 0 bytes lib/pytz/zoneinfo/Asia/Istanbul | Bin 2747 -> 0 bytes lib/pytz/zoneinfo/Asia/Jakarta | Bin 370 -> 0 bytes lib/pytz/zoneinfo/Asia/Jayapura | Bin 241 -> 0 bytes lib/pytz/zoneinfo/Asia/Jerusalem | Bin 2265 -> 0 bytes lib/pytz/zoneinfo/Asia/Kabul | Bin 199 -> 0 bytes lib/pytz/zoneinfo/Asia/Kamchatka | Bin 1181 -> 0 bytes lib/pytz/zoneinfo/Asia/Karachi | Bin 403 -> 0 bytes lib/pytz/zoneinfo/Asia/Kashgar | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Asia/Kathmandu | Bin 212 -> 0 bytes lib/pytz/zoneinfo/Asia/Katmandu | Bin 212 -> 0 bytes lib/pytz/zoneinfo/Asia/Khandyga | Bin 1324 -> 0 bytes lib/pytz/zoneinfo/Asia/Kolkata | Bin 291 -> 0 bytes lib/pytz/zoneinfo/Asia/Krasnoyarsk | Bin 1226 -> 0 bytes lib/pytz/zoneinfo/Asia/Kuala_Lumpur | Bin 398 -> 0 bytes lib/pytz/zoneinfo/Asia/Kuching | Bin 519 -> 0 bytes lib/pytz/zoneinfo/Asia/Kuwait | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Asia/Macao | Bin 795 -> 0 bytes lib/pytz/zoneinfo/Asia/Macau | Bin 795 -> 0 bytes lib/pytz/zoneinfo/Asia/Magadan | Bin 1227 -> 0 bytes lib/pytz/zoneinfo/Asia/Makassar | Bin 280 -> 0 bytes lib/pytz/zoneinfo/Asia/Manila | Bin 361 -> 0 bytes lib/pytz/zoneinfo/Asia/Muscat | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Asia/Nicosia | Bin 2016 -> 0 bytes lib/pytz/zoneinfo/Asia/Novokuznetsk | Bin 1248 -> 0 bytes lib/pytz/zoneinfo/Asia/Novosibirsk | Bin 1208 -> 0 bytes lib/pytz/zoneinfo/Asia/Omsk | Bin 1226 -> 0 bytes lib/pytz/zoneinfo/Asia/Oral | Bin 1100 -> 0 bytes lib/pytz/zoneinfo/Asia/Phnom_Penh | Bin 204 -> 0 bytes lib/pytz/zoneinfo/Asia/Pontianak | Bin 375 -> 0 bytes lib/pytz/zoneinfo/Asia/Pyongyang | Bin 279 -> 0 bytes lib/pytz/zoneinfo/Asia/Qatar | Bin 209 -> 0 bytes lib/pytz/zoneinfo/Asia/Qyzylorda | Bin 1082 -> 0 bytes lib/pytz/zoneinfo/Asia/Rangoon | Bin 285 -> 0 bytes lib/pytz/zoneinfo/Asia/Riyadh | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Asia/Saigon | Bin 373 -> 0 bytes lib/pytz/zoneinfo/Asia/Sakhalin | Bin 1227 -> 0 bytes lib/pytz/zoneinfo/Asia/Samarkand | Bin 691 -> 0 bytes lib/pytz/zoneinfo/Asia/Seoul | Bin 571 -> 0 bytes lib/pytz/zoneinfo/Asia/Shanghai | Bin 414 -> 0 bytes lib/pytz/zoneinfo/Asia/Singapore | Bin 428 -> 0 bytes lib/pytz/zoneinfo/Asia/Srednekolymsk | Bin 1237 -> 0 bytes lib/pytz/zoneinfo/Asia/Taipei | Bin 800 -> 0 bytes lib/pytz/zoneinfo/Asia/Tashkent | Bin 681 -> 0 bytes lib/pytz/zoneinfo/Asia/Tbilisi | Bin 1142 -> 0 bytes lib/pytz/zoneinfo/Asia/Tehran | Bin 1661 -> 0 bytes lib/pytz/zoneinfo/Asia/Tel_Aviv | Bin 2265 -> 0 bytes lib/pytz/zoneinfo/Asia/Thimbu | Bin 209 -> 0 bytes lib/pytz/zoneinfo/Asia/Thimphu | Bin 209 -> 0 bytes lib/pytz/zoneinfo/Asia/Tokyo | Bin 355 -> 0 bytes lib/pytz/zoneinfo/Asia/Ujung_Pandang | Bin 280 -> 0 bytes lib/pytz/zoneinfo/Asia/Ulaanbaatar | Bin 848 -> 0 bytes lib/pytz/zoneinfo/Asia/Ulan_Bator | Bin 848 -> 0 bytes lib/pytz/zoneinfo/Asia/Urumqi | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Asia/Ust-Nera | Bin 1293 -> 0 bytes lib/pytz/zoneinfo/Asia/Vientiane | Bin 204 -> 0 bytes lib/pytz/zoneinfo/Asia/Vladivostok | Bin 1227 -> 0 bytes lib/pytz/zoneinfo/Asia/Yakutsk | Bin 1226 -> 0 bytes lib/pytz/zoneinfo/Asia/Yekaterinburg | Bin 1334 -> 0 bytes lib/pytz/zoneinfo/Asia/Yerevan | Bin 1277 -> 0 bytes lib/pytz/zoneinfo/Atlantic/Azores | Bin 3488 -> 0 bytes lib/pytz/zoneinfo/Atlantic/Bermuda | Bin 2004 -> 0 bytes lib/pytz/zoneinfo/Atlantic/Canary | Bin 1913 -> 0 bytes lib/pytz/zoneinfo/Atlantic/Cape_Verde | Bin 254 -> 0 bytes lib/pytz/zoneinfo/Atlantic/Faeroe | Bin 1829 -> 0 bytes lib/pytz/zoneinfo/Atlantic/Faroe | Bin 1829 -> 0 bytes lib/pytz/zoneinfo/Atlantic/Jan_Mayen | Bin 2251 -> 0 bytes lib/pytz/zoneinfo/Atlantic/Madeira | Bin 3478 -> 0 bytes lib/pytz/zoneinfo/Atlantic/Reykjavik | Bin 1167 -> 0 bytes lib/pytz/zoneinfo/Atlantic/South_Georgia | Bin 148 -> 0 bytes lib/pytz/zoneinfo/Atlantic/St_Helena | Bin 170 -> 0 bytes lib/pytz/zoneinfo/Atlantic/Stanley | Bin 1246 -> 0 bytes lib/pytz/zoneinfo/Australia/ACT | Bin 2223 -> 0 bytes lib/pytz/zoneinfo/Australia/Adelaide | Bin 2238 -> 0 bytes lib/pytz/zoneinfo/Australia/Brisbane | Bin 452 -> 0 bytes lib/pytz/zoneinfo/Australia/Broken_Hill | Bin 2274 -> 0 bytes lib/pytz/zoneinfo/Australia/Canberra | Bin 2223 -> 0 bytes lib/pytz/zoneinfo/Australia/Currie | Bin 2223 -> 0 bytes lib/pytz/zoneinfo/Australia/Darwin | Bin 323 -> 0 bytes lib/pytz/zoneinfo/Australia/Eucla | Bin 487 -> 0 bytes lib/pytz/zoneinfo/Australia/Hobart | Bin 2335 -> 0 bytes lib/pytz/zoneinfo/Australia/LHI | Bin 1859 -> 0 bytes lib/pytz/zoneinfo/Australia/Lindeman | Bin 522 -> 0 bytes lib/pytz/zoneinfo/Australia/Lord_Howe | Bin 1859 -> 0 bytes lib/pytz/zoneinfo/Australia/Melbourne | Bin 2223 -> 0 bytes lib/pytz/zoneinfo/Australia/NSW | Bin 2223 -> 0 bytes lib/pytz/zoneinfo/Australia/North | Bin 323 -> 0 bytes lib/pytz/zoneinfo/Australia/Perth | Bin 479 -> 0 bytes lib/pytz/zoneinfo/Australia/Queensland | Bin 452 -> 0 bytes lib/pytz/zoneinfo/Australia/South | Bin 2238 -> 0 bytes lib/pytz/zoneinfo/Australia/Sydney | Bin 2223 -> 0 bytes lib/pytz/zoneinfo/Australia/Tasmania | Bin 2335 -> 0 bytes lib/pytz/zoneinfo/Australia/Victoria | Bin 2223 -> 0 bytes lib/pytz/zoneinfo/Australia/West | Bin 479 -> 0 bytes lib/pytz/zoneinfo/Australia/Yancowinna | Bin 2274 -> 0 bytes lib/pytz/zoneinfo/Brazil/Acre | Bin 656 -> 0 bytes lib/pytz/zoneinfo/Brazil/DeNoronha | Bin 728 -> 0 bytes lib/pytz/zoneinfo/Brazil/East | Bin 2015 -> 0 bytes lib/pytz/zoneinfo/Brazil/West | Bin 616 -> 0 bytes lib/pytz/zoneinfo/CET | Bin 2102 -> 0 bytes lib/pytz/zoneinfo/CST6CDT | Bin 2294 -> 0 bytes lib/pytz/zoneinfo/Canada/Atlantic | Bin 3438 -> 0 bytes lib/pytz/zoneinfo/Canada/Central | Bin 2891 -> 0 bytes lib/pytz/zoneinfo/Canada/East-Saskatchewan | Bin 994 -> 0 bytes lib/pytz/zoneinfo/Canada/Eastern | Bin 3503 -> 0 bytes lib/pytz/zoneinfo/Canada/Mountain | Bin 2402 -> 0 bytes lib/pytz/zoneinfo/Canada/Newfoundland | Bin 3664 -> 0 bytes lib/pytz/zoneinfo/Canada/Pacific | Bin 2901 -> 0 bytes lib/pytz/zoneinfo/Canada/Saskatchewan | Bin 994 -> 0 bytes lib/pytz/zoneinfo/Canada/Yukon | Bin 2093 -> 0 bytes lib/pytz/zoneinfo/Chile/Continental | Bin 2531 -> 0 bytes lib/pytz/zoneinfo/Chile/EasterIsland | Bin 2295 -> 0 bytes lib/pytz/zoneinfo/Cuba | Bin 2437 -> 0 bytes lib/pytz/zoneinfo/EET | Bin 1876 -> 0 bytes lib/pytz/zoneinfo/EST | Bin 127 -> 0 bytes lib/pytz/zoneinfo/EST5EDT | Bin 2294 -> 0 bytes lib/pytz/zoneinfo/Egypt | Bin 2779 -> 0 bytes lib/pytz/zoneinfo/Eire | Bin 3559 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT | Bin 127 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT+0 | Bin 127 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT+1 | Bin 135 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT+10 | Bin 139 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT+11 | Bin 139 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT+12 | Bin 139 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT+2 | Bin 135 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT+3 | Bin 135 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT+4 | Bin 135 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT+5 | Bin 135 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT+6 | Bin 135 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT+7 | Bin 135 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT+8 | Bin 135 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT+9 | Bin 135 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT-0 | Bin 127 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT-1 | Bin 136 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT-10 | Bin 140 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT-11 | Bin 140 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT-12 | Bin 140 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT-13 | Bin 140 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT-14 | Bin 140 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT-2 | Bin 136 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT-3 | Bin 136 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT-4 | Bin 136 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT-5 | Bin 136 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT-6 | Bin 136 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT-7 | Bin 136 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT-8 | Bin 136 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT-9 | Bin 136 -> 0 bytes lib/pytz/zoneinfo/Etc/GMT0 | Bin 127 -> 0 bytes lib/pytz/zoneinfo/Etc/Greenwich | Bin 127 -> 0 bytes lib/pytz/zoneinfo/Etc/UCT | Bin 127 -> 0 bytes lib/pytz/zoneinfo/Etc/UTC | Bin 127 -> 0 bytes lib/pytz/zoneinfo/Etc/Universal | Bin 127 -> 0 bytes lib/pytz/zoneinfo/Etc/Zulu | Bin 127 -> 0 bytes lib/pytz/zoneinfo/Europe/Amsterdam | Bin 2943 -> 0 bytes lib/pytz/zoneinfo/Europe/Andorra | Bin 1751 -> 0 bytes lib/pytz/zoneinfo/Europe/Athens | Bin 2271 -> 0 bytes lib/pytz/zoneinfo/Europe/Belfast | Bin 3687 -> 0 bytes lib/pytz/zoneinfo/Europe/Belgrade | Bin 1957 -> 0 bytes lib/pytz/zoneinfo/Europe/Berlin | Bin 2335 -> 0 bytes lib/pytz/zoneinfo/Europe/Bratislava | Bin 2272 -> 0 bytes lib/pytz/zoneinfo/Europe/Brussels | Bin 2970 -> 0 bytes lib/pytz/zoneinfo/Europe/Bucharest | Bin 2221 -> 0 bytes lib/pytz/zoneinfo/Europe/Budapest | Bin 2405 -> 0 bytes lib/pytz/zoneinfo/Europe/Busingen | Bin 1918 -> 0 bytes lib/pytz/zoneinfo/Europe/Chisinau | Bin 2433 -> 0 bytes lib/pytz/zoneinfo/Europe/Copenhagen | Bin 2160 -> 0 bytes lib/pytz/zoneinfo/Europe/Dublin | Bin 3559 -> 0 bytes lib/pytz/zoneinfo/Europe/Gibraltar | Bin 3061 -> 0 bytes lib/pytz/zoneinfo/Europe/Guernsey | Bin 3687 -> 0 bytes lib/pytz/zoneinfo/Europe/Helsinki | Bin 1909 -> 0 bytes lib/pytz/zoneinfo/Europe/Isle_of_Man | Bin 3687 -> 0 bytes lib/pytz/zoneinfo/Europe/Istanbul | Bin 2747 -> 0 bytes lib/pytz/zoneinfo/Europe/Jersey | Bin 3687 -> 0 bytes lib/pytz/zoneinfo/Europe/Kaliningrad | Bin 1550 -> 0 bytes lib/pytz/zoneinfo/Europe/Kiev | Bin 2097 -> 0 bytes lib/pytz/zoneinfo/Europe/Lisbon | Bin 3453 -> 0 bytes lib/pytz/zoneinfo/Europe/Ljubljana | Bin 1957 -> 0 bytes lib/pytz/zoneinfo/Europe/London | Bin 3687 -> 0 bytes lib/pytz/zoneinfo/Europe/Luxembourg | Bin 2974 -> 0 bytes lib/pytz/zoneinfo/Europe/Madrid | Bin 2619 -> 0 bytes lib/pytz/zoneinfo/Europe/Malta | Bin 2629 -> 0 bytes lib/pytz/zoneinfo/Europe/Mariehamn | Bin 1909 -> 0 bytes lib/pytz/zoneinfo/Europe/Minsk | Bin 1368 -> 0 bytes lib/pytz/zoneinfo/Europe/Monaco | Bin 2953 -> 0 bytes lib/pytz/zoneinfo/Europe/Moscow | Bin 1528 -> 0 bytes lib/pytz/zoneinfo/Europe/Nicosia | Bin 2016 -> 0 bytes lib/pytz/zoneinfo/Europe/Oslo | Bin 2251 -> 0 bytes lib/pytz/zoneinfo/Europe/Paris | Bin 2971 -> 0 bytes lib/pytz/zoneinfo/Europe/Podgorica | Bin 1957 -> 0 bytes lib/pytz/zoneinfo/Europe/Prague | Bin 2272 -> 0 bytes lib/pytz/zoneinfo/Europe/Riga | Bin 2235 -> 0 bytes lib/pytz/zoneinfo/Europe/Rome | Bin 2678 -> 0 bytes lib/pytz/zoneinfo/Europe/Samara | Bin 1394 -> 0 bytes lib/pytz/zoneinfo/Europe/San_Marino | Bin 2678 -> 0 bytes lib/pytz/zoneinfo/Europe/Sarajevo | Bin 1957 -> 0 bytes lib/pytz/zoneinfo/Europe/Simferopol | Bin 1504 -> 0 bytes lib/pytz/zoneinfo/Europe/Skopje | Bin 1957 -> 0 bytes lib/pytz/zoneinfo/Europe/Sofia | Bin 2130 -> 0 bytes lib/pytz/zoneinfo/Europe/Stockholm | Bin 1918 -> 0 bytes lib/pytz/zoneinfo/Europe/Tallinn | Bin 2201 -> 0 bytes lib/pytz/zoneinfo/Europe/Tirane | Bin 2098 -> 0 bytes lib/pytz/zoneinfo/Europe/Tiraspol | Bin 2433 -> 0 bytes lib/pytz/zoneinfo/Europe/Uzhgorod | Bin 2103 -> 0 bytes lib/pytz/zoneinfo/Europe/Vaduz | Bin 1918 -> 0 bytes lib/pytz/zoneinfo/Europe/Vatican | Bin 2678 -> 0 bytes lib/pytz/zoneinfo/Europe/Vienna | Bin 2237 -> 0 bytes lib/pytz/zoneinfo/Europe/Vilnius | Bin 2199 -> 0 bytes lib/pytz/zoneinfo/Europe/Volgograd | Bin 1317 -> 0 bytes lib/pytz/zoneinfo/Europe/Warsaw | Bin 2705 -> 0 bytes lib/pytz/zoneinfo/Europe/Zagreb | Bin 1957 -> 0 bytes lib/pytz/zoneinfo/Europe/Zaporozhye | Bin 2111 -> 0 bytes lib/pytz/zoneinfo/Europe/Zurich | Bin 1918 -> 0 bytes lib/pytz/zoneinfo/Factory | Bin 264 -> 0 bytes lib/pytz/zoneinfo/GB | Bin 3687 -> 0 bytes lib/pytz/zoneinfo/GB-Eire | Bin 3687 -> 0 bytes lib/pytz/zoneinfo/GMT | Bin 127 -> 0 bytes lib/pytz/zoneinfo/GMT+0 | Bin 127 -> 0 bytes lib/pytz/zoneinfo/GMT-0 | Bin 127 -> 0 bytes lib/pytz/zoneinfo/GMT0 | Bin 127 -> 0 bytes lib/pytz/zoneinfo/Greenwich | Bin 127 -> 0 bytes lib/pytz/zoneinfo/HST | Bin 128 -> 0 bytes lib/pytz/zoneinfo/Hongkong | Bin 1189 -> 0 bytes lib/pytz/zoneinfo/Iceland | Bin 1167 -> 0 bytes lib/pytz/zoneinfo/Indian/Antananarivo | Bin 283 -> 0 bytes lib/pytz/zoneinfo/Indian/Chagos | Bin 201 -> 0 bytes lib/pytz/zoneinfo/Indian/Christmas | Bin 149 -> 0 bytes lib/pytz/zoneinfo/Indian/Cocos | Bin 152 -> 0 bytes lib/pytz/zoneinfo/Indian/Comoro | Bin 283 -> 0 bytes lib/pytz/zoneinfo/Indian/Kerguelen | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Indian/Mahe | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Indian/Maldives | Bin 204 -> 0 bytes lib/pytz/zoneinfo/Indian/Mauritius | Bin 253 -> 0 bytes lib/pytz/zoneinfo/Indian/Mayotte | Bin 283 -> 0 bytes lib/pytz/zoneinfo/Indian/Reunion | Bin 171 -> 0 bytes lib/pytz/zoneinfo/Iran | Bin 1661 -> 0 bytes lib/pytz/zoneinfo/Israel | Bin 2265 -> 0 bytes lib/pytz/zoneinfo/Jamaica | Bin 507 -> 0 bytes lib/pytz/zoneinfo/Japan | Bin 355 -> 0 bytes lib/pytz/zoneinfo/Kwajalein | Bin 237 -> 0 bytes lib/pytz/zoneinfo/Libya | Bin 655 -> 0 bytes lib/pytz/zoneinfo/MET | Bin 2102 -> 0 bytes lib/pytz/zoneinfo/MST | Bin 127 -> 0 bytes lib/pytz/zoneinfo/MST7MDT | Bin 2294 -> 0 bytes lib/pytz/zoneinfo/Mexico/BajaNorte | Bin 2356 -> 0 bytes lib/pytz/zoneinfo/Mexico/BajaSur | Bin 1564 -> 0 bytes lib/pytz/zoneinfo/Mexico/General | Bin 1618 -> 0 bytes lib/pytz/zoneinfo/NZ | Bin 2460 -> 0 bytes lib/pytz/zoneinfo/NZ-CHAT | Bin 2057 -> 0 bytes lib/pytz/zoneinfo/Navajo | Bin 2453 -> 0 bytes lib/pytz/zoneinfo/PRC | Bin 414 -> 0 bytes lib/pytz/zoneinfo/PST8PDT | Bin 2294 -> 0 bytes lib/pytz/zoneinfo/Pacific/Apia | Bin 1102 -> 0 bytes lib/pytz/zoneinfo/Pacific/Auckland | Bin 2460 -> 0 bytes lib/pytz/zoneinfo/Pacific/Bougainville | Bin 280 -> 0 bytes lib/pytz/zoneinfo/Pacific/Chatham | Bin 2057 -> 0 bytes lib/pytz/zoneinfo/Pacific/Chuuk | Bin 153 -> 0 bytes lib/pytz/zoneinfo/Pacific/Easter | Bin 2295 -> 0 bytes lib/pytz/zoneinfo/Pacific/Efate | Bin 478 -> 0 bytes lib/pytz/zoneinfo/Pacific/Enderbury | Bin 230 -> 0 bytes lib/pytz/zoneinfo/Pacific/Fakaofo | Bin 197 -> 0 bytes lib/pytz/zoneinfo/Pacific/Fiji | Bin 1074 -> 0 bytes lib/pytz/zoneinfo/Pacific/Funafuti | Bin 150 -> 0 bytes lib/pytz/zoneinfo/Pacific/Galapagos | Bin 211 -> 0 bytes lib/pytz/zoneinfo/Pacific/Gambier | Bin 173 -> 0 bytes lib/pytz/zoneinfo/Pacific/Guadalcanal | Bin 172 -> 0 bytes lib/pytz/zoneinfo/Pacific/Guam | Bin 225 -> 0 bytes lib/pytz/zoneinfo/Pacific/Honolulu | Bin 276 -> 0 bytes lib/pytz/zoneinfo/Pacific/Johnston | Bin 276 -> 0 bytes lib/pytz/zoneinfo/Pacific/Kiritimati | Bin 230 -> 0 bytes lib/pytz/zoneinfo/Pacific/Kosrae | Bin 230 -> 0 bytes lib/pytz/zoneinfo/Pacific/Kwajalein | Bin 237 -> 0 bytes lib/pytz/zoneinfo/Pacific/Majuro | Bin 197 -> 0 bytes lib/pytz/zoneinfo/Pacific/Marquesas | Bin 176 -> 0 bytes lib/pytz/zoneinfo/Pacific/Midway | Bin 294 -> 0 bytes lib/pytz/zoneinfo/Pacific/Nauru | Bin 254 -> 0 bytes lib/pytz/zoneinfo/Pacific/Niue | Bin 226 -> 0 bytes lib/pytz/zoneinfo/Pacific/Norfolk | Bin 208 -> 0 bytes lib/pytz/zoneinfo/Pacific/Noumea | Bin 314 -> 0 bytes lib/pytz/zoneinfo/Pacific/Pago_Pago | Bin 272 -> 0 bytes lib/pytz/zoneinfo/Pacific/Palau | Bin 149 -> 0 bytes lib/pytz/zoneinfo/Pacific/Pitcairn | Bin 203 -> 0 bytes lib/pytz/zoneinfo/Pacific/Pohnpei | Bin 153 -> 0 bytes lib/pytz/zoneinfo/Pacific/Ponape | Bin 153 -> 0 bytes lib/pytz/zoneinfo/Pacific/Port_Moresby | Bin 172 -> 0 bytes lib/pytz/zoneinfo/Pacific/Rarotonga | Bin 574 -> 0 bytes lib/pytz/zoneinfo/Pacific/Saipan | Bin 255 -> 0 bytes lib/pytz/zoneinfo/Pacific/Samoa | Bin 272 -> 0 bytes lib/pytz/zoneinfo/Pacific/Tahiti | Bin 174 -> 0 bytes lib/pytz/zoneinfo/Pacific/Tarawa | Bin 153 -> 0 bytes lib/pytz/zoneinfo/Pacific/Tongatapu | Bin 339 -> 0 bytes lib/pytz/zoneinfo/Pacific/Truk | Bin 153 -> 0 bytes lib/pytz/zoneinfo/Pacific/Wake | Bin 153 -> 0 bytes lib/pytz/zoneinfo/Pacific/Wallis | Bin 150 -> 0 bytes lib/pytz/zoneinfo/Pacific/Yap | Bin 153 -> 0 bytes lib/pytz/zoneinfo/Poland | Bin 2705 -> 0 bytes lib/pytz/zoneinfo/Portugal | Bin 3453 -> 0 bytes lib/pytz/zoneinfo/ROC | Bin 800 -> 0 bytes lib/pytz/zoneinfo/ROK | Bin 571 -> 0 bytes lib/pytz/zoneinfo/Singapore | Bin 428 -> 0 bytes lib/pytz/zoneinfo/Turkey | Bin 2747 -> 0 bytes lib/pytz/zoneinfo/UCT | Bin 127 -> 0 bytes lib/pytz/zoneinfo/US/Alaska | Bin 2384 -> 0 bytes lib/pytz/zoneinfo/US/Aleutian | Bin 2379 -> 0 bytes lib/pytz/zoneinfo/US/Arizona | Bin 353 -> 0 bytes lib/pytz/zoneinfo/US/Central | Bin 3585 -> 0 bytes lib/pytz/zoneinfo/US/East-Indiana | Bin 1675 -> 0 bytes lib/pytz/zoneinfo/US/Eastern | Bin 3545 -> 0 bytes lib/pytz/zoneinfo/US/Hawaii | Bin 276 -> 0 bytes lib/pytz/zoneinfo/US/Indiana-Starke | Bin 2437 -> 0 bytes lib/pytz/zoneinfo/US/Michigan | Bin 2216 -> 0 bytes lib/pytz/zoneinfo/US/Mountain | Bin 2453 -> 0 bytes lib/pytz/zoneinfo/US/Pacific | Bin 2845 -> 0 bytes lib/pytz/zoneinfo/US/Pacific-New | Bin 2845 -> 0 bytes lib/pytz/zoneinfo/US/Samoa | Bin 272 -> 0 bytes lib/pytz/zoneinfo/UTC | Bin 127 -> 0 bytes lib/pytz/zoneinfo/Universal | Bin 127 -> 0 bytes lib/pytz/zoneinfo/W-SU | Bin 1528 -> 0 bytes lib/pytz/zoneinfo/WET | Bin 1873 -> 0 bytes lib/pytz/zoneinfo/Zulu | Bin 127 -> 0 bytes lib/pytz/zoneinfo/iso3166.tab | 275 - lib/pytz/zoneinfo/localtime | Bin 127 -> 0 bytes lib/pytz/zoneinfo/posixrules | Bin 3545 -> 0 bytes lib/pytz/zoneinfo/zone.tab | 440 -- lib/pytz/zoneinfo/zone1970.tab | 371 -- lib/requests/__init__.py | 77 - lib/requests/adapters.py | 432 -- lib/requests/api.py | 147 - lib/requests/auth.py | 212 - lib/requests/cacert.pem | 5026 ----------------- lib/requests/certs.py | 25 - lib/requests/compat.py | 62 - lib/requests/cookies.py | 482 -- lib/requests/exceptions.py | 99 - lib/requests/hooks.py | 45 - lib/requests/models.py | 848 --- lib/requests/packages/README.rst | 8 - lib/requests/packages/__init__.py | 3 - lib/requests/packages/chardet/__init__.py | 32 - lib/requests/packages/chardet/big5freq.py | 925 --- lib/requests/packages/chardet/big5prober.py | 42 - lib/requests/packages/chardet/chardetect.py | 80 - .../packages/chardet/chardistribution.py | 231 - .../packages/chardet/charsetgroupprober.py | 106 - .../packages/chardet/charsetprober.py | 62 - .../packages/chardet/codingstatemachine.py | 61 - lib/requests/packages/chardet/compat.py | 34 - lib/requests/packages/chardet/constants.py | 39 - lib/requests/packages/chardet/cp949prober.py | 44 - lib/requests/packages/chardet/escprober.py | 86 - lib/requests/packages/chardet/escsm.py | 242 - lib/requests/packages/chardet/eucjpprober.py | 90 - lib/requests/packages/chardet/euckrfreq.py | 596 -- lib/requests/packages/chardet/euckrprober.py | 42 - lib/requests/packages/chardet/euctwfreq.py | 428 -- lib/requests/packages/chardet/euctwprober.py | 41 - lib/requests/packages/chardet/gb2312freq.py | 472 -- lib/requests/packages/chardet/gb2312prober.py | 41 - lib/requests/packages/chardet/hebrewprober.py | 283 - lib/requests/packages/chardet/jisfreq.py | 569 -- lib/requests/packages/chardet/jpcntx.py | 227 - .../packages/chardet/langbulgarianmodel.py | 229 - .../packages/chardet/langcyrillicmodel.py | 329 -- .../packages/chardet/langgreekmodel.py | 225 - .../packages/chardet/langhebrewmodel.py | 201 - .../packages/chardet/langhungarianmodel.py | 225 - .../packages/chardet/langthaimodel.py | 200 - lib/requests/packages/chardet/latin1prober.py | 139 - .../packages/chardet/mbcharsetprober.py | 86 - .../packages/chardet/mbcsgroupprober.py | 54 - lib/requests/packages/chardet/mbcssm.py | 572 -- .../packages/chardet/sbcharsetprober.py | 120 - .../packages/chardet/sbcsgroupprober.py | 69 - lib/requests/packages/chardet/sjisprober.py | 91 - .../packages/chardet/universaldetector.py | 170 - lib/requests/packages/chardet/utf8prober.py | 76 - lib/requests/packages/urllib3/__init__.py | 71 - lib/requests/packages/urllib3/_collections.py | 323 -- lib/requests/packages/urllib3/connection.py | 277 - .../packages/urllib3/connectionpool.py | 816 --- .../packages/urllib3/contrib/__init__.py | 0 .../packages/urllib3/contrib/appengine.py | 222 - .../packages/urllib3/contrib/ntlmpool.py | 114 - .../packages/urllib3/contrib/pyopenssl.py | 309 - lib/requests/packages/urllib3/exceptions.py | 193 - lib/requests/packages/urllib3/fields.py | 177 - lib/requests/packages/urllib3/filepost.py | 93 - .../packages/urllib3/packages/__init__.py | 4 - .../packages/urllib3/packages/ordered_dict.py | 259 - lib/requests/packages/urllib3/packages/six.py | 385 -- .../packages/ssl_match_hostname/__init__.py | 13 - .../ssl_match_hostname/_implementation.py | 105 - lib/requests/packages/urllib3/poolmanager.py | 280 - lib/requests/packages/urllib3/request.py | 149 - lib/requests/packages/urllib3/response.py | 485 -- .../packages/urllib3/util/__init__.py | 24 - .../packages/urllib3/util/connection.py | 100 - lib/requests/packages/urllib3/util/request.py | 71 - .../packages/urllib3/util/response.py | 73 - lib/requests/packages/urllib3/util/retry.py | 285 - lib/requests/packages/urllib3/util/ssl_.py | 286 - lib/requests/packages/urllib3/util/timeout.py | 240 - lib/requests/packages/urllib3/util/url.py | 214 - lib/requests/sessions.py | 682 --- lib/requests/status_codes.py | 89 - lib/requests/structures.py | 104 - lib/requests/utils.py | 720 --- lib/six.py | 646 --- lib/tzlocal/LICENSE.txt | 121 - lib/tzlocal/README.rst | 80 - lib/tzlocal/__init__.py | 7 - lib/tzlocal/darwin.py | 27 - lib/tzlocal/tests.py | 64 - lib/tzlocal/unix.py | 115 - lib/tzlocal/win32.py | 93 - lib/tzlocal/windows_tz.py | 542 -- lib/unidecode/LICENSE | 339 -- lib/unidecode/README | 134 - lib/unidecode/__init__.py | 69 - lib/unidecode/x000.py | 165 - lib/unidecode/x001.py | 258 - lib/unidecode/x002.py | 257 - lib/unidecode/x003.py | 257 - lib/unidecode/x004.py | 257 - lib/unidecode/x005.py | 257 - lib/unidecode/x006.py | 257 - lib/unidecode/x007.py | 257 - lib/unidecode/x009.py | 257 - lib/unidecode/x00a.py | 257 - lib/unidecode/x00b.py | 257 - lib/unidecode/x00c.py | 257 - lib/unidecode/x00d.py | 257 - lib/unidecode/x00e.py | 257 - lib/unidecode/x00f.py | 257 - lib/unidecode/x010.py | 257 - lib/unidecode/x011.py | 257 - lib/unidecode/x012.py | 258 - lib/unidecode/x013.py | 257 - lib/unidecode/x014.py | 258 - lib/unidecode/x015.py | 258 - lib/unidecode/x016.py | 257 - lib/unidecode/x017.py | 257 - lib/unidecode/x018.py | 257 - lib/unidecode/x01d.py | 257 - lib/unidecode/x01e.py | 257 - lib/unidecode/x01f.py | 257 - lib/unidecode/x020.py | 257 - lib/unidecode/x021.py | 257 - lib/unidecode/x022.py | 257 - lib/unidecode/x023.py | 257 - lib/unidecode/x024.py | 257 - lib/unidecode/x025.py | 257 - lib/unidecode/x026.py | 257 - lib/unidecode/x027.py | 257 - lib/unidecode/x028.py | 258 - lib/unidecode/x029.py | 257 - lib/unidecode/x02a.py | 257 - lib/unidecode/x02c.py | 257 - lib/unidecode/x02e.py | 257 - lib/unidecode/x02f.py | 257 - lib/unidecode/x030.py | 257 - lib/unidecode/x031.py | 257 - lib/unidecode/x032.py | 257 - lib/unidecode/x033.py | 257 - lib/unidecode/x04d.py | 257 - lib/unidecode/x04e.py | 258 - lib/unidecode/x04f.py | 258 - lib/unidecode/x050.py | 258 - lib/unidecode/x051.py | 258 - lib/unidecode/x052.py | 258 - lib/unidecode/x053.py | 258 - lib/unidecode/x054.py | 258 - lib/unidecode/x055.py | 258 - lib/unidecode/x056.py | 258 - lib/unidecode/x057.py | 258 - lib/unidecode/x058.py | 258 - lib/unidecode/x059.py | 258 - lib/unidecode/x05a.py | 258 - lib/unidecode/x05b.py | 258 - lib/unidecode/x05c.py | 258 - lib/unidecode/x05d.py | 258 - lib/unidecode/x05e.py | 258 - lib/unidecode/x05f.py | 258 - lib/unidecode/x060.py | 258 - lib/unidecode/x061.py | 258 - lib/unidecode/x062.py | 258 - lib/unidecode/x063.py | 258 - lib/unidecode/x064.py | 258 - lib/unidecode/x065.py | 258 - lib/unidecode/x066.py | 258 - lib/unidecode/x067.py | 258 - lib/unidecode/x068.py | 258 - lib/unidecode/x069.py | 258 - lib/unidecode/x06a.py | 258 - lib/unidecode/x06b.py | 258 - lib/unidecode/x06c.py | 258 - lib/unidecode/x06d.py | 258 - lib/unidecode/x06e.py | 258 - lib/unidecode/x06f.py | 258 - lib/unidecode/x070.py | 258 - lib/unidecode/x071.py | 258 - lib/unidecode/x072.py | 258 - lib/unidecode/x073.py | 258 - lib/unidecode/x074.py | 258 - lib/unidecode/x075.py | 258 - lib/unidecode/x076.py | 258 - lib/unidecode/x077.py | 258 - lib/unidecode/x078.py | 258 - lib/unidecode/x079.py | 258 - lib/unidecode/x07a.py | 258 - lib/unidecode/x07b.py | 258 - lib/unidecode/x07c.py | 258 - lib/unidecode/x07d.py | 258 - lib/unidecode/x07e.py | 258 - lib/unidecode/x07f.py | 258 - lib/unidecode/x080.py | 258 - lib/unidecode/x081.py | 258 - lib/unidecode/x082.py | 258 - lib/unidecode/x083.py | 258 - lib/unidecode/x084.py | 258 - lib/unidecode/x085.py | 258 - lib/unidecode/x086.py | 258 - lib/unidecode/x087.py | 258 - lib/unidecode/x088.py | 258 - lib/unidecode/x089.py | 258 - lib/unidecode/x08a.py | 258 - lib/unidecode/x08b.py | 258 - lib/unidecode/x08c.py | 258 - lib/unidecode/x08d.py | 258 - lib/unidecode/x08e.py | 258 - lib/unidecode/x08f.py | 258 - lib/unidecode/x090.py | 258 - lib/unidecode/x091.py | 258 - lib/unidecode/x092.py | 258 - lib/unidecode/x093.py | 258 - lib/unidecode/x094.py | 258 - lib/unidecode/x095.py | 258 - lib/unidecode/x096.py | 258 - lib/unidecode/x097.py | 258 - lib/unidecode/x098.py | 258 - lib/unidecode/x099.py | 258 - lib/unidecode/x09a.py | 258 - lib/unidecode/x09b.py | 258 - lib/unidecode/x09c.py | 258 - lib/unidecode/x09d.py | 258 - lib/unidecode/x09e.py | 258 - lib/unidecode/x09f.py | 257 - lib/unidecode/x0a0.py | 258 - lib/unidecode/x0a1.py | 258 - lib/unidecode/x0a2.py | 258 - lib/unidecode/x0a3.py | 258 - lib/unidecode/x0a4.py | 257 - lib/unidecode/x0ac.py | 258 - lib/unidecode/x0ad.py | 258 - lib/unidecode/x0ae.py | 258 - lib/unidecode/x0af.py | 258 - lib/unidecode/x0b0.py | 258 - lib/unidecode/x0b1.py | 258 - lib/unidecode/x0b2.py | 258 - lib/unidecode/x0b3.py | 258 - lib/unidecode/x0b4.py | 258 - lib/unidecode/x0b5.py | 258 - lib/unidecode/x0b6.py | 258 - lib/unidecode/x0b7.py | 258 - lib/unidecode/x0b8.py | 258 - lib/unidecode/x0b9.py | 258 - lib/unidecode/x0ba.py | 258 - lib/unidecode/x0bb.py | 258 - lib/unidecode/x0bc.py | 258 - lib/unidecode/x0bd.py | 258 - lib/unidecode/x0be.py | 258 - lib/unidecode/x0bf.py | 258 - lib/unidecode/x0c0.py | 258 - lib/unidecode/x0c1.py | 258 - lib/unidecode/x0c2.py | 258 - lib/unidecode/x0c3.py | 258 - lib/unidecode/x0c4.py | 258 - lib/unidecode/x0c5.py | 258 - lib/unidecode/x0c6.py | 258 - lib/unidecode/x0c7.py | 258 - lib/unidecode/x0c8.py | 258 - lib/unidecode/x0c9.py | 258 - lib/unidecode/x0ca.py | 258 - lib/unidecode/x0cb.py | 258 - lib/unidecode/x0cc.py | 258 - lib/unidecode/x0cd.py | 258 - lib/unidecode/x0ce.py | 258 - lib/unidecode/x0cf.py | 258 - lib/unidecode/x0d0.py | 258 - lib/unidecode/x0d1.py | 258 - lib/unidecode/x0d2.py | 258 - lib/unidecode/x0d3.py | 258 - lib/unidecode/x0d4.py | 258 - lib/unidecode/x0d5.py | 258 - lib/unidecode/x0d6.py | 258 - lib/unidecode/x0d7.py | 257 - lib/unidecode/x0f9.py | 258 - lib/unidecode/x0fa.py | 257 - lib/unidecode/x0fb.py | 258 - lib/unidecode/x0fc.py | 258 - lib/unidecode/x0fd.py | 257 - lib/unidecode/x0fe.py | 258 - lib/unidecode/x0ff.py | 258 - lib/unidecode/x1d4.py | 258 - lib/unidecode/x1d5.py | 258 - lib/unidecode/x1d6.py | 258 - lib/unidecode/x1d7.py | 258 - lib/yaml/__init__.py | 315 -- lib/yaml/composer.py | 139 - lib/yaml/constructor.py | 675 --- lib/yaml/cyaml.py | 85 - lib/yaml/dumper.py | 62 - lib/yaml/emitter.py | 1140 ---- lib/yaml/error.py | 75 - lib/yaml/events.py | 86 - lib/yaml/loader.py | 40 - lib/yaml/nodes.py | 49 - lib/yaml/parser.py | 589 -- lib/yaml/reader.py | 190 - lib/yaml/representer.py | 484 -- lib/yaml/resolver.py | 224 - lib/yaml/scanner.py | 1457 ----- lib/yaml/serializer.py | 111 - lib/yaml/tokens.py | 104 - requirements.txt | 29 + 1195 files changed, 81 insertions(+), 196887 deletions(-) rename {lib => headphones}/bencode.py (92%) rename {lib => headphones}/certgen.py (98%) delete mode 100644 lib/MultipartPostHandler.py delete mode 100644 lib/apscheduler/__init__.py delete mode 100644 lib/apscheduler/events.py delete mode 100644 lib/apscheduler/executors/__init__.py delete mode 100644 lib/apscheduler/executors/asyncio.py delete mode 100644 lib/apscheduler/executors/base.py delete mode 100644 lib/apscheduler/executors/debug.py delete mode 100644 lib/apscheduler/executors/gevent.py delete mode 100644 lib/apscheduler/executors/pool.py delete mode 100644 lib/apscheduler/executors/twisted.py delete mode 100644 lib/apscheduler/job.py delete mode 100644 lib/apscheduler/jobstores/__init__.py delete mode 100644 lib/apscheduler/jobstores/base.py delete mode 100644 lib/apscheduler/jobstores/memory.py delete mode 100644 lib/apscheduler/jobstores/mongodb.py delete mode 100644 lib/apscheduler/jobstores/redis.py delete mode 100644 lib/apscheduler/jobstores/sqlalchemy.py delete mode 100644 lib/apscheduler/schedulers/__init__.py delete mode 100644 lib/apscheduler/schedulers/asyncio.py delete mode 100644 lib/apscheduler/schedulers/background.py delete mode 100644 lib/apscheduler/schedulers/base.py delete mode 100644 lib/apscheduler/schedulers/blocking.py delete mode 100644 lib/apscheduler/schedulers/gevent.py delete mode 100644 lib/apscheduler/schedulers/qt.py delete mode 100644 lib/apscheduler/schedulers/tornado.py delete mode 100644 lib/apscheduler/schedulers/twisted.py delete mode 100644 lib/apscheduler/triggers/__init__.py delete mode 100644 lib/apscheduler/triggers/base.py delete mode 100644 lib/apscheduler/triggers/cron/__init__.py delete mode 100644 lib/apscheduler/triggers/cron/expressions.py delete mode 100644 lib/apscheduler/triggers/cron/fields.py delete mode 100644 lib/apscheduler/triggers/date.py delete mode 100644 lib/apscheduler/triggers/interval.py delete mode 100644 lib/apscheduler/util.py delete mode 100644 lib/argparse.py delete mode 100644 lib/beets/LICENSE delete mode 100644 lib/beets/README.rst delete mode 100644 lib/beets/__init__.py delete mode 100644 lib/beets/autotag/__init__.py delete mode 100644 lib/beets/autotag/hooks.py delete mode 100644 lib/beets/autotag/match.py delete mode 100644 lib/beets/autotag/mb.py delete mode 100644 lib/beets/config_default.yaml delete mode 100644 lib/beets/dbcore/__init__.py delete mode 100644 lib/beets/dbcore/db.py delete mode 100644 lib/beets/dbcore/query.py delete mode 100644 lib/beets/dbcore/queryparse.py delete mode 100644 lib/beets/dbcore/types.py delete mode 100644 lib/beets/importer.py delete mode 100644 lib/beets/library.py delete mode 100644 lib/beets/mediafile.py delete mode 100755 lib/beets/plugins.py delete mode 100644 lib/beets/ui/__init__.py delete mode 100644 lib/beets/ui/commands.py delete mode 100644 lib/beets/ui/completion_base.sh delete mode 100644 lib/beets/util/__init__.py delete mode 100644 lib/beets/util/artresizer.py delete mode 100644 lib/beets/util/bluelet.py delete mode 100644 lib/beets/util/confit.py delete mode 100644 lib/beets/util/enumeration.py delete mode 100644 lib/beets/util/functemplate.py delete mode 100644 lib/beets/util/pipeline.py delete mode 100644 lib/beets/vfs.py delete mode 100644 lib/beetsplug/__init__.py delete mode 100644 lib/beetsplug/embedart.py delete mode 100644 lib/beetsplug/fetchart.py delete mode 100644 lib/beetsplug/lyrics.py delete mode 100755 lib/biplist/__init__.py delete mode 100644 lib/bs4/__init__.py delete mode 100644 lib/bs4/builder/__init__.py delete mode 100644 lib/bs4/builder/_html5lib.py delete mode 100644 lib/bs4/builder/_htmlparser.py delete mode 100644 lib/bs4/builder/_lxml.py delete mode 100644 lib/bs4/dammit.py delete mode 100644 lib/bs4/diagnose.py delete mode 100644 lib/bs4/element.py delete mode 100644 lib/bs4/testing.py delete mode 100644 lib/cherrypy/LICENSE.txt delete mode 100644 lib/cherrypy/__init__.py delete mode 100644 lib/cherrypy/_cpchecker.py delete mode 100644 lib/cherrypy/_cpcompat.py delete mode 100644 lib/cherrypy/_cpcompat_subprocess.py delete mode 100644 lib/cherrypy/_cpconfig.py delete mode 100644 lib/cherrypy/_cpdispatch.py delete mode 100644 lib/cherrypy/_cperror.py delete mode 100644 lib/cherrypy/_cplogging.py delete mode 100644 lib/cherrypy/_cpmodpy.py delete mode 100644 lib/cherrypy/_cpnative_server.py delete mode 100644 lib/cherrypy/_cpreqbody.py delete mode 100644 lib/cherrypy/_cprequest.py delete mode 100644 lib/cherrypy/_cpserver.py delete mode 100644 lib/cherrypy/_cpthreadinglocal.py delete mode 100644 lib/cherrypy/_cptools.py delete mode 100644 lib/cherrypy/_cptree.py delete mode 100644 lib/cherrypy/_cpwsgi.py delete mode 100644 lib/cherrypy/_cpwsgi_server.py delete mode 100644 lib/cherrypy/cherryd delete mode 100644 lib/cherrypy/favicon.ico delete mode 100644 lib/cherrypy/lib/__init__.py delete mode 100644 lib/cherrypy/lib/auth.py delete mode 100644 lib/cherrypy/lib/auth_basic.py delete mode 100644 lib/cherrypy/lib/auth_digest.py delete mode 100644 lib/cherrypy/lib/caching.py delete mode 100644 lib/cherrypy/lib/covercp.py delete mode 100644 lib/cherrypy/lib/cpstats.py delete mode 100644 lib/cherrypy/lib/cptools.py delete mode 100644 lib/cherrypy/lib/encoding.py delete mode 100644 lib/cherrypy/lib/gctools.py delete mode 100644 lib/cherrypy/lib/http.py delete mode 100644 lib/cherrypy/lib/httpauth.py delete mode 100644 lib/cherrypy/lib/httputil.py delete mode 100644 lib/cherrypy/lib/jsontools.py delete mode 100644 lib/cherrypy/lib/lockfile.py delete mode 100644 lib/cherrypy/lib/locking.py delete mode 100644 lib/cherrypy/lib/profiler.py delete mode 100644 lib/cherrypy/lib/reprconf.py delete mode 100644 lib/cherrypy/lib/sessions.py delete mode 100644 lib/cherrypy/lib/static.py delete mode 100644 lib/cherrypy/lib/xmlrpcutil.py delete mode 100644 lib/cherrypy/process/__init__.py delete mode 100644 lib/cherrypy/process/plugins.py delete mode 100644 lib/cherrypy/process/servers.py delete mode 100644 lib/cherrypy/process/win32.py delete mode 100644 lib/cherrypy/process/wspbus.py delete mode 100644 lib/cherrypy/scaffold/__init__.py delete mode 100644 lib/cherrypy/scaffold/apache-fcgi.conf delete mode 100644 lib/cherrypy/scaffold/example.conf delete mode 100644 lib/cherrypy/scaffold/site.conf delete mode 100644 lib/cherrypy/scaffold/static/made_with_cherrypy_small.png delete mode 100644 lib/cherrypy/wsgiserver/__init__.py delete mode 100644 lib/cherrypy/wsgiserver/ssl_builtin.py delete mode 100644 lib/cherrypy/wsgiserver/ssl_pyopenssl.py delete mode 100644 lib/cherrypy/wsgiserver/wsgiserver2.py delete mode 100644 lib/cherrypy/wsgiserver/wsgiserver3.py delete mode 100644 lib/concurrent/LICENSE delete mode 100644 lib/concurrent/__init__.py delete mode 100644 lib/concurrent/futures/__init__.py delete mode 100644 lib/concurrent/futures/_base.py delete mode 100644 lib/concurrent/futures/_compat.py delete mode 100644 lib/concurrent/futures/process.py delete mode 100644 lib/concurrent/futures/thread.py delete mode 100644 lib/configobj.py delete mode 100644 lib/enum/LICENSE delete mode 100644 lib/enum/README delete mode 100644 lib/enum/__init__.py delete mode 100644 lib/enum/doc/enum.rst delete mode 100644 lib/enum/enum.py delete mode 100644 lib/enum/test_enum.py delete mode 100644 lib/feedparser.py delete mode 100644 lib/gntp/LICENSE delete mode 100644 lib/gntp/__init__.py delete mode 100644 lib/gntp/cli.py delete mode 100644 lib/gntp/config.py delete mode 100644 lib/gntp/core.py delete mode 100644 lib/gntp/errors.py delete mode 100644 lib/gntp/notifier.py delete mode 100644 lib/gntp/shim.py delete mode 100644 lib/gntp/version.py delete mode 100644 lib/html5lib/__init__.py delete mode 100644 lib/html5lib/constants.py delete mode 100644 lib/html5lib/filters/__init__.py delete mode 100644 lib/html5lib/filters/_base.py delete mode 100644 lib/html5lib/filters/alphabeticalattributes.py delete mode 100644 lib/html5lib/filters/inject_meta_charset.py delete mode 100644 lib/html5lib/filters/lint.py delete mode 100644 lib/html5lib/filters/optionaltags.py delete mode 100644 lib/html5lib/filters/sanitizer.py delete mode 100644 lib/html5lib/filters/whitespace.py delete mode 100644 lib/html5lib/html5parser.py delete mode 100644 lib/html5lib/ihatexml.py delete mode 100644 lib/html5lib/inputstream.py delete mode 100644 lib/html5lib/sanitizer.py delete mode 100644 lib/html5lib/serializer/__init__.py delete mode 100644 lib/html5lib/serializer/htmlserializer.py delete mode 100644 lib/html5lib/tokenizer.py delete mode 100644 lib/html5lib/treeadapters/__init__.py delete mode 100644 lib/html5lib/treeadapters/sax.py delete mode 100644 lib/html5lib/treebuilders/__init__.py delete mode 100644 lib/html5lib/treebuilders/_base.py delete mode 100644 lib/html5lib/treebuilders/dom.py delete mode 100644 lib/html5lib/treebuilders/etree.py delete mode 100644 lib/html5lib/treebuilders/etree_lxml.py delete mode 100644 lib/html5lib/treewalkers/__init__.py delete mode 100644 lib/html5lib/treewalkers/_base.py delete mode 100644 lib/html5lib/treewalkers/dom.py delete mode 100644 lib/html5lib/treewalkers/etree.py delete mode 100644 lib/html5lib/treewalkers/genshistream.py delete mode 100644 lib/html5lib/treewalkers/lxmletree.py delete mode 100644 lib/html5lib/treewalkers/pulldom.py delete mode 100644 lib/html5lib/trie/__init__.py delete mode 100644 lib/html5lib/trie/_base.py delete mode 100644 lib/html5lib/trie/datrie.py delete mode 100644 lib/html5lib/trie/py.py delete mode 100644 lib/html5lib/utils.py delete mode 100755 lib/httplib2/__init__.py delete mode 100755 lib/httplib2/iri2uri.py delete mode 100644 lib/logutils/__init__.py delete mode 100644 lib/logutils/adapter.py delete mode 100644 lib/logutils/colorize.py delete mode 100644 lib/logutils/dictconfig.py delete mode 100644 lib/logutils/http.py delete mode 100644 lib/logutils/queue.py delete mode 100644 lib/logutils/redis.py delete mode 100644 lib/logutils/testing.py delete mode 100644 lib/mako/LICENSE delete mode 100644 lib/mako/README.rst delete mode 100644 lib/mako/__init__.py delete mode 100644 lib/mako/_ast_util.py delete mode 100644 lib/mako/ast.py delete mode 100644 lib/mako/cache.py delete mode 100755 lib/mako/cmd.py delete mode 100644 lib/mako/codegen.py delete mode 100644 lib/mako/compat.py delete mode 100644 lib/mako/exceptions.py delete mode 100644 lib/mako/ext/__init__.py delete mode 100644 lib/mako/ext/autohandler.py delete mode 100644 lib/mako/ext/babelplugin.py delete mode 100644 lib/mako/ext/beaker_cache.py delete mode 100644 lib/mako/ext/extract.py delete mode 100644 lib/mako/ext/linguaplugin.py delete mode 100644 lib/mako/ext/preprocessors.py delete mode 100644 lib/mako/ext/pygmentplugin.py delete mode 100644 lib/mako/ext/turbogears.py delete mode 100644 lib/mako/filters.py delete mode 100644 lib/mako/lexer.py delete mode 100644 lib/mako/lookup.py delete mode 100644 lib/mako/parsetree.py delete mode 100644 lib/mako/pygen.py delete mode 100644 lib/mako/pyparser.py delete mode 100644 lib/mako/runtime.py delete mode 100644 lib/mako/template.py delete mode 100644 lib/mako/util.py delete mode 100644 lib/munkres.py delete mode 100644 lib/musicbrainzngs/__init__.py delete mode 100644 lib/musicbrainzngs/caa.py delete mode 100644 lib/musicbrainzngs/compat.py delete mode 100644 lib/musicbrainzngs/mbxml.py delete mode 100644 lib/musicbrainzngs/musicbrainz.py delete mode 100644 lib/musicbrainzngs/util.py delete mode 100644 lib/mutagen/README.rst delete mode 100644 lib/mutagen/__init__.py delete mode 100644 lib/mutagen/_compat.py delete mode 100644 lib/mutagen/_constants.py delete mode 100644 lib/mutagen/_file.py delete mode 100644 lib/mutagen/_tags.py delete mode 100644 lib/mutagen/_util.py delete mode 100644 lib/mutagen/_vorbis.py delete mode 100644 lib/mutagen/aac.py delete mode 100644 lib/mutagen/aiff.py delete mode 100644 lib/mutagen/apev2.py delete mode 100644 lib/mutagen/asf.py delete mode 100644 lib/mutagen/easyid3.py delete mode 100644 lib/mutagen/easymp4.py delete mode 100644 lib/mutagen/flac.py delete mode 100644 lib/mutagen/id3/__init__.py delete mode 100644 lib/mutagen/id3/_frames.py delete mode 100644 lib/mutagen/id3/_specs.py delete mode 100644 lib/mutagen/id3/_util.py delete mode 100644 lib/mutagen/m4a.py delete mode 100644 lib/mutagen/monkeysaudio.py delete mode 100644 lib/mutagen/mp3.py delete mode 100644 lib/mutagen/mp4/__init__.py delete mode 100644 lib/mutagen/mp4/_as_entry.py delete mode 100644 lib/mutagen/mp4/_atom.py delete mode 100644 lib/mutagen/mp4/_util.py delete mode 100644 lib/mutagen/musepack.py delete mode 100644 lib/mutagen/ogg.py delete mode 100644 lib/mutagen/oggflac.py delete mode 100644 lib/mutagen/oggopus.py delete mode 100644 lib/mutagen/oggspeex.py delete mode 100644 lib/mutagen/oggtheora.py delete mode 100644 lib/mutagen/oggvorbis.py delete mode 100644 lib/mutagen/optimfrog.py delete mode 100644 lib/mutagen/trueaudio.py delete mode 100644 lib/mutagen/wavpack.py delete mode 100644 lib/oauth2/__init__.py delete mode 100644 lib/ordereddict.py delete mode 100644 lib/pkg_resources.py delete mode 100644 lib/pygazelle/__init__.py delete mode 100644 lib/pygazelle/api.py delete mode 100644 lib/pygazelle/artist.py delete mode 100644 lib/pygazelle/category.py delete mode 100644 lib/pygazelle/encoding.py delete mode 100644 lib/pygazelle/format.py delete mode 100644 lib/pygazelle/inbox.py delete mode 100644 lib/pygazelle/media.py delete mode 100644 lib/pygazelle/release_type.py delete mode 100644 lib/pygazelle/request.py delete mode 100644 lib/pygazelle/tag.py delete mode 100644 lib/pygazelle/torrent.py delete mode 100644 lib/pygazelle/torrent_group.py delete mode 100644 lib/pygazelle/user.py delete mode 100644 lib/pynma/__init__.py delete mode 100755 lib/pynma/pynma.py delete mode 100644 lib/pythontwitter/__init__.py delete mode 100644 lib/pytz/LICENSE.txt delete mode 100644 lib/pytz/README.txt delete mode 100644 lib/pytz/__init__.py delete mode 100644 lib/pytz/exceptions.py delete mode 100644 lib/pytz/lazy.py delete mode 100644 lib/pytz/reference.py delete mode 100644 lib/pytz/tzfile.py delete mode 100644 lib/pytz/tzinfo.py delete mode 100644 lib/pytz/zoneinfo/Africa/Abidjan delete mode 100644 lib/pytz/zoneinfo/Africa/Accra delete mode 100644 lib/pytz/zoneinfo/Africa/Addis_Ababa delete mode 100644 lib/pytz/zoneinfo/Africa/Algiers delete mode 100644 lib/pytz/zoneinfo/Africa/Asmara delete mode 100644 lib/pytz/zoneinfo/Africa/Asmera delete mode 100644 lib/pytz/zoneinfo/Africa/Bamako delete mode 100644 lib/pytz/zoneinfo/Africa/Bangui delete mode 100644 lib/pytz/zoneinfo/Africa/Banjul delete mode 100644 lib/pytz/zoneinfo/Africa/Bissau delete mode 100644 lib/pytz/zoneinfo/Africa/Blantyre delete mode 100644 lib/pytz/zoneinfo/Africa/Brazzaville delete mode 100644 lib/pytz/zoneinfo/Africa/Bujumbura delete mode 100644 lib/pytz/zoneinfo/Africa/Cairo delete mode 100644 lib/pytz/zoneinfo/Africa/Casablanca delete mode 100644 lib/pytz/zoneinfo/Africa/Ceuta delete mode 100644 lib/pytz/zoneinfo/Africa/Conakry delete mode 100644 lib/pytz/zoneinfo/Africa/Dakar delete mode 100644 lib/pytz/zoneinfo/Africa/Dar_es_Salaam delete mode 100644 lib/pytz/zoneinfo/Africa/Djibouti delete mode 100644 lib/pytz/zoneinfo/Africa/Douala delete mode 100644 lib/pytz/zoneinfo/Africa/El_Aaiun delete mode 100644 lib/pytz/zoneinfo/Africa/Freetown delete mode 100644 lib/pytz/zoneinfo/Africa/Gaborone delete mode 100644 lib/pytz/zoneinfo/Africa/Harare delete mode 100644 lib/pytz/zoneinfo/Africa/Johannesburg delete mode 100644 lib/pytz/zoneinfo/Africa/Juba delete mode 100644 lib/pytz/zoneinfo/Africa/Kampala delete mode 100644 lib/pytz/zoneinfo/Africa/Khartoum delete mode 100644 lib/pytz/zoneinfo/Africa/Kigali delete mode 100644 lib/pytz/zoneinfo/Africa/Kinshasa delete mode 100644 lib/pytz/zoneinfo/Africa/Lagos delete mode 100644 lib/pytz/zoneinfo/Africa/Libreville delete mode 100644 lib/pytz/zoneinfo/Africa/Lome delete mode 100644 lib/pytz/zoneinfo/Africa/Luanda delete mode 100644 lib/pytz/zoneinfo/Africa/Lubumbashi delete mode 100644 lib/pytz/zoneinfo/Africa/Lusaka delete mode 100644 lib/pytz/zoneinfo/Africa/Malabo delete mode 100644 lib/pytz/zoneinfo/Africa/Maputo delete mode 100644 lib/pytz/zoneinfo/Africa/Maseru delete mode 100644 lib/pytz/zoneinfo/Africa/Mbabane delete mode 100644 lib/pytz/zoneinfo/Africa/Mogadishu delete mode 100644 lib/pytz/zoneinfo/Africa/Monrovia delete mode 100644 lib/pytz/zoneinfo/Africa/Nairobi delete mode 100644 lib/pytz/zoneinfo/Africa/Ndjamena delete mode 100644 lib/pytz/zoneinfo/Africa/Niamey delete mode 100644 lib/pytz/zoneinfo/Africa/Nouakchott delete mode 100644 lib/pytz/zoneinfo/Africa/Ouagadougou delete mode 100644 lib/pytz/zoneinfo/Africa/Porto-Novo delete mode 100644 lib/pytz/zoneinfo/Africa/Sao_Tome delete mode 100644 lib/pytz/zoneinfo/Africa/Timbuktu delete mode 100644 lib/pytz/zoneinfo/Africa/Tripoli delete mode 100644 lib/pytz/zoneinfo/Africa/Tunis delete mode 100644 lib/pytz/zoneinfo/Africa/Windhoek delete mode 100644 lib/pytz/zoneinfo/America/Adak delete mode 100644 lib/pytz/zoneinfo/America/Anchorage delete mode 100644 lib/pytz/zoneinfo/America/Anguilla delete mode 100644 lib/pytz/zoneinfo/America/Antigua delete mode 100644 lib/pytz/zoneinfo/America/Araguaina delete mode 100644 lib/pytz/zoneinfo/America/Argentina/Buenos_Aires delete mode 100644 lib/pytz/zoneinfo/America/Argentina/Catamarca delete mode 100644 lib/pytz/zoneinfo/America/Argentina/ComodRivadavia delete mode 100644 lib/pytz/zoneinfo/America/Argentina/Cordoba delete mode 100644 lib/pytz/zoneinfo/America/Argentina/Jujuy delete mode 100644 lib/pytz/zoneinfo/America/Argentina/La_Rioja delete mode 100644 lib/pytz/zoneinfo/America/Argentina/Mendoza delete mode 100644 lib/pytz/zoneinfo/America/Argentina/Rio_Gallegos delete mode 100644 lib/pytz/zoneinfo/America/Argentina/Salta delete mode 100644 lib/pytz/zoneinfo/America/Argentina/San_Juan delete mode 100644 lib/pytz/zoneinfo/America/Argentina/San_Luis delete mode 100644 lib/pytz/zoneinfo/America/Argentina/Tucuman delete mode 100644 lib/pytz/zoneinfo/America/Argentina/Ushuaia delete mode 100644 lib/pytz/zoneinfo/America/Aruba delete mode 100644 lib/pytz/zoneinfo/America/Asuncion delete mode 100644 lib/pytz/zoneinfo/America/Atikokan delete mode 100644 lib/pytz/zoneinfo/America/Atka delete mode 100644 lib/pytz/zoneinfo/America/Bahia delete mode 100644 lib/pytz/zoneinfo/America/Bahia_Banderas delete mode 100644 lib/pytz/zoneinfo/America/Barbados delete mode 100644 lib/pytz/zoneinfo/America/Belem delete mode 100644 lib/pytz/zoneinfo/America/Belize delete mode 100644 lib/pytz/zoneinfo/America/Blanc-Sablon delete mode 100644 lib/pytz/zoneinfo/America/Boa_Vista delete mode 100644 lib/pytz/zoneinfo/America/Bogota delete mode 100644 lib/pytz/zoneinfo/America/Boise delete mode 100644 lib/pytz/zoneinfo/America/Buenos_Aires delete mode 100644 lib/pytz/zoneinfo/America/Cambridge_Bay delete mode 100644 lib/pytz/zoneinfo/America/Campo_Grande delete mode 100644 lib/pytz/zoneinfo/America/Cancun delete mode 100644 lib/pytz/zoneinfo/America/Caracas delete mode 100644 lib/pytz/zoneinfo/America/Catamarca delete mode 100644 lib/pytz/zoneinfo/America/Cayenne delete mode 100644 lib/pytz/zoneinfo/America/Cayman delete mode 100644 lib/pytz/zoneinfo/America/Chicago delete mode 100644 lib/pytz/zoneinfo/America/Chihuahua delete mode 100644 lib/pytz/zoneinfo/America/Coral_Harbour delete mode 100644 lib/pytz/zoneinfo/America/Cordoba delete mode 100644 lib/pytz/zoneinfo/America/Costa_Rica delete mode 100644 lib/pytz/zoneinfo/America/Creston delete mode 100644 lib/pytz/zoneinfo/America/Cuiaba delete mode 100644 lib/pytz/zoneinfo/America/Curacao delete mode 100644 lib/pytz/zoneinfo/America/Danmarkshavn delete mode 100644 lib/pytz/zoneinfo/America/Dawson delete mode 100644 lib/pytz/zoneinfo/America/Dawson_Creek delete mode 100644 lib/pytz/zoneinfo/America/Denver delete mode 100644 lib/pytz/zoneinfo/America/Detroit delete mode 100644 lib/pytz/zoneinfo/America/Dominica delete mode 100644 lib/pytz/zoneinfo/America/Edmonton delete mode 100644 lib/pytz/zoneinfo/America/Eirunepe delete mode 100644 lib/pytz/zoneinfo/America/El_Salvador delete mode 100644 lib/pytz/zoneinfo/America/Ensenada delete mode 100644 lib/pytz/zoneinfo/America/Fort_Wayne delete mode 100644 lib/pytz/zoneinfo/America/Fortaleza delete mode 100644 lib/pytz/zoneinfo/America/Glace_Bay delete mode 100644 lib/pytz/zoneinfo/America/Godthab delete mode 100644 lib/pytz/zoneinfo/America/Goose_Bay delete mode 100644 lib/pytz/zoneinfo/America/Grand_Turk delete mode 100644 lib/pytz/zoneinfo/America/Grenada delete mode 100644 lib/pytz/zoneinfo/America/Guadeloupe delete mode 100644 lib/pytz/zoneinfo/America/Guatemala delete mode 100644 lib/pytz/zoneinfo/America/Guayaquil delete mode 100644 lib/pytz/zoneinfo/America/Guyana delete mode 100644 lib/pytz/zoneinfo/America/Halifax delete mode 100644 lib/pytz/zoneinfo/America/Havana delete mode 100644 lib/pytz/zoneinfo/America/Hermosillo delete mode 100644 lib/pytz/zoneinfo/America/Indiana/Indianapolis delete mode 100644 lib/pytz/zoneinfo/America/Indiana/Knox delete mode 100644 lib/pytz/zoneinfo/America/Indiana/Marengo delete mode 100644 lib/pytz/zoneinfo/America/Indiana/Petersburg delete mode 100644 lib/pytz/zoneinfo/America/Indiana/Tell_City delete mode 100644 lib/pytz/zoneinfo/America/Indiana/Vevay delete mode 100644 lib/pytz/zoneinfo/America/Indiana/Vincennes delete mode 100644 lib/pytz/zoneinfo/America/Indiana/Winamac delete mode 100644 lib/pytz/zoneinfo/America/Indianapolis delete mode 100644 lib/pytz/zoneinfo/America/Inuvik delete mode 100644 lib/pytz/zoneinfo/America/Iqaluit delete mode 100644 lib/pytz/zoneinfo/America/Jamaica delete mode 100644 lib/pytz/zoneinfo/America/Jujuy delete mode 100644 lib/pytz/zoneinfo/America/Juneau delete mode 100644 lib/pytz/zoneinfo/America/Kentucky/Louisville delete mode 100644 lib/pytz/zoneinfo/America/Kentucky/Monticello delete mode 100644 lib/pytz/zoneinfo/America/Knox_IN delete mode 100644 lib/pytz/zoneinfo/America/Kralendijk delete mode 100644 lib/pytz/zoneinfo/America/La_Paz delete mode 100644 lib/pytz/zoneinfo/America/Lima delete mode 100644 lib/pytz/zoneinfo/America/Los_Angeles delete mode 100644 lib/pytz/zoneinfo/America/Louisville delete mode 100644 lib/pytz/zoneinfo/America/Lower_Princes delete mode 100644 lib/pytz/zoneinfo/America/Maceio delete mode 100644 lib/pytz/zoneinfo/America/Managua delete mode 100644 lib/pytz/zoneinfo/America/Manaus delete mode 100644 lib/pytz/zoneinfo/America/Marigot delete mode 100644 lib/pytz/zoneinfo/America/Martinique delete mode 100644 lib/pytz/zoneinfo/America/Matamoros delete mode 100644 lib/pytz/zoneinfo/America/Mazatlan delete mode 100644 lib/pytz/zoneinfo/America/Mendoza delete mode 100644 lib/pytz/zoneinfo/America/Menominee delete mode 100644 lib/pytz/zoneinfo/America/Merida delete mode 100644 lib/pytz/zoneinfo/America/Metlakatla delete mode 100644 lib/pytz/zoneinfo/America/Mexico_City delete mode 100644 lib/pytz/zoneinfo/America/Miquelon delete mode 100644 lib/pytz/zoneinfo/America/Moncton delete mode 100644 lib/pytz/zoneinfo/America/Monterrey delete mode 100644 lib/pytz/zoneinfo/America/Montevideo delete mode 100644 lib/pytz/zoneinfo/America/Montreal delete mode 100644 lib/pytz/zoneinfo/America/Montserrat delete mode 100644 lib/pytz/zoneinfo/America/Nassau delete mode 100644 lib/pytz/zoneinfo/America/New_York delete mode 100644 lib/pytz/zoneinfo/America/Nipigon delete mode 100644 lib/pytz/zoneinfo/America/Nome delete mode 100644 lib/pytz/zoneinfo/America/Noronha delete mode 100644 lib/pytz/zoneinfo/America/North_Dakota/Beulah delete mode 100644 lib/pytz/zoneinfo/America/North_Dakota/Center delete mode 100644 lib/pytz/zoneinfo/America/North_Dakota/New_Salem delete mode 100644 lib/pytz/zoneinfo/America/Ojinaga delete mode 100644 lib/pytz/zoneinfo/America/Panama delete mode 100644 lib/pytz/zoneinfo/America/Pangnirtung delete mode 100644 lib/pytz/zoneinfo/America/Paramaribo delete mode 100644 lib/pytz/zoneinfo/America/Phoenix delete mode 100644 lib/pytz/zoneinfo/America/Port-au-Prince delete mode 100644 lib/pytz/zoneinfo/America/Port_of_Spain delete mode 100644 lib/pytz/zoneinfo/America/Porto_Acre delete mode 100644 lib/pytz/zoneinfo/America/Porto_Velho delete mode 100644 lib/pytz/zoneinfo/America/Puerto_Rico delete mode 100644 lib/pytz/zoneinfo/America/Rainy_River delete mode 100644 lib/pytz/zoneinfo/America/Rankin_Inlet delete mode 100644 lib/pytz/zoneinfo/America/Recife delete mode 100644 lib/pytz/zoneinfo/America/Regina delete mode 100644 lib/pytz/zoneinfo/America/Resolute delete mode 100644 lib/pytz/zoneinfo/America/Rio_Branco delete mode 100644 lib/pytz/zoneinfo/America/Rosario delete mode 100644 lib/pytz/zoneinfo/America/Santa_Isabel delete mode 100644 lib/pytz/zoneinfo/America/Santarem delete mode 100644 lib/pytz/zoneinfo/America/Santiago delete mode 100644 lib/pytz/zoneinfo/America/Santo_Domingo delete mode 100644 lib/pytz/zoneinfo/America/Sao_Paulo delete mode 100644 lib/pytz/zoneinfo/America/Scoresbysund delete mode 100644 lib/pytz/zoneinfo/America/Shiprock delete mode 100644 lib/pytz/zoneinfo/America/Sitka delete mode 100644 lib/pytz/zoneinfo/America/St_Barthelemy delete mode 100644 lib/pytz/zoneinfo/America/St_Johns delete mode 100644 lib/pytz/zoneinfo/America/St_Kitts delete mode 100644 lib/pytz/zoneinfo/America/St_Lucia delete mode 100644 lib/pytz/zoneinfo/America/St_Thomas delete mode 100644 lib/pytz/zoneinfo/America/St_Vincent delete mode 100644 lib/pytz/zoneinfo/America/Swift_Current delete mode 100644 lib/pytz/zoneinfo/America/Tegucigalpa delete mode 100644 lib/pytz/zoneinfo/America/Thule delete mode 100644 lib/pytz/zoneinfo/America/Thunder_Bay delete mode 100644 lib/pytz/zoneinfo/America/Tijuana delete mode 100644 lib/pytz/zoneinfo/America/Toronto delete mode 100644 lib/pytz/zoneinfo/America/Tortola delete mode 100644 lib/pytz/zoneinfo/America/Vancouver delete mode 100644 lib/pytz/zoneinfo/America/Virgin delete mode 100644 lib/pytz/zoneinfo/America/Whitehorse delete mode 100644 lib/pytz/zoneinfo/America/Winnipeg delete mode 100644 lib/pytz/zoneinfo/America/Yakutat delete mode 100644 lib/pytz/zoneinfo/America/Yellowknife delete mode 100644 lib/pytz/zoneinfo/Antarctica/Casey delete mode 100644 lib/pytz/zoneinfo/Antarctica/Davis delete mode 100644 lib/pytz/zoneinfo/Antarctica/DumontDUrville delete mode 100644 lib/pytz/zoneinfo/Antarctica/Macquarie delete mode 100644 lib/pytz/zoneinfo/Antarctica/Mawson delete mode 100644 lib/pytz/zoneinfo/Antarctica/McMurdo delete mode 100644 lib/pytz/zoneinfo/Antarctica/Palmer delete mode 100644 lib/pytz/zoneinfo/Antarctica/Rothera delete mode 100644 lib/pytz/zoneinfo/Antarctica/South_Pole delete mode 100644 lib/pytz/zoneinfo/Antarctica/Syowa delete mode 100644 lib/pytz/zoneinfo/Antarctica/Troll delete mode 100644 lib/pytz/zoneinfo/Antarctica/Vostok delete mode 100644 lib/pytz/zoneinfo/Arctic/Longyearbyen delete mode 100644 lib/pytz/zoneinfo/Asia/Aden delete mode 100644 lib/pytz/zoneinfo/Asia/Almaty delete mode 100644 lib/pytz/zoneinfo/Asia/Amman delete mode 100644 lib/pytz/zoneinfo/Asia/Anadyr delete mode 100644 lib/pytz/zoneinfo/Asia/Aqtau delete mode 100644 lib/pytz/zoneinfo/Asia/Aqtobe delete mode 100644 lib/pytz/zoneinfo/Asia/Ashgabat delete mode 100644 lib/pytz/zoneinfo/Asia/Ashkhabad delete mode 100644 lib/pytz/zoneinfo/Asia/Baghdad delete mode 100644 lib/pytz/zoneinfo/Asia/Bahrain delete mode 100644 lib/pytz/zoneinfo/Asia/Baku delete mode 100644 lib/pytz/zoneinfo/Asia/Bangkok delete mode 100644 lib/pytz/zoneinfo/Asia/Beirut delete mode 100644 lib/pytz/zoneinfo/Asia/Bishkek delete mode 100644 lib/pytz/zoneinfo/Asia/Brunei delete mode 100644 lib/pytz/zoneinfo/Asia/Calcutta delete mode 100644 lib/pytz/zoneinfo/Asia/Chita delete mode 100644 lib/pytz/zoneinfo/Asia/Choibalsan delete mode 100644 lib/pytz/zoneinfo/Asia/Chongqing delete mode 100644 lib/pytz/zoneinfo/Asia/Chungking delete mode 100644 lib/pytz/zoneinfo/Asia/Colombo delete mode 100644 lib/pytz/zoneinfo/Asia/Dacca delete mode 100644 lib/pytz/zoneinfo/Asia/Damascus delete mode 100644 lib/pytz/zoneinfo/Asia/Dhaka delete mode 100644 lib/pytz/zoneinfo/Asia/Dili delete mode 100644 lib/pytz/zoneinfo/Asia/Dubai delete mode 100644 lib/pytz/zoneinfo/Asia/Dushanbe delete mode 100644 lib/pytz/zoneinfo/Asia/Gaza delete mode 100644 lib/pytz/zoneinfo/Asia/Harbin delete mode 100644 lib/pytz/zoneinfo/Asia/Hebron delete mode 100644 lib/pytz/zoneinfo/Asia/Ho_Chi_Minh delete mode 100644 lib/pytz/zoneinfo/Asia/Hong_Kong delete mode 100644 lib/pytz/zoneinfo/Asia/Hovd delete mode 100644 lib/pytz/zoneinfo/Asia/Irkutsk delete mode 100644 lib/pytz/zoneinfo/Asia/Istanbul delete mode 100644 lib/pytz/zoneinfo/Asia/Jakarta delete mode 100644 lib/pytz/zoneinfo/Asia/Jayapura delete mode 100644 lib/pytz/zoneinfo/Asia/Jerusalem delete mode 100644 lib/pytz/zoneinfo/Asia/Kabul delete mode 100644 lib/pytz/zoneinfo/Asia/Kamchatka delete mode 100644 lib/pytz/zoneinfo/Asia/Karachi delete mode 100644 lib/pytz/zoneinfo/Asia/Kashgar delete mode 100644 lib/pytz/zoneinfo/Asia/Kathmandu delete mode 100644 lib/pytz/zoneinfo/Asia/Katmandu delete mode 100644 lib/pytz/zoneinfo/Asia/Khandyga delete mode 100644 lib/pytz/zoneinfo/Asia/Kolkata delete mode 100644 lib/pytz/zoneinfo/Asia/Krasnoyarsk delete mode 100644 lib/pytz/zoneinfo/Asia/Kuala_Lumpur delete mode 100644 lib/pytz/zoneinfo/Asia/Kuching delete mode 100644 lib/pytz/zoneinfo/Asia/Kuwait delete mode 100644 lib/pytz/zoneinfo/Asia/Macao delete mode 100644 lib/pytz/zoneinfo/Asia/Macau delete mode 100644 lib/pytz/zoneinfo/Asia/Magadan delete mode 100644 lib/pytz/zoneinfo/Asia/Makassar delete mode 100644 lib/pytz/zoneinfo/Asia/Manila delete mode 100644 lib/pytz/zoneinfo/Asia/Muscat delete mode 100644 lib/pytz/zoneinfo/Asia/Nicosia delete mode 100644 lib/pytz/zoneinfo/Asia/Novokuznetsk delete mode 100644 lib/pytz/zoneinfo/Asia/Novosibirsk delete mode 100644 lib/pytz/zoneinfo/Asia/Omsk delete mode 100644 lib/pytz/zoneinfo/Asia/Oral delete mode 100644 lib/pytz/zoneinfo/Asia/Phnom_Penh delete mode 100644 lib/pytz/zoneinfo/Asia/Pontianak delete mode 100644 lib/pytz/zoneinfo/Asia/Pyongyang delete mode 100644 lib/pytz/zoneinfo/Asia/Qatar delete mode 100644 lib/pytz/zoneinfo/Asia/Qyzylorda delete mode 100644 lib/pytz/zoneinfo/Asia/Rangoon delete mode 100644 lib/pytz/zoneinfo/Asia/Riyadh delete mode 100644 lib/pytz/zoneinfo/Asia/Saigon delete mode 100644 lib/pytz/zoneinfo/Asia/Sakhalin delete mode 100644 lib/pytz/zoneinfo/Asia/Samarkand delete mode 100644 lib/pytz/zoneinfo/Asia/Seoul delete mode 100644 lib/pytz/zoneinfo/Asia/Shanghai delete mode 100644 lib/pytz/zoneinfo/Asia/Singapore delete mode 100644 lib/pytz/zoneinfo/Asia/Srednekolymsk delete mode 100644 lib/pytz/zoneinfo/Asia/Taipei delete mode 100644 lib/pytz/zoneinfo/Asia/Tashkent delete mode 100644 lib/pytz/zoneinfo/Asia/Tbilisi delete mode 100644 lib/pytz/zoneinfo/Asia/Tehran delete mode 100644 lib/pytz/zoneinfo/Asia/Tel_Aviv delete mode 100644 lib/pytz/zoneinfo/Asia/Thimbu delete mode 100644 lib/pytz/zoneinfo/Asia/Thimphu delete mode 100644 lib/pytz/zoneinfo/Asia/Tokyo delete mode 100644 lib/pytz/zoneinfo/Asia/Ujung_Pandang delete mode 100644 lib/pytz/zoneinfo/Asia/Ulaanbaatar delete mode 100644 lib/pytz/zoneinfo/Asia/Ulan_Bator delete mode 100644 lib/pytz/zoneinfo/Asia/Urumqi delete mode 100644 lib/pytz/zoneinfo/Asia/Ust-Nera delete mode 100644 lib/pytz/zoneinfo/Asia/Vientiane delete mode 100644 lib/pytz/zoneinfo/Asia/Vladivostok delete mode 100644 lib/pytz/zoneinfo/Asia/Yakutsk delete mode 100644 lib/pytz/zoneinfo/Asia/Yekaterinburg delete mode 100644 lib/pytz/zoneinfo/Asia/Yerevan delete mode 100644 lib/pytz/zoneinfo/Atlantic/Azores delete mode 100644 lib/pytz/zoneinfo/Atlantic/Bermuda delete mode 100644 lib/pytz/zoneinfo/Atlantic/Canary delete mode 100644 lib/pytz/zoneinfo/Atlantic/Cape_Verde delete mode 100644 lib/pytz/zoneinfo/Atlantic/Faeroe delete mode 100644 lib/pytz/zoneinfo/Atlantic/Faroe delete mode 100644 lib/pytz/zoneinfo/Atlantic/Jan_Mayen delete mode 100644 lib/pytz/zoneinfo/Atlantic/Madeira delete mode 100644 lib/pytz/zoneinfo/Atlantic/Reykjavik delete mode 100644 lib/pytz/zoneinfo/Atlantic/South_Georgia delete mode 100644 lib/pytz/zoneinfo/Atlantic/St_Helena delete mode 100644 lib/pytz/zoneinfo/Atlantic/Stanley delete mode 100644 lib/pytz/zoneinfo/Australia/ACT delete mode 100644 lib/pytz/zoneinfo/Australia/Adelaide delete mode 100644 lib/pytz/zoneinfo/Australia/Brisbane delete mode 100644 lib/pytz/zoneinfo/Australia/Broken_Hill delete mode 100644 lib/pytz/zoneinfo/Australia/Canberra delete mode 100644 lib/pytz/zoneinfo/Australia/Currie delete mode 100644 lib/pytz/zoneinfo/Australia/Darwin delete mode 100644 lib/pytz/zoneinfo/Australia/Eucla delete mode 100644 lib/pytz/zoneinfo/Australia/Hobart delete mode 100644 lib/pytz/zoneinfo/Australia/LHI delete mode 100644 lib/pytz/zoneinfo/Australia/Lindeman delete mode 100644 lib/pytz/zoneinfo/Australia/Lord_Howe delete mode 100644 lib/pytz/zoneinfo/Australia/Melbourne delete mode 100644 lib/pytz/zoneinfo/Australia/NSW delete mode 100644 lib/pytz/zoneinfo/Australia/North delete mode 100644 lib/pytz/zoneinfo/Australia/Perth delete mode 100644 lib/pytz/zoneinfo/Australia/Queensland delete mode 100644 lib/pytz/zoneinfo/Australia/South delete mode 100644 lib/pytz/zoneinfo/Australia/Sydney delete mode 100644 lib/pytz/zoneinfo/Australia/Tasmania delete mode 100644 lib/pytz/zoneinfo/Australia/Victoria delete mode 100644 lib/pytz/zoneinfo/Australia/West delete mode 100644 lib/pytz/zoneinfo/Australia/Yancowinna delete mode 100644 lib/pytz/zoneinfo/Brazil/Acre delete mode 100644 lib/pytz/zoneinfo/Brazil/DeNoronha delete mode 100644 lib/pytz/zoneinfo/Brazil/East delete mode 100644 lib/pytz/zoneinfo/Brazil/West delete mode 100644 lib/pytz/zoneinfo/CET delete mode 100644 lib/pytz/zoneinfo/CST6CDT delete mode 100644 lib/pytz/zoneinfo/Canada/Atlantic delete mode 100644 lib/pytz/zoneinfo/Canada/Central delete mode 100644 lib/pytz/zoneinfo/Canada/East-Saskatchewan delete mode 100644 lib/pytz/zoneinfo/Canada/Eastern delete mode 100644 lib/pytz/zoneinfo/Canada/Mountain delete mode 100644 lib/pytz/zoneinfo/Canada/Newfoundland delete mode 100644 lib/pytz/zoneinfo/Canada/Pacific delete mode 100644 lib/pytz/zoneinfo/Canada/Saskatchewan delete mode 100644 lib/pytz/zoneinfo/Canada/Yukon delete mode 100644 lib/pytz/zoneinfo/Chile/Continental delete mode 100644 lib/pytz/zoneinfo/Chile/EasterIsland delete mode 100644 lib/pytz/zoneinfo/Cuba delete mode 100644 lib/pytz/zoneinfo/EET delete mode 100644 lib/pytz/zoneinfo/EST delete mode 100644 lib/pytz/zoneinfo/EST5EDT delete mode 100644 lib/pytz/zoneinfo/Egypt delete mode 100644 lib/pytz/zoneinfo/Eire delete mode 100644 lib/pytz/zoneinfo/Etc/GMT delete mode 100644 lib/pytz/zoneinfo/Etc/GMT+0 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT+1 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT+10 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT+11 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT+12 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT+2 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT+3 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT+4 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT+5 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT+6 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT+7 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT+8 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT+9 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT-0 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT-1 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT-10 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT-11 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT-12 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT-13 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT-14 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT-2 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT-3 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT-4 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT-5 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT-6 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT-7 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT-8 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT-9 delete mode 100644 lib/pytz/zoneinfo/Etc/GMT0 delete mode 100644 lib/pytz/zoneinfo/Etc/Greenwich delete mode 100644 lib/pytz/zoneinfo/Etc/UCT delete mode 100644 lib/pytz/zoneinfo/Etc/UTC delete mode 100644 lib/pytz/zoneinfo/Etc/Universal delete mode 100644 lib/pytz/zoneinfo/Etc/Zulu delete mode 100644 lib/pytz/zoneinfo/Europe/Amsterdam delete mode 100644 lib/pytz/zoneinfo/Europe/Andorra delete mode 100644 lib/pytz/zoneinfo/Europe/Athens delete mode 100644 lib/pytz/zoneinfo/Europe/Belfast delete mode 100644 lib/pytz/zoneinfo/Europe/Belgrade delete mode 100644 lib/pytz/zoneinfo/Europe/Berlin delete mode 100644 lib/pytz/zoneinfo/Europe/Bratislava delete mode 100644 lib/pytz/zoneinfo/Europe/Brussels delete mode 100644 lib/pytz/zoneinfo/Europe/Bucharest delete mode 100644 lib/pytz/zoneinfo/Europe/Budapest delete mode 100644 lib/pytz/zoneinfo/Europe/Busingen delete mode 100644 lib/pytz/zoneinfo/Europe/Chisinau delete mode 100644 lib/pytz/zoneinfo/Europe/Copenhagen delete mode 100644 lib/pytz/zoneinfo/Europe/Dublin delete mode 100644 lib/pytz/zoneinfo/Europe/Gibraltar delete mode 100644 lib/pytz/zoneinfo/Europe/Guernsey delete mode 100644 lib/pytz/zoneinfo/Europe/Helsinki delete mode 100644 lib/pytz/zoneinfo/Europe/Isle_of_Man delete mode 100644 lib/pytz/zoneinfo/Europe/Istanbul delete mode 100644 lib/pytz/zoneinfo/Europe/Jersey delete mode 100644 lib/pytz/zoneinfo/Europe/Kaliningrad delete mode 100644 lib/pytz/zoneinfo/Europe/Kiev delete mode 100644 lib/pytz/zoneinfo/Europe/Lisbon delete mode 100644 lib/pytz/zoneinfo/Europe/Ljubljana delete mode 100644 lib/pytz/zoneinfo/Europe/London delete mode 100644 lib/pytz/zoneinfo/Europe/Luxembourg delete mode 100644 lib/pytz/zoneinfo/Europe/Madrid delete mode 100644 lib/pytz/zoneinfo/Europe/Malta delete mode 100644 lib/pytz/zoneinfo/Europe/Mariehamn delete mode 100644 lib/pytz/zoneinfo/Europe/Minsk delete mode 100644 lib/pytz/zoneinfo/Europe/Monaco delete mode 100644 lib/pytz/zoneinfo/Europe/Moscow delete mode 100644 lib/pytz/zoneinfo/Europe/Nicosia delete mode 100644 lib/pytz/zoneinfo/Europe/Oslo delete mode 100644 lib/pytz/zoneinfo/Europe/Paris delete mode 100644 lib/pytz/zoneinfo/Europe/Podgorica delete mode 100644 lib/pytz/zoneinfo/Europe/Prague delete mode 100644 lib/pytz/zoneinfo/Europe/Riga delete mode 100644 lib/pytz/zoneinfo/Europe/Rome delete mode 100644 lib/pytz/zoneinfo/Europe/Samara delete mode 100644 lib/pytz/zoneinfo/Europe/San_Marino delete mode 100644 lib/pytz/zoneinfo/Europe/Sarajevo delete mode 100644 lib/pytz/zoneinfo/Europe/Simferopol delete mode 100644 lib/pytz/zoneinfo/Europe/Skopje delete mode 100644 lib/pytz/zoneinfo/Europe/Sofia delete mode 100644 lib/pytz/zoneinfo/Europe/Stockholm delete mode 100644 lib/pytz/zoneinfo/Europe/Tallinn delete mode 100644 lib/pytz/zoneinfo/Europe/Tirane delete mode 100644 lib/pytz/zoneinfo/Europe/Tiraspol delete mode 100644 lib/pytz/zoneinfo/Europe/Uzhgorod delete mode 100644 lib/pytz/zoneinfo/Europe/Vaduz delete mode 100644 lib/pytz/zoneinfo/Europe/Vatican delete mode 100644 lib/pytz/zoneinfo/Europe/Vienna delete mode 100644 lib/pytz/zoneinfo/Europe/Vilnius delete mode 100644 lib/pytz/zoneinfo/Europe/Volgograd delete mode 100644 lib/pytz/zoneinfo/Europe/Warsaw delete mode 100644 lib/pytz/zoneinfo/Europe/Zagreb delete mode 100644 lib/pytz/zoneinfo/Europe/Zaporozhye delete mode 100644 lib/pytz/zoneinfo/Europe/Zurich delete mode 100644 lib/pytz/zoneinfo/Factory delete mode 100644 lib/pytz/zoneinfo/GB delete mode 100644 lib/pytz/zoneinfo/GB-Eire delete mode 100644 lib/pytz/zoneinfo/GMT delete mode 100644 lib/pytz/zoneinfo/GMT+0 delete mode 100644 lib/pytz/zoneinfo/GMT-0 delete mode 100644 lib/pytz/zoneinfo/GMT0 delete mode 100644 lib/pytz/zoneinfo/Greenwich delete mode 100644 lib/pytz/zoneinfo/HST delete mode 100644 lib/pytz/zoneinfo/Hongkong delete mode 100644 lib/pytz/zoneinfo/Iceland delete mode 100644 lib/pytz/zoneinfo/Indian/Antananarivo delete mode 100644 lib/pytz/zoneinfo/Indian/Chagos delete mode 100644 lib/pytz/zoneinfo/Indian/Christmas delete mode 100644 lib/pytz/zoneinfo/Indian/Cocos delete mode 100644 lib/pytz/zoneinfo/Indian/Comoro delete mode 100644 lib/pytz/zoneinfo/Indian/Kerguelen delete mode 100644 lib/pytz/zoneinfo/Indian/Mahe delete mode 100644 lib/pytz/zoneinfo/Indian/Maldives delete mode 100644 lib/pytz/zoneinfo/Indian/Mauritius delete mode 100644 lib/pytz/zoneinfo/Indian/Mayotte delete mode 100644 lib/pytz/zoneinfo/Indian/Reunion delete mode 100644 lib/pytz/zoneinfo/Iran delete mode 100644 lib/pytz/zoneinfo/Israel delete mode 100644 lib/pytz/zoneinfo/Jamaica delete mode 100644 lib/pytz/zoneinfo/Japan delete mode 100644 lib/pytz/zoneinfo/Kwajalein delete mode 100644 lib/pytz/zoneinfo/Libya delete mode 100644 lib/pytz/zoneinfo/MET delete mode 100644 lib/pytz/zoneinfo/MST delete mode 100644 lib/pytz/zoneinfo/MST7MDT delete mode 100644 lib/pytz/zoneinfo/Mexico/BajaNorte delete mode 100644 lib/pytz/zoneinfo/Mexico/BajaSur delete mode 100644 lib/pytz/zoneinfo/Mexico/General delete mode 100644 lib/pytz/zoneinfo/NZ delete mode 100644 lib/pytz/zoneinfo/NZ-CHAT delete mode 100644 lib/pytz/zoneinfo/Navajo delete mode 100644 lib/pytz/zoneinfo/PRC delete mode 100644 lib/pytz/zoneinfo/PST8PDT delete mode 100644 lib/pytz/zoneinfo/Pacific/Apia delete mode 100644 lib/pytz/zoneinfo/Pacific/Auckland delete mode 100644 lib/pytz/zoneinfo/Pacific/Bougainville delete mode 100644 lib/pytz/zoneinfo/Pacific/Chatham delete mode 100644 lib/pytz/zoneinfo/Pacific/Chuuk delete mode 100644 lib/pytz/zoneinfo/Pacific/Easter delete mode 100644 lib/pytz/zoneinfo/Pacific/Efate delete mode 100644 lib/pytz/zoneinfo/Pacific/Enderbury delete mode 100644 lib/pytz/zoneinfo/Pacific/Fakaofo delete mode 100644 lib/pytz/zoneinfo/Pacific/Fiji delete mode 100644 lib/pytz/zoneinfo/Pacific/Funafuti delete mode 100644 lib/pytz/zoneinfo/Pacific/Galapagos delete mode 100644 lib/pytz/zoneinfo/Pacific/Gambier delete mode 100644 lib/pytz/zoneinfo/Pacific/Guadalcanal delete mode 100644 lib/pytz/zoneinfo/Pacific/Guam delete mode 100644 lib/pytz/zoneinfo/Pacific/Honolulu delete mode 100644 lib/pytz/zoneinfo/Pacific/Johnston delete mode 100644 lib/pytz/zoneinfo/Pacific/Kiritimati delete mode 100644 lib/pytz/zoneinfo/Pacific/Kosrae delete mode 100644 lib/pytz/zoneinfo/Pacific/Kwajalein delete mode 100644 lib/pytz/zoneinfo/Pacific/Majuro delete mode 100644 lib/pytz/zoneinfo/Pacific/Marquesas delete mode 100644 lib/pytz/zoneinfo/Pacific/Midway delete mode 100644 lib/pytz/zoneinfo/Pacific/Nauru delete mode 100644 lib/pytz/zoneinfo/Pacific/Niue delete mode 100644 lib/pytz/zoneinfo/Pacific/Norfolk delete mode 100644 lib/pytz/zoneinfo/Pacific/Noumea delete mode 100644 lib/pytz/zoneinfo/Pacific/Pago_Pago delete mode 100644 lib/pytz/zoneinfo/Pacific/Palau delete mode 100644 lib/pytz/zoneinfo/Pacific/Pitcairn delete mode 100644 lib/pytz/zoneinfo/Pacific/Pohnpei delete mode 100644 lib/pytz/zoneinfo/Pacific/Ponape delete mode 100644 lib/pytz/zoneinfo/Pacific/Port_Moresby delete mode 100644 lib/pytz/zoneinfo/Pacific/Rarotonga delete mode 100644 lib/pytz/zoneinfo/Pacific/Saipan delete mode 100644 lib/pytz/zoneinfo/Pacific/Samoa delete mode 100644 lib/pytz/zoneinfo/Pacific/Tahiti delete mode 100644 lib/pytz/zoneinfo/Pacific/Tarawa delete mode 100644 lib/pytz/zoneinfo/Pacific/Tongatapu delete mode 100644 lib/pytz/zoneinfo/Pacific/Truk delete mode 100644 lib/pytz/zoneinfo/Pacific/Wake delete mode 100644 lib/pytz/zoneinfo/Pacific/Wallis delete mode 100644 lib/pytz/zoneinfo/Pacific/Yap delete mode 100644 lib/pytz/zoneinfo/Poland delete mode 100644 lib/pytz/zoneinfo/Portugal delete mode 100644 lib/pytz/zoneinfo/ROC delete mode 100644 lib/pytz/zoneinfo/ROK delete mode 100644 lib/pytz/zoneinfo/Singapore delete mode 100644 lib/pytz/zoneinfo/Turkey delete mode 100644 lib/pytz/zoneinfo/UCT delete mode 100644 lib/pytz/zoneinfo/US/Alaska delete mode 100644 lib/pytz/zoneinfo/US/Aleutian delete mode 100644 lib/pytz/zoneinfo/US/Arizona delete mode 100644 lib/pytz/zoneinfo/US/Central delete mode 100644 lib/pytz/zoneinfo/US/East-Indiana delete mode 100644 lib/pytz/zoneinfo/US/Eastern delete mode 100644 lib/pytz/zoneinfo/US/Hawaii delete mode 100644 lib/pytz/zoneinfo/US/Indiana-Starke delete mode 100644 lib/pytz/zoneinfo/US/Michigan delete mode 100644 lib/pytz/zoneinfo/US/Mountain delete mode 100644 lib/pytz/zoneinfo/US/Pacific delete mode 100644 lib/pytz/zoneinfo/US/Pacific-New delete mode 100644 lib/pytz/zoneinfo/US/Samoa delete mode 100644 lib/pytz/zoneinfo/UTC delete mode 100644 lib/pytz/zoneinfo/Universal delete mode 100644 lib/pytz/zoneinfo/W-SU delete mode 100644 lib/pytz/zoneinfo/WET delete mode 100644 lib/pytz/zoneinfo/Zulu delete mode 100644 lib/pytz/zoneinfo/iso3166.tab delete mode 100644 lib/pytz/zoneinfo/localtime delete mode 100644 lib/pytz/zoneinfo/posixrules delete mode 100644 lib/pytz/zoneinfo/zone.tab delete mode 100644 lib/pytz/zoneinfo/zone1970.tab delete mode 100644 lib/requests/__init__.py delete mode 100644 lib/requests/adapters.py delete mode 100644 lib/requests/api.py delete mode 100644 lib/requests/auth.py delete mode 100644 lib/requests/cacert.pem delete mode 100644 lib/requests/certs.py delete mode 100644 lib/requests/compat.py delete mode 100644 lib/requests/cookies.py delete mode 100644 lib/requests/exceptions.py delete mode 100644 lib/requests/hooks.py delete mode 100644 lib/requests/models.py delete mode 100644 lib/requests/packages/README.rst delete mode 100644 lib/requests/packages/__init__.py delete mode 100644 lib/requests/packages/chardet/__init__.py delete mode 100644 lib/requests/packages/chardet/big5freq.py delete mode 100644 lib/requests/packages/chardet/big5prober.py delete mode 100755 lib/requests/packages/chardet/chardetect.py delete mode 100644 lib/requests/packages/chardet/chardistribution.py delete mode 100644 lib/requests/packages/chardet/charsetgroupprober.py delete mode 100644 lib/requests/packages/chardet/charsetprober.py delete mode 100644 lib/requests/packages/chardet/codingstatemachine.py delete mode 100644 lib/requests/packages/chardet/compat.py delete mode 100644 lib/requests/packages/chardet/constants.py delete mode 100644 lib/requests/packages/chardet/cp949prober.py delete mode 100644 lib/requests/packages/chardet/escprober.py delete mode 100644 lib/requests/packages/chardet/escsm.py delete mode 100644 lib/requests/packages/chardet/eucjpprober.py delete mode 100644 lib/requests/packages/chardet/euckrfreq.py delete mode 100644 lib/requests/packages/chardet/euckrprober.py delete mode 100644 lib/requests/packages/chardet/euctwfreq.py delete mode 100644 lib/requests/packages/chardet/euctwprober.py delete mode 100644 lib/requests/packages/chardet/gb2312freq.py delete mode 100644 lib/requests/packages/chardet/gb2312prober.py delete mode 100644 lib/requests/packages/chardet/hebrewprober.py delete mode 100644 lib/requests/packages/chardet/jisfreq.py delete mode 100644 lib/requests/packages/chardet/jpcntx.py delete mode 100644 lib/requests/packages/chardet/langbulgarianmodel.py delete mode 100644 lib/requests/packages/chardet/langcyrillicmodel.py delete mode 100644 lib/requests/packages/chardet/langgreekmodel.py delete mode 100644 lib/requests/packages/chardet/langhebrewmodel.py delete mode 100644 lib/requests/packages/chardet/langhungarianmodel.py delete mode 100644 lib/requests/packages/chardet/langthaimodel.py delete mode 100644 lib/requests/packages/chardet/latin1prober.py delete mode 100644 lib/requests/packages/chardet/mbcharsetprober.py delete mode 100644 lib/requests/packages/chardet/mbcsgroupprober.py delete mode 100644 lib/requests/packages/chardet/mbcssm.py delete mode 100644 lib/requests/packages/chardet/sbcharsetprober.py delete mode 100644 lib/requests/packages/chardet/sbcsgroupprober.py delete mode 100644 lib/requests/packages/chardet/sjisprober.py delete mode 100644 lib/requests/packages/chardet/universaldetector.py delete mode 100644 lib/requests/packages/chardet/utf8prober.py delete mode 100644 lib/requests/packages/urllib3/__init__.py delete mode 100644 lib/requests/packages/urllib3/_collections.py delete mode 100644 lib/requests/packages/urllib3/connection.py delete mode 100644 lib/requests/packages/urllib3/connectionpool.py delete mode 100644 lib/requests/packages/urllib3/contrib/__init__.py delete mode 100644 lib/requests/packages/urllib3/contrib/appengine.py delete mode 100644 lib/requests/packages/urllib3/contrib/ntlmpool.py delete mode 100644 lib/requests/packages/urllib3/contrib/pyopenssl.py delete mode 100644 lib/requests/packages/urllib3/exceptions.py delete mode 100644 lib/requests/packages/urllib3/fields.py delete mode 100644 lib/requests/packages/urllib3/filepost.py delete mode 100644 lib/requests/packages/urllib3/packages/__init__.py delete mode 100644 lib/requests/packages/urllib3/packages/ordered_dict.py delete mode 100644 lib/requests/packages/urllib3/packages/six.py delete mode 100644 lib/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py delete mode 100644 lib/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py delete mode 100644 lib/requests/packages/urllib3/poolmanager.py delete mode 100644 lib/requests/packages/urllib3/request.py delete mode 100644 lib/requests/packages/urllib3/response.py delete mode 100644 lib/requests/packages/urllib3/util/__init__.py delete mode 100644 lib/requests/packages/urllib3/util/connection.py delete mode 100644 lib/requests/packages/urllib3/util/request.py delete mode 100644 lib/requests/packages/urllib3/util/response.py delete mode 100644 lib/requests/packages/urllib3/util/retry.py delete mode 100644 lib/requests/packages/urllib3/util/ssl_.py delete mode 100644 lib/requests/packages/urllib3/util/timeout.py delete mode 100644 lib/requests/packages/urllib3/util/url.py delete mode 100644 lib/requests/sessions.py delete mode 100644 lib/requests/status_codes.py delete mode 100644 lib/requests/structures.py delete mode 100644 lib/requests/utils.py delete mode 100644 lib/six.py delete mode 100644 lib/tzlocal/LICENSE.txt delete mode 100644 lib/tzlocal/README.rst delete mode 100644 lib/tzlocal/__init__.py delete mode 100644 lib/tzlocal/darwin.py delete mode 100644 lib/tzlocal/tests.py delete mode 100644 lib/tzlocal/unix.py delete mode 100644 lib/tzlocal/win32.py delete mode 100644 lib/tzlocal/windows_tz.py delete mode 100644 lib/unidecode/LICENSE delete mode 100644 lib/unidecode/README delete mode 100644 lib/unidecode/__init__.py delete mode 100644 lib/unidecode/x000.py delete mode 100644 lib/unidecode/x001.py delete mode 100644 lib/unidecode/x002.py delete mode 100644 lib/unidecode/x003.py delete mode 100644 lib/unidecode/x004.py delete mode 100644 lib/unidecode/x005.py delete mode 100644 lib/unidecode/x006.py delete mode 100644 lib/unidecode/x007.py delete mode 100644 lib/unidecode/x009.py delete mode 100644 lib/unidecode/x00a.py delete mode 100644 lib/unidecode/x00b.py delete mode 100644 lib/unidecode/x00c.py delete mode 100644 lib/unidecode/x00d.py delete mode 100644 lib/unidecode/x00e.py delete mode 100644 lib/unidecode/x00f.py delete mode 100644 lib/unidecode/x010.py delete mode 100644 lib/unidecode/x011.py delete mode 100644 lib/unidecode/x012.py delete mode 100644 lib/unidecode/x013.py delete mode 100644 lib/unidecode/x014.py delete mode 100644 lib/unidecode/x015.py delete mode 100644 lib/unidecode/x016.py delete mode 100644 lib/unidecode/x017.py delete mode 100644 lib/unidecode/x018.py delete mode 100644 lib/unidecode/x01d.py delete mode 100644 lib/unidecode/x01e.py delete mode 100644 lib/unidecode/x01f.py delete mode 100644 lib/unidecode/x020.py delete mode 100644 lib/unidecode/x021.py delete mode 100644 lib/unidecode/x022.py delete mode 100644 lib/unidecode/x023.py delete mode 100644 lib/unidecode/x024.py delete mode 100644 lib/unidecode/x025.py delete mode 100644 lib/unidecode/x026.py delete mode 100644 lib/unidecode/x027.py delete mode 100644 lib/unidecode/x028.py delete mode 100644 lib/unidecode/x029.py delete mode 100644 lib/unidecode/x02a.py delete mode 100644 lib/unidecode/x02c.py delete mode 100644 lib/unidecode/x02e.py delete mode 100644 lib/unidecode/x02f.py delete mode 100644 lib/unidecode/x030.py delete mode 100644 lib/unidecode/x031.py delete mode 100644 lib/unidecode/x032.py delete mode 100644 lib/unidecode/x033.py delete mode 100644 lib/unidecode/x04d.py delete mode 100644 lib/unidecode/x04e.py delete mode 100644 lib/unidecode/x04f.py delete mode 100644 lib/unidecode/x050.py delete mode 100644 lib/unidecode/x051.py delete mode 100644 lib/unidecode/x052.py delete mode 100644 lib/unidecode/x053.py delete mode 100644 lib/unidecode/x054.py delete mode 100644 lib/unidecode/x055.py delete mode 100644 lib/unidecode/x056.py delete mode 100644 lib/unidecode/x057.py delete mode 100644 lib/unidecode/x058.py delete mode 100644 lib/unidecode/x059.py delete mode 100644 lib/unidecode/x05a.py delete mode 100644 lib/unidecode/x05b.py delete mode 100644 lib/unidecode/x05c.py delete mode 100644 lib/unidecode/x05d.py delete mode 100644 lib/unidecode/x05e.py delete mode 100644 lib/unidecode/x05f.py delete mode 100644 lib/unidecode/x060.py delete mode 100644 lib/unidecode/x061.py delete mode 100644 lib/unidecode/x062.py delete mode 100644 lib/unidecode/x063.py delete mode 100644 lib/unidecode/x064.py delete mode 100644 lib/unidecode/x065.py delete mode 100644 lib/unidecode/x066.py delete mode 100644 lib/unidecode/x067.py delete mode 100644 lib/unidecode/x068.py delete mode 100644 lib/unidecode/x069.py delete mode 100644 lib/unidecode/x06a.py delete mode 100644 lib/unidecode/x06b.py delete mode 100644 lib/unidecode/x06c.py delete mode 100644 lib/unidecode/x06d.py delete mode 100644 lib/unidecode/x06e.py delete mode 100644 lib/unidecode/x06f.py delete mode 100644 lib/unidecode/x070.py delete mode 100644 lib/unidecode/x071.py delete mode 100644 lib/unidecode/x072.py delete mode 100644 lib/unidecode/x073.py delete mode 100644 lib/unidecode/x074.py delete mode 100644 lib/unidecode/x075.py delete mode 100644 lib/unidecode/x076.py delete mode 100644 lib/unidecode/x077.py delete mode 100644 lib/unidecode/x078.py delete mode 100644 lib/unidecode/x079.py delete mode 100644 lib/unidecode/x07a.py delete mode 100644 lib/unidecode/x07b.py delete mode 100644 lib/unidecode/x07c.py delete mode 100644 lib/unidecode/x07d.py delete mode 100644 lib/unidecode/x07e.py delete mode 100644 lib/unidecode/x07f.py delete mode 100644 lib/unidecode/x080.py delete mode 100644 lib/unidecode/x081.py delete mode 100644 lib/unidecode/x082.py delete mode 100644 lib/unidecode/x083.py delete mode 100644 lib/unidecode/x084.py delete mode 100644 lib/unidecode/x085.py delete mode 100644 lib/unidecode/x086.py delete mode 100644 lib/unidecode/x087.py delete mode 100644 lib/unidecode/x088.py delete mode 100644 lib/unidecode/x089.py delete mode 100644 lib/unidecode/x08a.py delete mode 100644 lib/unidecode/x08b.py delete mode 100644 lib/unidecode/x08c.py delete mode 100644 lib/unidecode/x08d.py delete mode 100644 lib/unidecode/x08e.py delete mode 100644 lib/unidecode/x08f.py delete mode 100644 lib/unidecode/x090.py delete mode 100644 lib/unidecode/x091.py delete mode 100644 lib/unidecode/x092.py delete mode 100644 lib/unidecode/x093.py delete mode 100644 lib/unidecode/x094.py delete mode 100644 lib/unidecode/x095.py delete mode 100644 lib/unidecode/x096.py delete mode 100644 lib/unidecode/x097.py delete mode 100644 lib/unidecode/x098.py delete mode 100644 lib/unidecode/x099.py delete mode 100644 lib/unidecode/x09a.py delete mode 100644 lib/unidecode/x09b.py delete mode 100644 lib/unidecode/x09c.py delete mode 100644 lib/unidecode/x09d.py delete mode 100644 lib/unidecode/x09e.py delete mode 100644 lib/unidecode/x09f.py delete mode 100644 lib/unidecode/x0a0.py delete mode 100644 lib/unidecode/x0a1.py delete mode 100644 lib/unidecode/x0a2.py delete mode 100644 lib/unidecode/x0a3.py delete mode 100644 lib/unidecode/x0a4.py delete mode 100644 lib/unidecode/x0ac.py delete mode 100644 lib/unidecode/x0ad.py delete mode 100644 lib/unidecode/x0ae.py delete mode 100644 lib/unidecode/x0af.py delete mode 100644 lib/unidecode/x0b0.py delete mode 100644 lib/unidecode/x0b1.py delete mode 100644 lib/unidecode/x0b2.py delete mode 100644 lib/unidecode/x0b3.py delete mode 100644 lib/unidecode/x0b4.py delete mode 100644 lib/unidecode/x0b5.py delete mode 100644 lib/unidecode/x0b6.py delete mode 100644 lib/unidecode/x0b7.py delete mode 100644 lib/unidecode/x0b8.py delete mode 100644 lib/unidecode/x0b9.py delete mode 100644 lib/unidecode/x0ba.py delete mode 100644 lib/unidecode/x0bb.py delete mode 100644 lib/unidecode/x0bc.py delete mode 100644 lib/unidecode/x0bd.py delete mode 100644 lib/unidecode/x0be.py delete mode 100644 lib/unidecode/x0bf.py delete mode 100644 lib/unidecode/x0c0.py delete mode 100644 lib/unidecode/x0c1.py delete mode 100644 lib/unidecode/x0c2.py delete mode 100644 lib/unidecode/x0c3.py delete mode 100644 lib/unidecode/x0c4.py delete mode 100644 lib/unidecode/x0c5.py delete mode 100644 lib/unidecode/x0c6.py delete mode 100644 lib/unidecode/x0c7.py delete mode 100644 lib/unidecode/x0c8.py delete mode 100644 lib/unidecode/x0c9.py delete mode 100644 lib/unidecode/x0ca.py delete mode 100644 lib/unidecode/x0cb.py delete mode 100644 lib/unidecode/x0cc.py delete mode 100644 lib/unidecode/x0cd.py delete mode 100644 lib/unidecode/x0ce.py delete mode 100644 lib/unidecode/x0cf.py delete mode 100644 lib/unidecode/x0d0.py delete mode 100644 lib/unidecode/x0d1.py delete mode 100644 lib/unidecode/x0d2.py delete mode 100644 lib/unidecode/x0d3.py delete mode 100644 lib/unidecode/x0d4.py delete mode 100644 lib/unidecode/x0d5.py delete mode 100644 lib/unidecode/x0d6.py delete mode 100644 lib/unidecode/x0d7.py delete mode 100644 lib/unidecode/x0f9.py delete mode 100644 lib/unidecode/x0fa.py delete mode 100644 lib/unidecode/x0fb.py delete mode 100644 lib/unidecode/x0fc.py delete mode 100644 lib/unidecode/x0fd.py delete mode 100644 lib/unidecode/x0fe.py delete mode 100644 lib/unidecode/x0ff.py delete mode 100644 lib/unidecode/x1d4.py delete mode 100644 lib/unidecode/x1d5.py delete mode 100644 lib/unidecode/x1d6.py delete mode 100644 lib/unidecode/x1d7.py delete mode 100644 lib/yaml/__init__.py delete mode 100644 lib/yaml/composer.py delete mode 100644 lib/yaml/constructor.py delete mode 100644 lib/yaml/cyaml.py delete mode 100644 lib/yaml/dumper.py delete mode 100644 lib/yaml/emitter.py delete mode 100644 lib/yaml/error.py delete mode 100644 lib/yaml/events.py delete mode 100644 lib/yaml/loader.py delete mode 100644 lib/yaml/nodes.py delete mode 100644 lib/yaml/parser.py delete mode 100644 lib/yaml/reader.py delete mode 100644 lib/yaml/representer.py delete mode 100644 lib/yaml/resolver.py delete mode 100644 lib/yaml/scanner.py delete mode 100644 lib/yaml/serializer.py delete mode 100644 lib/yaml/tokens.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore index 2c038baf..ec6cbed8 100644 --- a/.gitignore +++ b/.gitignore @@ -171,3 +171,7 @@ _ReSharper*/ /logs .project .pydevproject +.vs/ + +#Python virtual environment +env/ \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index ffed186b..cc0fb495 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,21 +1,24 @@ -# Travis CI configuration file -# http://about.travis-ci.org/docs/ - language: python - -# Available Python versions: -# http://about.travis-ci.org/docs/user/ci-environment/#Python-VM-images python: - - "2.6" - - "2.7" -# pylint 1.4 does not run under python 2.6 + - '2.6' + - '2.7' install: - - pip install pyOpenSSL - - pip install pylint==1.3.1 - - pip install pyflakes - - pip install pep8 + - pip install -r requirements.txt + - pip install pylint + - pip install pyflakes + - pip install pep8 script: - pep8 headphones - - pylint --rcfile=pylintrc headphones - pyflakes headphones - - nosetests headphones +before_deploy: + - pip install -r requirements.txt -t lib/ + - find . -name "*.pyc" -delete + - find ./lib/ -path "*/*\-info/*" -delete + - zip -r -9 headphones.zip data/ headphones/ init-scripts/ lib/ Headphones.py +deploy: + provider: releases + api_key: + secure: + file: headphones.zip + on: + tags: true diff --git a/lib/bencode.py b/headphones/bencode.py similarity index 92% rename from lib/bencode.py rename to headphones/bencode.py index 3181ee1c..4fa1db71 100644 --- a/lib/bencode.py +++ b/headphones/bencode.py @@ -14,6 +14,9 @@ # Minor modifications made by Andrew Resch to replace the BTFailure errors with Exceptions +from types import StringType, IntType, LongType, DictType, ListType, TupleType + + def decode_int(x, f): f += 1 newf = x.index('e', f) @@ -21,32 +24,36 @@ def decode_int(x, f): if x[f] == '-': if x[f + 1] == '0': raise ValueError - elif x[f] == '0' and newf != f+1: + elif x[f] == '0' and newf != f + 1: raise ValueError - return (n, newf+1) + return (n, newf + 1) + def decode_string(x, f): colon = x.index(':', f) n = int(x[f:colon]) - if x[f] == '0' and colon != f+1: + if x[f] == '0' and colon != f + 1: raise ValueError colon += 1 - return (x[colon:colon+n], colon+n) + return (x[colon:colon + n], colon + n) + def decode_list(x, f): - r, f = [], f+1 + r, f = [], f + 1 while x[f] != 'e': v, f = decode_func[x[f]](x, f) r.append(v) return (r, f + 1) + def decode_dict(x, f): - r, f = {}, f+1 + r, f = {}, f + 1 while x[f] != 'e': k, f = decode_string(x, f) r[k], f = decode_func[x[f]](x, f) return (r, f + 1) + decode_func = {} decode_func['l'] = decode_list decode_func['d'] = decode_dict @@ -62,6 +69,7 @@ decode_func['7'] = decode_string decode_func['8'] = decode_string decode_func['9'] = decode_string + def bdecode(x): try: r, l = decode_func[x[0]](x, 0) @@ -70,38 +78,41 @@ def bdecode(x): return r -from types import StringType, IntType, LongType, DictType, ListType, TupleType - class Bencached(object): - __slots__ = ['bencoded'] def __init__(self, s): self.bencoded = s -def encode_bencached(x,r): + +def encode_bencached(x, r): r.append(x.bencoded) + def encode_int(x, r): r.extend(('i', str(x), 'e')) + def encode_bool(x, r): if x: encode_int(1, r) else: encode_int(0, r) + def encode_string(x, r): r.extend((str(len(x)), ':', x)) + def encode_list(x, r): r.append('l') for i in x: encode_func[type(i)](i, r) r.append('e') -def encode_dict(x,r): + +def encode_dict(x, r): r.append('d') ilist = x.items() ilist.sort() @@ -110,6 +121,7 @@ def encode_dict(x,r): encode_func[type(v)](v, r) r.append('e') + encode_func = {} encode_func[Bencached] = encode_bencached encode_func[IntType] = encode_int @@ -121,10 +133,12 @@ encode_func[DictType] = encode_dict try: from types import BooleanType + encode_func[BooleanType] = encode_bool except ImportError: pass + def bencode(x): r = [] encode_func[type(x)](x, r) diff --git a/lib/certgen.py b/headphones/certgen.py similarity index 98% rename from lib/certgen.py rename to headphones/certgen.py index 1b941161..079424b8 100644 --- a/lib/certgen.py +++ b/headphones/certgen.py @@ -8,9 +8,10 @@ Certificate generation module. """ -from OpenSSL import crypto import time +from OpenSSL import crypto + TYPE_RSA = crypto.TYPE_RSA TYPE_DSA = crypto.TYPE_DSA @@ -29,6 +30,7 @@ def createKeyPair(type, bits): pkey.generate_key(type, bits) return pkey + def createCertRequest(pkey, digest="md5", **name): """ Create a certificate request. @@ -49,13 +51,14 @@ def createCertRequest(pkey, digest="md5", **name): req = crypto.X509Req() subj = req.get_subject() - for (key,value) in name.items(): + for (key, value) in name.items(): setattr(subj, key, value) req.set_pubkey(pkey) req.sign(pkey, digest) return req + def createCertificate(req, (issuerCert, issuerKey), serial, (notBefore, notAfter), digest="md5"): """ Generate a certificate given a certificate request. diff --git a/lib/MultipartPostHandler.py b/lib/MultipartPostHandler.py deleted file mode 100644 index ef63afa2..00000000 --- a/lib/MultipartPostHandler.py +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/python - -#### -# 06/2010 Nic Wolfe -# 02/2006 Will Holcomb -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# - -import urllib -import urllib2 -import mimetools, mimetypes -import os, sys - -# Controls how sequences are uncoded. If true, elements may be given multiple values by -# assigning a sequence. -doseq = 1 - -class MultipartPostHandler(urllib2.BaseHandler): - handler_order = urllib2.HTTPHandler.handler_order - 10 # needs to run first - - def http_request(self, request): - data = request.get_data() - if data is not None and type(data) != str: - v_files = [] - v_vars = [] - try: - for(key, value) in data.items(): - if type(value) in (file, list, tuple): - v_files.append((key, value)) - else: - v_vars.append((key, value)) - except TypeError: - systype, value, traceback = sys.exc_info() - raise TypeError, "not a valid non-string sequence or mapping object", traceback - - if len(v_files) == 0: - data = urllib.urlencode(v_vars, doseq) - else: - boundary, data = MultipartPostHandler.multipart_encode(v_vars, v_files) - contenttype = 'multipart/form-data; boundary=%s' % boundary - if(request.has_header('Content-Type') - and request.get_header('Content-Type').find('multipart/form-data') != 0): - print "Replacing %s with %s" % (request.get_header('content-type'), 'multipart/form-data') - request.add_unredirected_header('Content-Type', contenttype) - - request.add_data(data) - return request - - @staticmethod - def multipart_encode(vars, files, boundary = None, buffer = None): - if boundary is None: - boundary = mimetools.choose_boundary() - if buffer is None: - buffer = '' - for(key, value) in vars: - buffer += '--%s\r\n' % boundary - buffer += 'Content-Disposition: form-data; name="%s"' % key - buffer += '\r\n\r\n' + value + '\r\n' - for(key, fd) in files: - - # allow them to pass in a file or a tuple with name & data - if type(fd) == file: - name_in = fd.name - fd.seek(0) - data_in = fd.read() - elif type(fd) in (tuple, list): - name_in, data_in = fd - - filename = os.path.basename(name_in) - contenttype = mimetypes.guess_type(filename)[0] or 'application/octet-stream' - buffer += '--%s\r\n' % boundary - buffer += 'Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (key, filename) - buffer += 'Content-Type: %s\r\n' % contenttype - # buffer += 'Content-Length: %s\r\n' % file_size - buffer += '\r\n' + data_in + '\r\n' - buffer += '--%s--\r\n\r\n' % boundary - return boundary, buffer - - https_request = http_request \ No newline at end of file diff --git a/lib/apscheduler/__init__.py b/lib/apscheduler/__init__.py deleted file mode 100644 index 283002b7..00000000 --- a/lib/apscheduler/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -version_info = (3, 0, 1) -version = '3.0.1' -release = '3.0.1' - -__version__ = release # PEP 396 diff --git a/lib/apscheduler/events.py b/lib/apscheduler/events.py deleted file mode 100644 index 9418263d..00000000 --- a/lib/apscheduler/events.py +++ /dev/null @@ -1,73 +0,0 @@ -__all__ = ('EVENT_SCHEDULER_START', 'EVENT_SCHEDULER_SHUTDOWN', 'EVENT_EXECUTOR_ADDED', 'EVENT_EXECUTOR_REMOVED', - 'EVENT_JOBSTORE_ADDED', 'EVENT_JOBSTORE_REMOVED', 'EVENT_ALL_JOBS_REMOVED', 'EVENT_JOB_ADDED', - 'EVENT_JOB_REMOVED', 'EVENT_JOB_MODIFIED', 'EVENT_JOB_EXECUTED', 'EVENT_JOB_ERROR', 'EVENT_JOB_MISSED', - 'SchedulerEvent', 'JobEvent', 'JobExecutionEvent') - - -EVENT_SCHEDULER_START = 1 -EVENT_SCHEDULER_SHUTDOWN = 2 -EVENT_EXECUTOR_ADDED = 4 -EVENT_EXECUTOR_REMOVED = 8 -EVENT_JOBSTORE_ADDED = 16 -EVENT_JOBSTORE_REMOVED = 32 -EVENT_ALL_JOBS_REMOVED = 64 -EVENT_JOB_ADDED = 128 -EVENT_JOB_REMOVED = 256 -EVENT_JOB_MODIFIED = 512 -EVENT_JOB_EXECUTED = 1024 -EVENT_JOB_ERROR = 2048 -EVENT_JOB_MISSED = 4096 -EVENT_ALL = (EVENT_SCHEDULER_START | EVENT_SCHEDULER_SHUTDOWN | EVENT_JOBSTORE_ADDED | EVENT_JOBSTORE_REMOVED | - EVENT_JOB_ADDED | EVENT_JOB_REMOVED | EVENT_JOB_MODIFIED | EVENT_JOB_EXECUTED | - EVENT_JOB_ERROR | EVENT_JOB_MISSED) - - -class SchedulerEvent(object): - """ - An event that concerns the scheduler itself. - - :ivar code: the type code of this event - :ivar alias: alias of the job store or executor that was added or removed (if applicable) - """ - - def __init__(self, code, alias=None): - super(SchedulerEvent, self).__init__() - self.code = code - self.alias = alias - - def __repr__(self): - return '<%s (code=%d)>' % (self.__class__.__name__, self.code) - - -class JobEvent(SchedulerEvent): - """ - An event that concerns a job. - - :ivar code: the type code of this event - :ivar job_id: identifier of the job in question - :ivar jobstore: alias of the job store containing the job in question - """ - - def __init__(self, code, job_id, jobstore): - super(JobEvent, self).__init__(code) - self.code = code - self.job_id = job_id - self.jobstore = jobstore - - -class JobExecutionEvent(JobEvent): - """ - An event that concerns the execution of individual jobs. - - :ivar scheduled_run_time: the time when the job was scheduled to be run - :ivar retval: the return value of the successfully executed job - :ivar exception: the exception raised by the job - :ivar traceback: a formatted traceback for the exception - """ - - def __init__(self, code, job_id, jobstore, scheduled_run_time, retval=None, exception=None, traceback=None): - super(JobExecutionEvent, self).__init__(code, job_id, jobstore) - self.scheduled_run_time = scheduled_run_time - self.retval = retval - self.exception = exception - self.traceback = traceback diff --git a/lib/apscheduler/executors/__init__.py b/lib/apscheduler/executors/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/apscheduler/executors/asyncio.py b/lib/apscheduler/executors/asyncio.py deleted file mode 100644 index fade99f8..00000000 --- a/lib/apscheduler/executors/asyncio.py +++ /dev/null @@ -1,28 +0,0 @@ -from __future__ import absolute_import -import sys - -from apscheduler.executors.base import BaseExecutor, run_job - - -class AsyncIOExecutor(BaseExecutor): - """ - Runs jobs in the default executor of the event loop. - - Plugin alias: ``asyncio`` - """ - - def start(self, scheduler, alias): - super(AsyncIOExecutor, self).start(scheduler, alias) - self._eventloop = scheduler._eventloop - - def _do_submit_job(self, job, run_times): - def callback(f): - try: - events = f.result() - except: - self._run_job_error(job.id, *sys.exc_info()[1:]) - else: - self._run_job_success(job.id, events) - - f = self._eventloop.run_in_executor(None, run_job, job, job._jobstore_alias, run_times, self._logger.name) - f.add_done_callback(callback) diff --git a/lib/apscheduler/executors/base.py b/lib/apscheduler/executors/base.py deleted file mode 100644 index 5a0a19eb..00000000 --- a/lib/apscheduler/executors/base.py +++ /dev/null @@ -1,119 +0,0 @@ -from abc import ABCMeta, abstractmethod -from collections import defaultdict -from datetime import datetime, timedelta -from traceback import format_tb -import logging -import sys - -from pytz import utc -import six - -from apscheduler.events import JobExecutionEvent, EVENT_JOB_MISSED, EVENT_JOB_ERROR, EVENT_JOB_EXECUTED - - -class MaxInstancesReachedError(Exception): - def __init__(self, job): - super(MaxInstancesReachedError, self).__init__( - 'Job "%s" has already reached its maximum number of instances (%d)' % (job.id, job.max_instances)) - - -class BaseExecutor(six.with_metaclass(ABCMeta, object)): - """Abstract base class that defines the interface that every executor must implement.""" - - _scheduler = None - _lock = None - _logger = logging.getLogger('apscheduler.executors') - - def __init__(self): - super(BaseExecutor, self).__init__() - self._instances = defaultdict(lambda: 0) - - def start(self, scheduler, alias): - """ - Called by the scheduler when the scheduler is being started or when the executor is being added to an already - running scheduler. - - :param apscheduler.schedulers.base.BaseScheduler scheduler: the scheduler that is starting this executor - :param str|unicode alias: alias of this executor as it was assigned to the scheduler - """ - - self._scheduler = scheduler - self._lock = scheduler._create_lock() - self._logger = logging.getLogger('apscheduler.executors.%s' % alias) - - def shutdown(self, wait=True): - """ - Shuts down this executor. - - :param bool wait: ``True`` to wait until all submitted jobs have been executed - """ - - def submit_job(self, job, run_times): - """ - Submits job for execution. - - :param Job job: job to execute - :param list[datetime] run_times: list of datetimes specifying when the job should have been run - :raises MaxInstancesReachedError: if the maximum number of allowed instances for this job has been reached - """ - - assert self._lock is not None, 'This executor has not been started yet' - with self._lock: - if self._instances[job.id] >= job.max_instances: - raise MaxInstancesReachedError(job) - - self._do_submit_job(job, run_times) - self._instances[job.id] += 1 - - @abstractmethod - def _do_submit_job(self, job, run_times): - """Performs the actual task of scheduling `run_job` to be called.""" - - def _run_job_success(self, job_id, events): - """Called by the executor with the list of generated events when `run_job` has been successfully called.""" - - with self._lock: - self._instances[job_id] -= 1 - - for event in events: - self._scheduler._dispatch_event(event) - - def _run_job_error(self, job_id, exc, traceback=None): - """Called by the executor with the exception if there is an error calling `run_job`.""" - - with self._lock: - self._instances[job_id] -= 1 - - exc_info = (exc.__class__, exc, traceback) - self._logger.error('Error running job %s', job_id, exc_info=exc_info) - - -def run_job(job, jobstore_alias, run_times, logger_name): - """Called by executors to run the job. Returns a list of scheduler events to be dispatched by the scheduler.""" - - events = [] - logger = logging.getLogger(logger_name) - for run_time in run_times: - # See if the job missed its run time window, and handle possible misfires accordingly - if job.misfire_grace_time is not None: - difference = datetime.now(utc) - run_time - grace_time = timedelta(seconds=job.misfire_grace_time) - if difference > grace_time: - events.append(JobExecutionEvent(EVENT_JOB_MISSED, job.id, jobstore_alias, run_time)) - logger.warning('Run time of job "%s" was missed by %s', job, difference) - continue - - logger.info('Running job "%s" (scheduled at %s)', job, run_time) - try: - retval = job.func(*job.args, **job.kwargs) - except: - exc, tb = sys.exc_info()[1:] - formatted_tb = ''.join(format_tb(tb)) - events.append(JobExecutionEvent(EVENT_JOB_ERROR, job.id, jobstore_alias, run_time, exception=exc, - traceback=formatted_tb)) - logger.exception('Job "%s" raised an exception', job) - else: - events.append(JobExecutionEvent(EVENT_JOB_EXECUTED, job.id, jobstore_alias, run_time, retval=retval)) - logger.info('Job "%s" executed successfully', job) - - return events diff --git a/lib/apscheduler/executors/debug.py b/lib/apscheduler/executors/debug.py deleted file mode 100644 index 1f6f6b1a..00000000 --- a/lib/apscheduler/executors/debug.py +++ /dev/null @@ -1,19 +0,0 @@ -import sys - -from apscheduler.executors.base import BaseExecutor, run_job - - -class DebugExecutor(BaseExecutor): - """ - A special executor that executes the target callable directly instead of deferring it to a thread or process. - - Plugin alias: ``debug`` - """ - - def _do_submit_job(self, job, run_times): - try: - events = run_job(job, job._jobstore_alias, run_times, self._logger.name) - except: - self._run_job_error(job.id, *sys.exc_info()[1:]) - else: - self._run_job_success(job.id, events) diff --git a/lib/apscheduler/executors/gevent.py b/lib/apscheduler/executors/gevent.py deleted file mode 100644 index 9f4db2fc..00000000 --- a/lib/apscheduler/executors/gevent.py +++ /dev/null @@ -1,29 +0,0 @@ -from __future__ import absolute_import -import sys - -from apscheduler.executors.base import BaseExecutor, run_job - - -try: - import gevent -except ImportError: # pragma: nocover - raise ImportError('GeventExecutor requires gevent installed') - - -class GeventExecutor(BaseExecutor): - """ - Runs jobs as greenlets. - - Plugin alias: ``gevent`` - """ - - def _do_submit_job(self, job, run_times): - def callback(greenlet): - try: - events = greenlet.get() - except: - self._run_job_error(job.id, *sys.exc_info()[1:]) - else: - self._run_job_success(job.id, events) - - gevent.spawn(run_job, job, job._jobstore_alias, run_times, self._logger.name).link(callback) diff --git a/lib/apscheduler/executors/pool.py b/lib/apscheduler/executors/pool.py deleted file mode 100644 index 2f4ef455..00000000 --- a/lib/apscheduler/executors/pool.py +++ /dev/null @@ -1,54 +0,0 @@ -from abc import abstractmethod -import concurrent.futures - -from apscheduler.executors.base import BaseExecutor, run_job - - -class BasePoolExecutor(BaseExecutor): - @abstractmethod - def __init__(self, pool): - super(BasePoolExecutor, self).__init__() - self._pool = pool - - def _do_submit_job(self, job, run_times): - def callback(f): - exc, tb = (f.exception_info() if hasattr(f, 'exception_info') else - (f.exception(), getattr(f.exception(), '__traceback__', None))) - if exc: - self._run_job_error(job.id, exc, tb) - else: - self._run_job_success(job.id, f.result()) - - f = self._pool.submit(run_job, job, job._jobstore_alias, run_times, self._logger.name) - f.add_done_callback(callback) - - def shutdown(self, wait=True): - self._pool.shutdown(wait) - - -class ThreadPoolExecutor(BasePoolExecutor): - """ - An executor that runs jobs in a concurrent.futures thread pool. - - Plugin alias: ``threadpool`` - - :param max_workers: the maximum number of spawned threads. - """ - - def __init__(self, max_workers=10): - pool = concurrent.futures.ThreadPoolExecutor(int(max_workers)) - super(ThreadPoolExecutor, self).__init__(pool) - - -class ProcessPoolExecutor(BasePoolExecutor): - """ - An executor that runs jobs in a concurrent.futures process pool. - - Plugin alias: ``processpool`` - - :param max_workers: the maximum number of spawned processes. - """ - - def __init__(self, max_workers=10): - pool = concurrent.futures.ProcessPoolExecutor(int(max_workers)) - super(ProcessPoolExecutor, self).__init__(pool) diff --git a/lib/apscheduler/executors/twisted.py b/lib/apscheduler/executors/twisted.py deleted file mode 100644 index 29217221..00000000 --- a/lib/apscheduler/executors/twisted.py +++ /dev/null @@ -1,25 +0,0 @@ -from __future__ import absolute_import - -from apscheduler.executors.base import BaseExecutor, run_job - - -class TwistedExecutor(BaseExecutor): - """ - Runs jobs in the reactor's thread pool. - - Plugin alias: ``twisted`` - """ - - def start(self, scheduler, alias): - super(TwistedExecutor, self).start(scheduler, alias) - self._reactor = scheduler._reactor - - def _do_submit_job(self, job, run_times): - def callback(success, result): - if success: - self._run_job_success(job.id, result) - else: - self._run_job_error(job.id, result.value, result.tb) - - self._reactor.getThreadPool().callInThreadWithCallback(callback, run_job, job, job._jobstore_alias, run_times, - self._logger.name) diff --git a/lib/apscheduler/job.py b/lib/apscheduler/job.py deleted file mode 100644 index f5639dae..00000000 --- a/lib/apscheduler/job.py +++ /dev/null @@ -1,252 +0,0 @@ -from collections import Iterable, Mapping -from uuid import uuid4 - -import six - -from apscheduler.triggers.base import BaseTrigger -from apscheduler.util import ref_to_obj, obj_to_ref, datetime_repr, repr_escape, get_callable_name, check_callable_args, \ - convert_to_datetime - - -class Job(object): - """ - Contains the options given when scheduling callables and its current schedule and other state. - This class should never be instantiated by the user. - - :var str id: the unique identifier of this job - :var str name: the description of this job - :var func: the callable to execute - :var tuple|list args: positional arguments to the callable - :var dict kwargs: keyword arguments to the callable - :var bool coalesce: whether to only run the job once when several run times are due - :var trigger: the trigger object that controls the schedule of this job - :var str executor: the name of the executor that will run this job - :var int misfire_grace_time: the time (in seconds) how much this job's execution is allowed to be late - :var int max_instances: the maximum number of concurrently executing instances allowed for this job - :var datetime.datetime next_run_time: the next scheduled run time of this job - """ - - __slots__ = ('_scheduler', '_jobstore_alias', 'id', 'trigger', 'executor', 'func', 'func_ref', 'args', 'kwargs', - 'name', 'misfire_grace_time', 'coalesce', 'max_instances', 'next_run_time') - - def __init__(self, scheduler, id=None, **kwargs): - super(Job, self).__init__() - self._scheduler = scheduler - self._jobstore_alias = None - self._modify(id=id or uuid4().hex, **kwargs) - - def modify(self, **changes): - """ - Makes the given changes to this job and saves it in the associated job store. - Accepted keyword arguments are the same as the variables on this class. - - .. seealso:: :meth:`~apscheduler.schedulers.base.BaseScheduler.modify_job` - """ - - self._scheduler.modify_job(self.id, self._jobstore_alias, **changes) - - def reschedule(self, trigger, **trigger_args): - """ - Shortcut for switching the trigger on this job. - - .. seealso:: :meth:`~apscheduler.schedulers.base.BaseScheduler.reschedule_job` - """ - - self._scheduler.reschedule_job(self.id, self._jobstore_alias, trigger, **trigger_args) - - def pause(self): - """ - Temporarily suspend the execution of this job. - - .. seealso:: :meth:`~apscheduler.schedulers.base.BaseScheduler.pause_job` - """ - - self._scheduler.pause_job(self.id, self._jobstore_alias) - - def resume(self): - """ - Resume the schedule of this job if previously paused. - - .. seealso:: :meth:`~apscheduler.schedulers.base.BaseScheduler.resume_job` - """ - - self._scheduler.resume_job(self.id, self._jobstore_alias) - - def remove(self): - """ - Unschedules this job and removes it from its associated job store. - - .. seealso:: :meth:`~apscheduler.schedulers.base.BaseScheduler.remove_job` - """ - - self._scheduler.remove_job(self.id, self._jobstore_alias) - - @property - def pending(self): - """Returns ``True`` if the referenced job is still waiting to be added to its designated job store.""" - - return self._jobstore_alias is None - - # - # Private API - # - - def _get_run_times(self, now): - """ - Computes the scheduled run times between ``next_run_time`` and ``now`` (inclusive). - - :type now: datetime.datetime - :rtype: list[datetime.datetime] - """ - - run_times = [] - next_run_time = self.next_run_time - while next_run_time and next_run_time <= now: - run_times.append(next_run_time) - next_run_time = self.trigger.get_next_fire_time(next_run_time, now) - - return run_times - - def _modify(self, **changes): - """Validates the changes to the Job and makes the modifications if and only if all of them validate.""" - - approved = {} - - if 'id' in changes: - value = changes.pop('id') - if not isinstance(value, six.string_types): - raise TypeError("id must be a nonempty string") - if hasattr(self, 'id'): - raise ValueError('The job ID may not be changed') - approved['id'] = value - - if 'func' in changes or 'args' in changes or 'kwargs' in changes: - func = changes.pop('func') if 'func' in changes else self.func - args = changes.pop('args') if 'args' in changes else self.args - kwargs = changes.pop('kwargs') if 'kwargs' in changes else self.kwargs - - if isinstance(func, str): - func_ref = func - func = ref_to_obj(func) - elif callable(func): - try: - func_ref = obj_to_ref(func) - except ValueError: - # If this happens, this Job won't be serializable - func_ref = None - else: - raise TypeError('func must be a callable or a textual reference to one') - - if not hasattr(self, 'name') and changes.get('name', None) is None: - changes['name'] = get_callable_name(func) - - if isinstance(args, six.string_types) or not isinstance(args, Iterable): - raise TypeError('args must be a non-string iterable') - if isinstance(kwargs, six.string_types) or not isinstance(kwargs, Mapping): - raise TypeError('kwargs must be a dict-like object') - - check_callable_args(func, args, kwargs) - - approved['func'] = func - approved['func_ref'] = func_ref - approved['args'] = args - approved['kwargs'] = kwargs - - if 'name' in changes: - value = changes.pop('name') - if not value or not isinstance(value, six.string_types): - raise TypeError("name must be a nonempty string") - approved['name'] = value - - if 'misfire_grace_time' in changes: - value = changes.pop('misfire_grace_time') - if value is not None and (not isinstance(value, six.integer_types) or value <= 0): - raise TypeError('misfire_grace_time must be either None or a positive integer') - approved['misfire_grace_time'] = value - - if 'coalesce' in changes: - value = bool(changes.pop('coalesce')) - approved['coalesce'] = value - - if 'max_instances' in changes: - value = changes.pop('max_instances') - if not isinstance(value, six.integer_types) or value <= 0: - raise TypeError('max_instances must be a positive integer') - approved['max_instances'] = value - - if 'trigger' in changes: - trigger = changes.pop('trigger') - if not isinstance(trigger, BaseTrigger): - raise TypeError('Expected a trigger instance, got %s instead' % trigger.__class__.__name__) - - approved['trigger'] = trigger - - if 'executor' in changes: - value = changes.pop('executor') - if not isinstance(value, six.string_types): - raise TypeError('executor must be a string') - approved['executor'] = value - - if 'next_run_time' in changes: - value = changes.pop('next_run_time') - approved['next_run_time'] = convert_to_datetime(value, self._scheduler.timezone, 'next_run_time') - - if changes: - raise AttributeError('The following are not modifiable attributes of Job: %s' % ', '.join(changes)) - - for key, value in six.iteritems(approved): - setattr(self, key, value) - - def __getstate__(self): - # Don't allow this Job to be serialized if the function reference could not be determined - if not self.func_ref: - raise ValueError('This Job cannot be serialized since the reference to its callable (%r) could not be ' - 'determined. Consider giving a textual reference (module:function name) instead.' % - (self.func,)) - - return { - 'version': 1, - 'id': self.id, - 'func': self.func_ref, - 'trigger': self.trigger, - 'executor': self.executor, - 'args': self.args, - 'kwargs': self.kwargs, - 'name': self.name, - 'misfire_grace_time': self.misfire_grace_time, - 'coalesce': self.coalesce, - 'max_instances': self.max_instances, - 'next_run_time': self.next_run_time - } - - def __setstate__(self, state): - if state.get('version', 1) > 1: - raise ValueError('Job has version %s, but only version 1 can be handled' % state['version']) - - self.id = state['id'] - self.func_ref = state['func'] - self.func = ref_to_obj(self.func_ref) - self.trigger = state['trigger'] - self.executor = state['executor'] - self.args = state['args'] - self.kwargs = state['kwargs'] - self.name = state['name'] - self.misfire_grace_time = state['misfire_grace_time'] - self.coalesce = state['coalesce'] - self.max_instances = state['max_instances'] - self.next_run_time = state['next_run_time'] - - def __eq__(self, other): - if isinstance(other, Job): - return self.id == other.id - return NotImplemented - - def __repr__(self): - return '' % (repr_escape(self.id), repr_escape(self.name)) - - def __str__(self): - return '%s (trigger: %s, next run at: %s)' % (repr_escape(self.name), repr_escape(str(self.trigger)), - datetime_repr(self.next_run_time)) - - def __unicode__(self): - return six.u('%s (trigger: %s, next run at: %s)') % (self.name, self.trigger, datetime_repr(self.next_run_time)) diff --git a/lib/apscheduler/jobstores/__init__.py b/lib/apscheduler/jobstores/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/apscheduler/jobstores/base.py b/lib/apscheduler/jobstores/base.py deleted file mode 100644 index d9f7147b..00000000 --- a/lib/apscheduler/jobstores/base.py +++ /dev/null @@ -1,127 +0,0 @@ -from abc import ABCMeta, abstractmethod -import logging - -import six - - -class JobLookupError(KeyError): - """Raised when the job store cannot find a job for update or removal.""" - - def __init__(self, job_id): - super(JobLookupError, self).__init__(six.u('No job by the id of %s was found') % job_id) - - -class ConflictingIdError(KeyError): - """Raised when the uniqueness of job IDs is being violated.""" - - def __init__(self, job_id): - super(ConflictingIdError, self).__init__(six.u('Job identifier (%s) conflicts with an existing job') % job_id) - - -class TransientJobError(ValueError): - """Raised when an attempt to add transient (with no func_ref) job to a persistent job store is detected.""" - - def __init__(self, job_id): - super(TransientJobError, self).__init__( - six.u('Job (%s) cannot be added to this job store because a reference to the callable could not be ' - 'determined.') % job_id) - - -class BaseJobStore(six.with_metaclass(ABCMeta)): - """Abstract base class that defines the interface that every job store must implement.""" - - _scheduler = None - _alias = None - _logger = logging.getLogger('apscheduler.jobstores') - - def start(self, scheduler, alias): - """ - Called by the scheduler when the scheduler is being started or when the job store is being added to an already - running scheduler. - - :param apscheduler.schedulers.base.BaseScheduler scheduler: the scheduler that is starting this job store - :param str|unicode alias: alias of this job store as it was assigned to the scheduler - """ - - self._scheduler = scheduler - self._alias = alias - self._logger = logging.getLogger('apscheduler.jobstores.%s' % alias) - - def shutdown(self): - """Frees any resources still bound to this job store.""" - - @abstractmethod - def lookup_job(self, job_id): - """ - Returns a specific job, or ``None`` if it isn't found.. - - The job store is responsible for setting the ``scheduler`` and ``jobstore`` attributes of the returned job to - point to the scheduler and itself, respectively. - - :param str|unicode job_id: identifier of the job - :rtype: Job - """ - - @abstractmethod - def get_due_jobs(self, now): - """ - Returns the list of jobs that have ``next_run_time`` earlier or equal to ``now``. - The returned jobs must be sorted by next run time (ascending). - - :param datetime.datetime now: the current (timezone aware) datetime - :rtype: list[Job] - """ - - @abstractmethod - def get_next_run_time(self): - """ - Returns the earliest run time of all the jobs stored in this job store, or ``None`` if there are no active jobs. - - :rtype: datetime.datetime - """ - - @abstractmethod - def get_all_jobs(self): - """ - Returns a list of all jobs in this job store. The returned jobs should be sorted by next run time (ascending). - Paused jobs (next_run_time is None) should be sorted last. - - The job store is responsible for setting the ``scheduler`` and ``jobstore`` attributes of the returned jobs to - point to the scheduler and itself, respectively. - - :rtype: list[Job] - """ - - @abstractmethod - def add_job(self, job): - """ - Adds the given job to this store. - - :param Job job: the job to add - :raises ConflictingIdError: if there is another job in this store with the same ID - """ - - @abstractmethod - def update_job(self, job): - """ - Replaces the job in the store with the given newer version. - - :param Job job: the job to update - :raises JobLookupError: if the job does not exist - """ - - @abstractmethod - def remove_job(self, job_id): - """ - Removes the given job from this store. - - :param str|unicode job_id: identifier of the job - :raises JobLookupError: if the job does not exist - """ - - @abstractmethod - def remove_all_jobs(self): - """Removes all jobs from this store.""" - - def __repr__(self): - return '<%s>' % self.__class__.__name__ diff --git a/lib/apscheduler/jobstores/memory.py b/lib/apscheduler/jobstores/memory.py deleted file mode 100644 index 645391f3..00000000 --- a/lib/apscheduler/jobstores/memory.py +++ /dev/null @@ -1,107 +0,0 @@ -from __future__ import absolute_import - -from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError -from apscheduler.util import datetime_to_utc_timestamp - - -class MemoryJobStore(BaseJobStore): - """ - Stores jobs in an array in RAM. Provides no persistence support. - - Plugin alias: ``memory`` - """ - - def __init__(self): - super(MemoryJobStore, self).__init__() - self._jobs = [] # list of (job, timestamp), sorted by next_run_time and job id (ascending) - self._jobs_index = {} # id -> (job, timestamp) lookup table - - def lookup_job(self, job_id): - return self._jobs_index.get(job_id, (None, None))[0] - - def get_due_jobs(self, now): - now_timestamp = datetime_to_utc_timestamp(now) - pending = [] - for job, timestamp in self._jobs: - if timestamp is None or timestamp > now_timestamp: - break - pending.append(job) - - return pending - - def get_next_run_time(self): - return self._jobs[0][0].next_run_time if self._jobs else None - - def get_all_jobs(self): - return [j[0] for j in self._jobs] - - def add_job(self, job): - if job.id in self._jobs_index: - raise ConflictingIdError(job.id) - - timestamp = datetime_to_utc_timestamp(job.next_run_time) - index = self._get_job_index(timestamp, job.id) - self._jobs.insert(index, (job, timestamp)) - self._jobs_index[job.id] = (job, timestamp) - - def update_job(self, job): - old_job, old_timestamp = self._jobs_index.get(job.id, (None, None)) - if old_job is None: - raise JobLookupError(job.id) - - # If the next run time has not changed, simply replace the job in its present index. - # Otherwise, reinsert the job to the list to preserve the ordering. - old_index = self._get_job_index(old_timestamp, old_job.id) - new_timestamp = datetime_to_utc_timestamp(job.next_run_time) - if old_timestamp == new_timestamp: - self._jobs[old_index] = (job, new_timestamp) - else: - del self._jobs[old_index] - new_index = self._get_job_index(new_timestamp, job.id) - self._jobs.insert(new_index, (job, new_timestamp)) - - self._jobs_index[old_job.id] = (job, new_timestamp) - - def remove_job(self, job_id): - job, timestamp = self._jobs_index.get(job_id, (None, None)) - if job is None: - raise JobLookupError(job_id) - - index = self._get_job_index(timestamp, job_id) - del self._jobs[index] - del self._jobs_index[job.id] - - def remove_all_jobs(self): - self._jobs = [] - self._jobs_index = {} - - def shutdown(self): - self.remove_all_jobs() - - def _get_job_index(self, timestamp, job_id): - """ - Returns the index of the given job, or if it's not found, the index where the job should be inserted based on - the given timestamp. - - :type timestamp: int - :type job_id: str - """ - - lo, hi = 0, len(self._jobs) - timestamp = float('inf') if timestamp is None else timestamp - while lo < hi: - mid = (lo + hi) // 2 - mid_job, mid_timestamp = self._jobs[mid] - mid_timestamp = float('inf') if mid_timestamp is None else mid_timestamp - if mid_timestamp > timestamp: - hi = mid - elif mid_timestamp < timestamp: - lo = mid + 1 - elif mid_job.id > job_id: - hi = mid - elif mid_job.id < job_id: - lo = mid + 1 - else: - return mid - - return lo diff --git a/lib/apscheduler/jobstores/mongodb.py b/lib/apscheduler/jobstores/mongodb.py deleted file mode 100644 index ff762f7e..00000000 --- a/lib/apscheduler/jobstores/mongodb.py +++ /dev/null @@ -1,124 +0,0 @@ -from __future__ import absolute_import - -from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError -from apscheduler.util import maybe_ref, datetime_to_utc_timestamp, utc_timestamp_to_datetime -from apscheduler.job import Job - -try: - import cPickle as pickle -except ImportError: # pragma: nocover - import pickle - -try: - from bson.binary import Binary - from pymongo.errors import DuplicateKeyError - from pymongo import MongoClient, ASCENDING -except ImportError: # pragma: nocover - raise ImportError('MongoDBJobStore requires PyMongo installed') - - -class MongoDBJobStore(BaseJobStore): - """ - Stores jobs in a MongoDB database. Any leftover keyword arguments are directly passed to pymongo's `MongoClient - `_. - - Plugin alias: ``mongodb`` - - :param str database: database to store jobs in - :param str collection: collection to store jobs in - :param client: a :class:`~pymongo.mongo_client.MongoClient` instance to use instead of providing connection - arguments - :param int pickle_protocol: pickle protocol level to use (for serialization), defaults to the highest available - """ - - def __init__(self, database='apscheduler', collection='jobs', client=None, - pickle_protocol=pickle.HIGHEST_PROTOCOL, **connect_args): - super(MongoDBJobStore, self).__init__() - self.pickle_protocol = pickle_protocol - - if not database: - raise ValueError('The "database" parameter must not be empty') - if not collection: - raise ValueError('The "collection" parameter must not be empty') - - if client: - self.connection = maybe_ref(client) - else: - connect_args.setdefault('w', 1) - self.connection = MongoClient(**connect_args) - - self.collection = self.connection[database][collection] - self.collection.ensure_index('next_run_time', sparse=True) - - def lookup_job(self, job_id): - document = self.collection.find_one(job_id, ['job_state']) - return self._reconstitute_job(document['job_state']) if document else None - - def get_due_jobs(self, now): - timestamp = datetime_to_utc_timestamp(now) - return self._get_jobs({'next_run_time': {'$lte': timestamp}}) - - def get_next_run_time(self): - document = self.collection.find_one({'next_run_time': {'$ne': None}}, fields=['next_run_time'], - sort=[('next_run_time', ASCENDING)]) - return utc_timestamp_to_datetime(document['next_run_time']) if document else None - - def get_all_jobs(self): - return self._get_jobs({}) - - def add_job(self, job): - try: - self.collection.insert({ - '_id': job.id, - 'next_run_time': datetime_to_utc_timestamp(job.next_run_time), - 'job_state': Binary(pickle.dumps(job.__getstate__(), self.pickle_protocol)) - }) - except DuplicateKeyError: - raise ConflictingIdError(job.id) - - def update_job(self, job): - changes = { - 'next_run_time': datetime_to_utc_timestamp(job.next_run_time), - 'job_state': Binary(pickle.dumps(job.__getstate__(), self.pickle_protocol)) - } - result = self.collection.update({'_id': job.id}, {'$set': changes}) - if result and result['n'] == 0: - raise JobLookupError(id) - - def remove_job(self, job_id): - result = self.collection.remove(job_id) - if result and result['n'] == 0: - raise JobLookupError(job_id) - - def remove_all_jobs(self): - self.collection.remove() - - def shutdown(self): - self.connection.disconnect() - - def _reconstitute_job(self, job_state): - job_state = pickle.loads(job_state) - job = Job.__new__(Job) - job.__setstate__(job_state) - job._scheduler = self._scheduler - job._jobstore_alias = self._alias - return job - - def _get_jobs(self, conditions): - jobs = [] - failed_job_ids = [] - for document in self.collection.find(conditions, ['_id', 'job_state'], sort=[('next_run_time', ASCENDING)]): - try: - jobs.append(self._reconstitute_job(document['job_state'])) - except: - self._logger.exception('Unable to restore job "%s" -- removing it', document['_id']) - failed_job_ids.append(document['_id']) - - # Remove all the jobs we failed to restore - if failed_job_ids: - self.collection.remove({'_id': {'$in': failed_job_ids}}) - - return jobs - - def __repr__(self): - return '<%s (client=%s)>' % (self.__class__.__name__, self.connection) diff --git a/lib/apscheduler/jobstores/redis.py b/lib/apscheduler/jobstores/redis.py deleted file mode 100644 index 2b4ffd52..00000000 --- a/lib/apscheduler/jobstores/redis.py +++ /dev/null @@ -1,138 +0,0 @@ -from __future__ import absolute_import - -import six - -from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError -from apscheduler.util import datetime_to_utc_timestamp, utc_timestamp_to_datetime -from apscheduler.job import Job - -try: - import cPickle as pickle -except ImportError: # pragma: nocover - import pickle - -try: - from redis import StrictRedis -except ImportError: # pragma: nocover - raise ImportError('RedisJobStore requires redis installed') - - -class RedisJobStore(BaseJobStore): - """ - Stores jobs in a Redis database. Any leftover keyword arguments are directly passed to redis's StrictRedis. - - Plugin alias: ``redis`` - - :param int db: the database number to store jobs in - :param str jobs_key: key to store jobs in - :param str run_times_key: key to store the jobs' run times in - :param int pickle_protocol: pickle protocol level to use (for serialization), defaults to the highest available - """ - - def __init__(self, db=0, jobs_key='apscheduler.jobs', run_times_key='apscheduler.run_times', - pickle_protocol=pickle.HIGHEST_PROTOCOL, **connect_args): - super(RedisJobStore, self).__init__() - - if db is None: - raise ValueError('The "db" parameter must not be empty') - if not jobs_key: - raise ValueError('The "jobs_key" parameter must not be empty') - if not run_times_key: - raise ValueError('The "run_times_key" parameter must not be empty') - - self.pickle_protocol = pickle_protocol - self.jobs_key = jobs_key - self.run_times_key = run_times_key - self.redis = StrictRedis(db=int(db), **connect_args) - - def lookup_job(self, job_id): - job_state = self.redis.hget(self.jobs_key, job_id) - return self._reconstitute_job(job_state) if job_state else None - - def get_due_jobs(self, now): - timestamp = datetime_to_utc_timestamp(now) - job_ids = self.redis.zrangebyscore(self.run_times_key, 0, timestamp) - if job_ids: - job_states = self.redis.hmget(self.jobs_key, *job_ids) - return self._reconstitute_jobs(six.moves.zip(job_ids, job_states)) - return [] - - def get_next_run_time(self): - next_run_time = self.redis.zrange(self.run_times_key, 0, 0, withscores=True) - if next_run_time: - return utc_timestamp_to_datetime(next_run_time[0][1]) - - def get_all_jobs(self): - job_states = self.redis.hgetall(self.jobs_key) - jobs = self._reconstitute_jobs(six.iteritems(job_states)) - return sorted(jobs, key=lambda job: job.next_run_time) - - def add_job(self, job): - if self.redis.hexists(self.jobs_key, job.id): - raise ConflictingIdError(job.id) - - with self.redis.pipeline() as pipe: - pipe.multi() - pipe.hset(self.jobs_key, job.id, pickle.dumps(job.__getstate__(), self.pickle_protocol)) - pipe.zadd(self.run_times_key, datetime_to_utc_timestamp(job.next_run_time), job.id) - pipe.execute() - - def update_job(self, job): - if not self.redis.hexists(self.jobs_key, job.id): - raise JobLookupError(job.id) - - with self.redis.pipeline() as pipe: - pipe.hset(self.jobs_key, job.id, pickle.dumps(job.__getstate__(), self.pickle_protocol)) - if job.next_run_time: - pipe.zadd(self.run_times_key, datetime_to_utc_timestamp(job.next_run_time), job.id) - else: - pipe.zrem(self.run_times_key, job.id) - pipe.execute() - - def remove_job(self, job_id): - if not self.redis.hexists(self.jobs_key, job_id): - raise JobLookupError(job_id) - - with self.redis.pipeline() as pipe: - pipe.hdel(self.jobs_key, job_id) - pipe.zrem(self.run_times_key, job_id) - pipe.execute() - - def remove_all_jobs(self): - with self.redis.pipeline() as pipe: - pipe.delete(self.jobs_key) - pipe.delete(self.run_times_key) - pipe.execute() - - def shutdown(self): - self.redis.connection_pool.disconnect() - - def _reconstitute_job(self, job_state): - job_state = pickle.loads(job_state) - job = Job.__new__(Job) - job.__setstate__(job_state) - job._scheduler = self._scheduler - job._jobstore_alias = self._alias - return job - - def _reconstitute_jobs(self, job_states): - jobs = [] - failed_job_ids = [] - for job_id, job_state in job_states: - try: - jobs.append(self._reconstitute_job(job_state)) - except: - self._logger.exception('Unable to restore job "%s" -- removing it', job_id) - failed_job_ids.append(job_id) - - # Remove all the jobs we failed to restore - if failed_job_ids: - with self.redis.pipeline() as pipe: - pipe.hdel(self.jobs_key, *failed_job_ids) - pipe.zrem(self.run_times_key, *failed_job_ids) - pipe.execute() - - return jobs - - def __repr__(self): - return '<%s>' % self.__class__.__name__ diff --git a/lib/apscheduler/jobstores/sqlalchemy.py b/lib/apscheduler/jobstores/sqlalchemy.py deleted file mode 100644 index f8a3c151..00000000 --- a/lib/apscheduler/jobstores/sqlalchemy.py +++ /dev/null @@ -1,137 +0,0 @@ -from __future__ import absolute_import - -from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError -from apscheduler.util import maybe_ref, datetime_to_utc_timestamp, utc_timestamp_to_datetime -from apscheduler.job import Job - -try: - import cPickle as pickle -except ImportError: # pragma: nocover - import pickle - -try: - from sqlalchemy import create_engine, Table, Column, MetaData, Unicode, Float, LargeBinary, select - from sqlalchemy.exc import IntegrityError -except ImportError: # pragma: nocover - raise ImportError('SQLAlchemyJobStore requires SQLAlchemy installed') - - -class SQLAlchemyJobStore(BaseJobStore): - """ - Stores jobs in a database table using SQLAlchemy. The table will be created if it doesn't exist in the database. - - Plugin alias: ``sqlalchemy`` - - :param str url: connection string (see `SQLAlchemy documentation - `_ - on this) - :param engine: an SQLAlchemy Engine to use instead of creating a new one based on ``url`` - :param str tablename: name of the table to store jobs in - :param metadata: a :class:`~sqlalchemy.MetaData` instance to use instead of creating a new one - :param int pickle_protocol: pickle protocol level to use (for serialization), defaults to the highest available - """ - - def __init__(self, url=None, engine=None, tablename='apscheduler_jobs', metadata=None, - pickle_protocol=pickle.HIGHEST_PROTOCOL): - super(SQLAlchemyJobStore, self).__init__() - self.pickle_protocol = pickle_protocol - metadata = maybe_ref(metadata) or MetaData() - - if engine: - self.engine = maybe_ref(engine) - elif url: - self.engine = create_engine(url) - else: - raise ValueError('Need either "engine" or "url" defined') - - # 191 = max key length in MySQL for InnoDB/utf8mb4 tables, 25 = precision that translates to an 8-byte float - self.jobs_t = Table( - tablename, metadata, - Column('id', Unicode(191, _warn_on_bytestring=False), primary_key=True), - Column('next_run_time', Float(25), index=True), - Column('job_state', LargeBinary, nullable=False) - ) - - self.jobs_t.create(self.engine, True) - - def lookup_job(self, job_id): - selectable = select([self.jobs_t.c.job_state]).where(self.jobs_t.c.id == job_id) - job_state = self.engine.execute(selectable).scalar() - return self._reconstitute_job(job_state) if job_state else None - - def get_due_jobs(self, now): - timestamp = datetime_to_utc_timestamp(now) - return self._get_jobs(self.jobs_t.c.next_run_time <= timestamp) - - def get_next_run_time(self): - selectable = select([self.jobs_t.c.next_run_time]).where(self.jobs_t.c.next_run_time != None).\ - order_by(self.jobs_t.c.next_run_time).limit(1) - next_run_time = self.engine.execute(selectable).scalar() - return utc_timestamp_to_datetime(next_run_time) - - def get_all_jobs(self): - return self._get_jobs() - - def add_job(self, job): - insert = self.jobs_t.insert().values(**{ - 'id': job.id, - 'next_run_time': datetime_to_utc_timestamp(job.next_run_time), - 'job_state': pickle.dumps(job.__getstate__(), self.pickle_protocol) - }) - try: - self.engine.execute(insert) - except IntegrityError: - raise ConflictingIdError(job.id) - - def update_job(self, job): - update = self.jobs_t.update().values(**{ - 'next_run_time': datetime_to_utc_timestamp(job.next_run_time), - 'job_state': pickle.dumps(job.__getstate__(), self.pickle_protocol) - }).where(self.jobs_t.c.id == job.id) - result = self.engine.execute(update) - if result.rowcount == 0: - raise JobLookupError(id) - - def remove_job(self, job_id): - delete = self.jobs_t.delete().where(self.jobs_t.c.id == job_id) - result = self.engine.execute(delete) - if result.rowcount == 0: - raise JobLookupError(job_id) - - def remove_all_jobs(self): - delete = self.jobs_t.delete() - self.engine.execute(delete) - - def shutdown(self): - self.engine.dispose() - - def _reconstitute_job(self, job_state): - job_state = pickle.loads(job_state) - job_state['jobstore'] = self - job = Job.__new__(Job) - job.__setstate__(job_state) - job._scheduler = self._scheduler - job._jobstore_alias = self._alias - return job - - def _get_jobs(self, *conditions): - jobs = [] - selectable = select([self.jobs_t.c.id, self.jobs_t.c.job_state]).order_by(self.jobs_t.c.next_run_time) - selectable = selectable.where(*conditions) if conditions else selectable - failed_job_ids = set() - for row in self.engine.execute(selectable): - try: - jobs.append(self._reconstitute_job(row.job_state)) - except: - self._logger.exception('Unable to restore job "%s" -- removing it', row.id) - failed_job_ids.add(row.id) - - # Remove all the jobs we failed to restore - if failed_job_ids: - delete = self.jobs_t.delete().where(self.jobs_t.c.id.in_(failed_job_ids)) - self.engine.execute(delete) - - return jobs - - def __repr__(self): - return '<%s (url=%s)>' % (self.__class__.__name__, self.engine.url) diff --git a/lib/apscheduler/schedulers/__init__.py b/lib/apscheduler/schedulers/__init__.py deleted file mode 100644 index bd8a7900..00000000 --- a/lib/apscheduler/schedulers/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -class SchedulerAlreadyRunningError(Exception): - """Raised when attempting to start or configure the scheduler when it's already running.""" - - def __str__(self): - return 'Scheduler is already running' - - -class SchedulerNotRunningError(Exception): - """Raised when attempting to shutdown the scheduler when it's not running.""" - - def __str__(self): - return 'Scheduler is not running' diff --git a/lib/apscheduler/schedulers/asyncio.py b/lib/apscheduler/schedulers/asyncio.py deleted file mode 100644 index b91ee97a..00000000 --- a/lib/apscheduler/schedulers/asyncio.py +++ /dev/null @@ -1,68 +0,0 @@ -from __future__ import absolute_import -from functools import wraps - -from apscheduler.schedulers.base import BaseScheduler -from apscheduler.util import maybe_ref - -try: - import asyncio -except ImportError: # pragma: nocover - try: - import trollius as asyncio - except ImportError: - raise ImportError('AsyncIOScheduler requires either Python 3.4 or the asyncio package installed') - - -def run_in_event_loop(func): - @wraps(func) - def wrapper(self, *args, **kwargs): - self._eventloop.call_soon_threadsafe(func, self, *args, **kwargs) - return wrapper - - -class AsyncIOScheduler(BaseScheduler): - """ - A scheduler that runs on an asyncio (:pep:`3156`) event loop. - - Extra options: - - ============== ============================================================= - ``event_loop`` AsyncIO event loop to use (defaults to the global event loop) - ============== ============================================================= - """ - - _eventloop = None - _timeout = None - - def start(self): - super(AsyncIOScheduler, self).start() - self.wakeup() - - @run_in_event_loop - def shutdown(self, wait=True): - super(AsyncIOScheduler, self).shutdown(wait) - self._stop_timer() - - def _configure(self, config): - self._eventloop = maybe_ref(config.pop('event_loop', None)) or asyncio.get_event_loop() - super(AsyncIOScheduler, self)._configure(config) - - def _start_timer(self, wait_seconds): - self._stop_timer() - if wait_seconds is not None: - self._timeout = self._eventloop.call_later(wait_seconds, self.wakeup) - - def _stop_timer(self): - if self._timeout: - self._timeout.cancel() - del self._timeout - - @run_in_event_loop - def wakeup(self): - self._stop_timer() - wait_seconds = self._process_jobs() - self._start_timer(wait_seconds) - - def _create_default_executor(self): - from apscheduler.executors.asyncio import AsyncIOExecutor - return AsyncIOExecutor() diff --git a/lib/apscheduler/schedulers/background.py b/lib/apscheduler/schedulers/background.py deleted file mode 100644 index 86ff2ba3..00000000 --- a/lib/apscheduler/schedulers/background.py +++ /dev/null @@ -1,39 +0,0 @@ -from __future__ import absolute_import -from threading import Thread, Event - -from apscheduler.schedulers.base import BaseScheduler -from apscheduler.schedulers.blocking import BlockingScheduler -from apscheduler.util import asbool - - -class BackgroundScheduler(BlockingScheduler): - """ - A scheduler that runs in the background using a separate thread - (:meth:`~apscheduler.schedulers.base.BaseScheduler.start` will return immediately). - - Extra options: - - ========== ============================================================================================ - ``daemon`` Set the ``daemon`` option in the background thread (defaults to ``True``, - see `the documentation `_ - for further details) - ========== ============================================================================================ - """ - - _thread = None - - def _configure(self, config): - self._daemon = asbool(config.pop('daemon', True)) - super(BackgroundScheduler, self)._configure(config) - - def start(self): - BaseScheduler.start(self) - self._event = Event() - self._thread = Thread(target=self._main_loop, name='APScheduler') - self._thread.daemon = self._daemon - self._thread.start() - - def shutdown(self, wait=True): - super(BackgroundScheduler, self).shutdown(wait) - self._thread.join() - del self._thread diff --git a/lib/apscheduler/schedulers/base.py b/lib/apscheduler/schedulers/base.py deleted file mode 100644 index cdc0bf06..00000000 --- a/lib/apscheduler/schedulers/base.py +++ /dev/null @@ -1,845 +0,0 @@ -from __future__ import print_function -from abc import ABCMeta, abstractmethod -from collections import MutableMapping -from threading import RLock -from datetime import datetime -from logging import getLogger -import sys - -from pkg_resources import iter_entry_points -from tzlocal import get_localzone -import six - -from apscheduler.schedulers import SchedulerAlreadyRunningError, SchedulerNotRunningError -from apscheduler.executors.base import MaxInstancesReachedError, BaseExecutor -from apscheduler.executors.pool import ThreadPoolExecutor -from apscheduler.jobstores.base import ConflictingIdError, JobLookupError, BaseJobStore -from apscheduler.jobstores.memory import MemoryJobStore -from apscheduler.job import Job -from apscheduler.triggers.base import BaseTrigger -from apscheduler.util import asbool, asint, astimezone, maybe_ref, timedelta_seconds, undefined -from apscheduler.events import ( - SchedulerEvent, JobEvent, EVENT_SCHEDULER_START, EVENT_SCHEDULER_SHUTDOWN, EVENT_JOBSTORE_ADDED, - EVENT_JOBSTORE_REMOVED, EVENT_ALL, EVENT_JOB_MODIFIED, EVENT_JOB_REMOVED, EVENT_JOB_ADDED, EVENT_EXECUTOR_ADDED, - EVENT_EXECUTOR_REMOVED, EVENT_ALL_JOBS_REMOVED) - - -class BaseScheduler(six.with_metaclass(ABCMeta)): - """ - Abstract base class for all schedulers. Takes the following keyword arguments: - - :param str|logging.Logger logger: logger to use for the scheduler's logging (defaults to apscheduler.scheduler) - :param str|datetime.tzinfo timezone: the default time zone (defaults to the local timezone) - :param dict job_defaults: default values for newly added jobs - :param dict jobstores: a dictionary of job store alias -> job store instance or configuration dict - :param dict executors: a dictionary of executor alias -> executor instance or configuration dict - - .. seealso:: :ref:`scheduler-config` - """ - - _trigger_plugins = dict((ep.name, ep) for ep in iter_entry_points('apscheduler.triggers')) - _trigger_classes = {} - _executor_plugins = dict((ep.name, ep) for ep in iter_entry_points('apscheduler.executors')) - _executor_classes = {} - _jobstore_plugins = dict((ep.name, ep) for ep in iter_entry_points('apscheduler.jobstores')) - _jobstore_classes = {} - _stopped = True - - # - # Public API - # - - def __init__(self, gconfig={}, **options): - super(BaseScheduler, self).__init__() - self._executors = {} - self._executors_lock = self._create_lock() - self._jobstores = {} - self._jobstores_lock = self._create_lock() - self._listeners = [] - self._listeners_lock = self._create_lock() - self._pending_jobs = [] - self.configure(gconfig, **options) - - def configure(self, gconfig={}, prefix='apscheduler.', **options): - """ - Reconfigures the scheduler with the given options. Can only be done when the scheduler isn't running. - - :param dict gconfig: a "global" configuration dictionary whose values can be overridden by keyword arguments to - this method - :param str|unicode prefix: pick only those keys from ``gconfig`` that are prefixed with this string - (pass an empty string or ``None`` to use all keys) - :raises SchedulerAlreadyRunningError: if the scheduler is already running - """ - - if self.running: - raise SchedulerAlreadyRunningError - - # If a non-empty prefix was given, strip it from the keys in the global configuration dict - if prefix: - prefixlen = len(prefix) - gconfig = dict((key[prefixlen:], value) for key, value in six.iteritems(gconfig) if key.startswith(prefix)) - - # Create a structure from the dotted options (e.g. "a.b.c = d" -> {'a': {'b': {'c': 'd'}}}) - config = {} - for key, value in six.iteritems(gconfig): - parts = key.split('.') - parent = config - key = parts.pop(0) - while parts: - parent = parent.setdefault(key, {}) - key = parts.pop(0) - parent[key] = value - - # Override any options with explicit keyword arguments - config.update(options) - self._configure(config) - - @abstractmethod - def start(self): - """ - Starts the scheduler. The details of this process depend on the implementation. - - :raises SchedulerAlreadyRunningError: if the scheduler is already running - """ - - if self.running: - raise SchedulerAlreadyRunningError - - with self._executors_lock: - # Create a default executor if nothing else is configured - if 'default' not in self._executors: - self.add_executor(self._create_default_executor(), 'default') - - # Start all the executors - for alias, executor in six.iteritems(self._executors): - executor.start(self, alias) - - with self._jobstores_lock: - # Create a default job store if nothing else is configured - if 'default' not in self._jobstores: - self.add_jobstore(self._create_default_jobstore(), 'default') - - # Start all the job stores - for alias, store in six.iteritems(self._jobstores): - store.start(self, alias) - - # Schedule all pending jobs - for job, jobstore_alias, replace_existing in self._pending_jobs: - self._real_add_job(job, jobstore_alias, replace_existing, False) - del self._pending_jobs[:] - - self._stopped = False - self._logger.info('Scheduler started') - - # Notify listeners that the scheduler has been started - self._dispatch_event(SchedulerEvent(EVENT_SCHEDULER_START)) - - @abstractmethod - def shutdown(self, wait=True): - """ - Shuts down the scheduler. Does not interrupt any currently running jobs. - - :param bool wait: ``True`` to wait until all currently executing jobs have finished - :raises SchedulerNotRunningError: if the scheduler has not been started yet - """ - - if not self.running: - raise SchedulerNotRunningError - - self._stopped = True - - # Shut down all executors - for executor in six.itervalues(self._executors): - executor.shutdown(wait) - - # Shut down all job stores - for jobstore in six.itervalues(self._jobstores): - jobstore.shutdown() - - self._logger.info('Scheduler has been shut down') - self._dispatch_event(SchedulerEvent(EVENT_SCHEDULER_SHUTDOWN)) - - @property - def running(self): - return not self._stopped - - def add_executor(self, executor, alias='default', **executor_opts): - """ - Adds an executor to this scheduler. Any extra keyword arguments will be passed to the executor plugin's - constructor, assuming that the first argument is the name of an executor plugin. - - :param str|unicode|apscheduler.executors.base.BaseExecutor executor: either an executor instance or the name of - an executor plugin - :param str|unicode alias: alias for the scheduler - :raises ValueError: if there is already an executor by the given alias - """ - - with self._executors_lock: - if alias in self._executors: - raise ValueError('This scheduler already has an executor by the alias of "%s"' % alias) - - if isinstance(executor, BaseExecutor): - self._executors[alias] = executor - elif isinstance(executor, six.string_types): - self._executors[alias] = executor = self._create_plugin_instance('executor', executor, executor_opts) - else: - raise TypeError('Expected an executor instance or a string, got %s instead' % - executor.__class__.__name__) - - # Start the executor right away if the scheduler is running - if self.running: - executor.start(self) - - self._dispatch_event(SchedulerEvent(EVENT_EXECUTOR_ADDED, alias)) - - def remove_executor(self, alias, shutdown=True): - """ - Removes the executor by the given alias from this scheduler. - - :param str|unicode alias: alias of the executor - :param bool shutdown: ``True`` to shut down the executor after removing it - """ - - with self._jobstores_lock: - executor = self._lookup_executor(alias) - del self._executors[alias] - - if shutdown: - executor.shutdown() - - self._dispatch_event(SchedulerEvent(EVENT_EXECUTOR_REMOVED, alias)) - - def add_jobstore(self, jobstore, alias='default', **jobstore_opts): - """ - Adds a job store to this scheduler. Any extra keyword arguments will be passed to the job store plugin's - constructor, assuming that the first argument is the name of a job store plugin. - - :param str|unicode|apscheduler.jobstores.base.BaseJobStore jobstore: job store to be added - :param str|unicode alias: alias for the job store - :raises ValueError: if there is already a job store by the given alias - """ - - with self._jobstores_lock: - if alias in self._jobstores: - raise ValueError('This scheduler already has a job store by the alias of "%s"' % alias) - - if isinstance(jobstore, BaseJobStore): - self._jobstores[alias] = jobstore - elif isinstance(jobstore, six.string_types): - self._jobstores[alias] = jobstore = self._create_plugin_instance('jobstore', jobstore, jobstore_opts) - else: - raise TypeError('Expected a job store instance or a string, got %s instead' % - jobstore.__class__.__name__) - - # Start the job store right away if the scheduler is running - if self.running: - jobstore.start(self, alias) - - # Notify listeners that a new job store has been added - self._dispatch_event(SchedulerEvent(EVENT_JOBSTORE_ADDED, alias)) - - # Notify the scheduler so it can scan the new job store for jobs - if self.running: - self.wakeup() - - def remove_jobstore(self, alias, shutdown=True): - """ - Removes the job store by the given alias from this scheduler. - - :param str|unicode alias: alias of the job store - :param bool shutdown: ``True`` to shut down the job store after removing it - """ - - with self._jobstores_lock: - jobstore = self._lookup_jobstore(alias) - del self._jobstores[alias] - - if shutdown: - jobstore.shutdown() - - self._dispatch_event(SchedulerEvent(EVENT_JOBSTORE_REMOVED, alias)) - - def add_listener(self, callback, mask=EVENT_ALL): - """ - add_listener(callback, mask=EVENT_ALL) - - Adds a listener for scheduler events. When a matching event occurs, ``callback`` is executed with the event - object as its sole argument. If the ``mask`` parameter is not provided, the callback will receive events of all - types. - - :param callback: any callable that takes one argument - :param int mask: bitmask that indicates which events should be listened to - - .. seealso:: :mod:`apscheduler.events` - .. seealso:: :ref:`scheduler-events` - """ - - with self._listeners_lock: - self._listeners.append((callback, mask)) - - def remove_listener(self, callback): - """Removes a previously added event listener.""" - - with self._listeners_lock: - for i, (cb, _) in enumerate(self._listeners): - if callback == cb: - del self._listeners[i] - - def add_job(self, func, trigger=None, args=None, kwargs=None, id=None, name=None, misfire_grace_time=undefined, - coalesce=undefined, max_instances=undefined, next_run_time=undefined, jobstore='default', - executor='default', replace_existing=False, **trigger_args): - """ - add_job(func, trigger=None, args=None, kwargs=None, id=None, name=None, misfire_grace_time=undefined, \ - coalesce=undefined, max_instances=undefined, next_run_time=undefined, jobstore='default', \ - executor='default', replace_existing=False, **trigger_args) - - Adds the given job to the job list and wakes up the scheduler if it's already running. - - Any option that defaults to ``undefined`` will be replaced with the corresponding default value when the job is - scheduled (which happens when the scheduler is started, or immediately if the scheduler is already running). - - The ``func`` argument can be given either as a callable object or a textual reference in the - ``package.module:some.object`` format, where the first half (separated by ``:``) is an importable module and the - second half is a reference to the callable object, relative to the module. - - The ``trigger`` argument can either be: - #. the alias name of the trigger (e.g. ``date``, ``interval`` or ``cron``), in which case any extra keyword - arguments to this method are passed on to the trigger's constructor - #. an instance of a trigger class - - :param func: callable (or a textual reference to one) to run at the given time - :param str|apscheduler.triggers.base.BaseTrigger trigger: trigger that determines when ``func`` is called - :param list|tuple args: list of positional arguments to call func with - :param dict kwargs: dict of keyword arguments to call func with - :param str|unicode id: explicit identifier for the job (for modifying it later) - :param str|unicode name: textual description of the job - :param int misfire_grace_time: seconds after the designated run time that the job is still allowed to be run - :param bool coalesce: run once instead of many times if the scheduler determines that the job should be run more - than once in succession - :param int max_instances: maximum number of concurrently running instances allowed for this job - :param datetime next_run_time: when to first run the job, regardless of the trigger (pass ``None`` to add the - job as paused) - :param str|unicode jobstore: alias of the job store to store the job in - :param str|unicode executor: alias of the executor to run the job with - :param bool replace_existing: ``True`` to replace an existing job with the same ``id`` (but retain the - number of runs from the existing one) - :rtype: Job - """ - - job_kwargs = { - 'trigger': self._create_trigger(trigger, trigger_args), - 'executor': executor, - 'func': func, - 'args': tuple(args) if args is not None else (), - 'kwargs': dict(kwargs) if kwargs is not None else {}, - 'id': id, - 'name': name, - 'misfire_grace_time': misfire_grace_time, - 'coalesce': coalesce, - 'max_instances': max_instances, - 'next_run_time': next_run_time - } - job_kwargs = dict((key, value) for key, value in six.iteritems(job_kwargs) if value is not undefined) - job = Job(self, **job_kwargs) - - # Don't really add jobs to job stores before the scheduler is up and running - with self._jobstores_lock: - if not self.running: - self._pending_jobs.append((job, jobstore, replace_existing)) - self._logger.info('Adding job tentatively -- it will be properly scheduled when the scheduler starts') - else: - self._real_add_job(job, jobstore, replace_existing, True) - - return job - - def scheduled_job(self, trigger, args=None, kwargs=None, id=None, name=None, misfire_grace_time=undefined, - coalesce=undefined, max_instances=undefined, next_run_time=undefined, jobstore='default', - executor='default', **trigger_args): - """ - scheduled_job(trigger, args=None, kwargs=None, id=None, name=None, misfire_grace_time=undefined, \ - coalesce=undefined, max_instances=undefined, next_run_time=undefined, jobstore='default', \ - executor='default',**trigger_args) - - A decorator version of :meth:`add_job`, except that ``replace_existing`` is always ``True``. - - .. important:: The ``id`` argument must be given if scheduling a job in a persistent job store. The scheduler - cannot, however, enforce this requirement. - """ - - def inner(func): - self.add_job(func, trigger, args, kwargs, id, name, misfire_grace_time, coalesce, max_instances, - next_run_time, jobstore, executor, True, **trigger_args) - return func - return inner - - def modify_job(self, job_id, jobstore=None, **changes): - """ - Modifies the properties of a single job. Modifications are passed to this method as extra keyword arguments. - - :param str|unicode job_id: the identifier of the job - :param str|unicode jobstore: alias of the job store that contains the job - """ - with self._jobstores_lock: - job, jobstore = self._lookup_job(job_id, jobstore) - job._modify(**changes) - if jobstore: - self._lookup_jobstore(jobstore).update_job(job) - - self._dispatch_event(JobEvent(EVENT_JOB_MODIFIED, job_id, jobstore)) - - # Wake up the scheduler since the job's next run time may have been changed - self.wakeup() - - def reschedule_job(self, job_id, jobstore=None, trigger=None, **trigger_args): - """ - Constructs a new trigger for a job and updates its next run time. - Extra keyword arguments are passed directly to the trigger's constructor. - - :param str|unicode job_id: the identifier of the job - :param str|unicode jobstore: alias of the job store that contains the job - :param trigger: alias of the trigger type or a trigger instance - """ - - trigger = self._create_trigger(trigger, trigger_args) - now = datetime.now(self.timezone) - next_run_time = trigger.get_next_fire_time(None, now) - self.modify_job(job_id, jobstore, trigger=trigger, next_run_time=next_run_time) - - def pause_job(self, job_id, jobstore=None): - """ - Causes the given job not to be executed until it is explicitly resumed. - - :param str|unicode job_id: the identifier of the job - :param str|unicode jobstore: alias of the job store that contains the job - """ - - self.modify_job(job_id, jobstore, next_run_time=None) - - def resume_job(self, job_id, jobstore=None): - """ - Resumes the schedule of the given job, or removes the job if its schedule is finished. - - :param str|unicode job_id: the identifier of the job - :param str|unicode jobstore: alias of the job store that contains the job - """ - - with self._jobstores_lock: - job, jobstore = self._lookup_job(job_id, jobstore) - now = datetime.now(self.timezone) - next_run_time = job.trigger.get_next_fire_time(None, now) - if next_run_time: - self.modify_job(job_id, jobstore, next_run_time=next_run_time) - else: - self.remove_job(job.id, jobstore) - - def get_jobs(self, jobstore=None, pending=None): - """ - Returns a list of pending jobs (if the scheduler hasn't been started yet) and scheduled jobs, either from a - specific job store or from all of them. - - :param str|unicode jobstore: alias of the job store - :param bool pending: ``False`` to leave out pending jobs (jobs that are waiting for the scheduler start to be - added to their respective job stores), ``True`` to only include pending jobs, anything else - to return both - :rtype: list[Job] - """ - - with self._jobstores_lock: - jobs = [] - - if pending is not False: - for job, alias, replace_existing in self._pending_jobs: - if jobstore is None or alias == jobstore: - jobs.append(job) - - if pending is not True: - for alias, store in six.iteritems(self._jobstores): - if jobstore is None or alias == jobstore: - jobs.extend(store.get_all_jobs()) - - return jobs - - def get_job(self, job_id, jobstore=None): - """ - Returns the Job that matches the given ``job_id``. - - :param str|unicode job_id: the identifier of the job - :param str|unicode jobstore: alias of the job store that most likely contains the job - :return: the Job by the given ID, or ``None`` if it wasn't found - :rtype: Job - """ - - with self._jobstores_lock: - try: - return self._lookup_job(job_id, jobstore)[0] - except JobLookupError: - return - - def remove_job(self, job_id, jobstore=None): - """ - Removes a job, preventing it from being run any more. - - :param str|unicode job_id: the identifier of the job - :param str|unicode jobstore: alias of the job store that contains the job - :raises JobLookupError: if the job was not found - """ - - with self._jobstores_lock: - # Check if the job is among the pending jobs - for i, (job, jobstore_alias, replace_existing) in enumerate(self._pending_jobs): - if job.id == job_id: - del self._pending_jobs[i] - jobstore = jobstore_alias - break - else: - # Otherwise, try to remove it from each store until it succeeds or we run out of stores to check - for alias, store in six.iteritems(self._jobstores): - if jobstore in (None, alias): - try: - store.remove_job(job_id) - except JobLookupError: - continue - - jobstore = alias - break - - if jobstore is None: - raise JobLookupError(job_id) - - # Notify listeners that a job has been removed - event = JobEvent(EVENT_JOB_REMOVED, job_id, jobstore) - self._dispatch_event(event) - - self._logger.info('Removed job %s', job_id) - - def remove_all_jobs(self, jobstore=None): - """ - Removes all jobs from the specified job store, or all job stores if none is given. - - :param str|unicode jobstore: alias of the job store - """ - - with self._jobstores_lock: - if jobstore: - self._pending_jobs = [pending for pending in self._pending_jobs if pending[1] != jobstore] - else: - self._pending_jobs = [] - - for alias, store in six.iteritems(self._jobstores): - if jobstore in (None, alias): - store.remove_all_jobs() - - self._dispatch_event(SchedulerEvent(EVENT_ALL_JOBS_REMOVED, jobstore)) - - def print_jobs(self, jobstore=None, out=None): - """ - print_jobs(jobstore=None, out=sys.stdout) - - Prints out a textual listing of all jobs currently scheduled on either all job stores or just a specific one. - - :param str|unicode jobstore: alias of the job store, ``None`` to list jobs from all stores - :param file out: a file-like object to print to (defaults to **sys.stdout** if nothing is given) - """ - - out = out or sys.stdout - with self._jobstores_lock: - if self._pending_jobs: - print(six.u('Pending jobs:'), file=out) - for job, jobstore_alias, replace_existing in self._pending_jobs: - if jobstore in (None, jobstore_alias): - print(six.u(' %s') % job, file=out) - - for alias, store in six.iteritems(self._jobstores): - if jobstore in (None, alias): - print(six.u('Jobstore %s:') % alias, file=out) - jobs = store.get_all_jobs() - if jobs: - for job in jobs: - print(six.u(' %s') % job, file=out) - else: - print(six.u(' No scheduled jobs'), file=out) - - @abstractmethod - def wakeup(self): - """ - Notifies the scheduler that there may be jobs due for execution. - Triggers :meth:`_process_jobs` to be run in an implementation specific manner. - """ - - # - # Private API - # - - def _configure(self, config): - # Set general options - self._logger = maybe_ref(config.pop('logger', None)) or getLogger('apscheduler.scheduler') - self.timezone = astimezone(config.pop('timezone', None)) or get_localzone() - - # Set the job defaults - job_defaults = config.get('job_defaults', {}) - self._job_defaults = { - 'misfire_grace_time': asint(job_defaults.get('misfire_grace_time', 1)), - 'coalesce': asbool(job_defaults.get('coalesce', True)), - 'max_instances': asint(job_defaults.get('max_instances', 1)) - } - - # Configure executors - self._executors.clear() - for alias, value in six.iteritems(config.get('executors', {})): - if isinstance(value, BaseExecutor): - self.add_executor(value, alias) - elif isinstance(value, MutableMapping): - executor_class = value.pop('class', None) - plugin = value.pop('type', None) - if plugin: - executor = self._create_plugin_instance('executor', plugin, value) - elif executor_class: - cls = maybe_ref(executor_class) - executor = cls(**value) - else: - raise ValueError('Cannot create executor "%s" -- either "type" or "class" must be defined' % alias) - - self.add_executor(executor, alias) - else: - raise TypeError("Expected executor instance or dict for executors['%s'], got %s instead" % ( - alias, value.__class__.__name__)) - - # Configure job stores - self._jobstores.clear() - for alias, value in six.iteritems(config.get('jobstores', {})): - if isinstance(value, BaseJobStore): - self.add_jobstore(value, alias) - elif isinstance(value, MutableMapping): - jobstore_class = value.pop('class', None) - plugin = value.pop('type', None) - if plugin: - jobstore = self._create_plugin_instance('jobstore', plugin, value) - elif jobstore_class: - cls = maybe_ref(jobstore_class) - jobstore = cls(**value) - else: - raise ValueError('Cannot create job store "%s" -- either "type" or "class" must be defined' % alias) - - self.add_jobstore(jobstore, alias) - else: - raise TypeError("Expected job store instance or dict for jobstores['%s'], got %s instead" % ( - alias, value.__class__.__name__)) - - def _create_default_executor(self): - """Creates a default executor store, specific to the particular scheduler type.""" - - return ThreadPoolExecutor() - - def _create_default_jobstore(self): - """Creates a default job store, specific to the particular scheduler type.""" - - return MemoryJobStore() - - def _lookup_executor(self, alias): - """ - Returns the executor instance by the given name from the list of executors that were added to this scheduler. - - :type alias: str - :raises KeyError: if no executor by the given alias is not found - """ - - try: - return self._executors[alias] - except KeyError: - raise KeyError('No such executor: %s' % alias) - - def _lookup_jobstore(self, alias): - """ - Returns the job store instance by the given name from the list of job stores that were added to this scheduler. - - :type alias: str - :raises KeyError: if no job store by the given alias is not found - """ - - try: - return self._jobstores[alias] - except KeyError: - raise KeyError('No such job store: %s' % alias) - - def _lookup_job(self, job_id, jobstore_alias): - """ - Finds a job by its ID. - - :type job_id: str - :param str jobstore_alias: alias of a job store to look in - :return tuple[Job, str]: a tuple of job, jobstore alias (jobstore alias is None in case of a pending job) - :raises JobLookupError: if no job by the given ID is found. - """ - - # Check if the job is among the pending jobs - for job, alias, replace_existing in self._pending_jobs: - if job.id == job_id: - return job, None - - # Look in all job stores - for alias, store in six.iteritems(self._jobstores): - if jobstore_alias in (None, alias): - job = store.lookup_job(job_id) - if job is not None: - return job, alias - - raise JobLookupError(job_id) - - def _dispatch_event(self, event): - """ - Dispatches the given event to interested listeners. - - :param SchedulerEvent event: the event to send - """ - - with self._listeners_lock: - listeners = tuple(self._listeners) - - for cb, mask in listeners: - if event.code & mask: - try: - cb(event) - except: - self._logger.exception('Error notifying listener') - - def _real_add_job(self, job, jobstore_alias, replace_existing, wakeup): - """ - :param Job job: the job to add - :param bool replace_existing: ``True`` to use update_job() in case the job already exists in the store - :param bool wakeup: ``True`` to wake up the scheduler after adding the job - """ - - # Fill in undefined values with defaults - replacements = {} - for key, value in six.iteritems(self._job_defaults): - if not hasattr(job, key): - replacements[key] = value - - # Calculate the next run time if there is none defined - if not hasattr(job, 'next_run_time'): - now = datetime.now(self.timezone) - replacements['next_run_time'] = job.trigger.get_next_fire_time(None, now) - - # Apply any replacements - job._modify(**replacements) - - # Add the job to the given job store - store = self._lookup_jobstore(jobstore_alias) - try: - store.add_job(job) - except ConflictingIdError: - if replace_existing: - store.update_job(job) - else: - raise - - # Mark the job as no longer pending - job._jobstore_alias = jobstore_alias - - # Notify listeners that a new job has been added - event = JobEvent(EVENT_JOB_ADDED, job.id, jobstore_alias) - self._dispatch_event(event) - - self._logger.info('Added job "%s" to job store "%s"', job.name, jobstore_alias) - - # Notify the scheduler about the new job - if wakeup: - self.wakeup() - - def _create_plugin_instance(self, type_, alias, constructor_kwargs): - """Creates an instance of the given plugin type, loading the plugin first if necessary.""" - - plugin_container, class_container, base_class = { - 'trigger': (self._trigger_plugins, self._trigger_classes, BaseTrigger), - 'jobstore': (self._jobstore_plugins, self._jobstore_classes, BaseJobStore), - 'executor': (self._executor_plugins, self._executor_classes, BaseExecutor) - }[type_] - - try: - plugin_cls = class_container[alias] - except KeyError: - if alias in plugin_container: - plugin_cls = class_container[alias] = plugin_container[alias].load() - if not issubclass(plugin_cls, base_class): - raise TypeError('The {0} entry point does not point to a {0} class'.format(type_)) - else: - raise LookupError('No {0} by the name "{1}" was found'.format(type_, alias)) - - return plugin_cls(**constructor_kwargs) - - def _create_trigger(self, trigger, trigger_args): - if isinstance(trigger, BaseTrigger): - return trigger - elif trigger is None: - trigger = 'date' - elif not isinstance(trigger, six.string_types): - raise TypeError('Expected a trigger instance or string, got %s instead' % trigger.__class__.__name__) - - # Use the scheduler's time zone if nothing else is specified - trigger_args.setdefault('timezone', self.timezone) - - # Instantiate the trigger class - return self._create_plugin_instance('trigger', trigger, trigger_args) - - def _create_lock(self): - """Creates a reentrant lock object.""" - - return RLock() - - def _process_jobs(self): - """ - Iterates through jobs in every jobstore, starts jobs that are due and figures out how long to wait for the next - round. - """ - - self._logger.debug('Looking for jobs to run') - now = datetime.now(self.timezone) - next_wakeup_time = None - - with self._jobstores_lock: - for jobstore_alias, jobstore in six.iteritems(self._jobstores): - for job in jobstore.get_due_jobs(now): - # Look up the job's executor - try: - executor = self._lookup_executor(job.executor) - except: - self._logger.error( - 'Executor lookup ("%s") failed for job "%s" -- removing it from the job store', - job.executor, job) - self.remove_job(job.id, jobstore_alias) - continue - - run_times = job._get_run_times(now) - run_times = run_times[-1:] if run_times and job.coalesce else run_times - if run_times: - try: - executor.submit_job(job, run_times) - except MaxInstancesReachedError: - self._logger.warning( - 'Execution of job "%s" skipped: maximum number of running instances reached (%d)', - job, job.max_instances) - except: - self._logger.exception('Error submitting job "%s" to executor "%s"', job, job.executor) - - # Update the job if it has a next execution time. Otherwise remove it from the job store. - job_next_run = job.trigger.get_next_fire_time(run_times[-1], now) - if job_next_run: - job._modify(next_run_time=job_next_run) - jobstore.update_job(job) - else: - self.remove_job(job.id, jobstore_alias) - - # Set a new next wakeup time if there isn't one yet or the jobstore has an even earlier one - jobstore_next_run_time = jobstore.get_next_run_time() - if jobstore_next_run_time and (next_wakeup_time is None or jobstore_next_run_time < next_wakeup_time): - next_wakeup_time = jobstore_next_run_time - - # Determine the delay until this method should be called again - if next_wakeup_time is not None: - wait_seconds = max(timedelta_seconds(next_wakeup_time - now), 0) - self._logger.debug('Next wakeup is due at %s (in %f seconds)', next_wakeup_time, wait_seconds) - else: - wait_seconds = None - self._logger.debug('No jobs; waiting until a job is added') - - return wait_seconds diff --git a/lib/apscheduler/schedulers/blocking.py b/lib/apscheduler/schedulers/blocking.py deleted file mode 100644 index 2720822c..00000000 --- a/lib/apscheduler/schedulers/blocking.py +++ /dev/null @@ -1,32 +0,0 @@ -from __future__ import absolute_import -from threading import Event - -from apscheduler.schedulers.base import BaseScheduler - - -class BlockingScheduler(BaseScheduler): - """ - A scheduler that runs in the foreground (:meth:`~apscheduler.schedulers.base.BaseScheduler.start` will block). - """ - - MAX_WAIT_TIME = 4294967 # Maximum value accepted by Event.wait() on Windows - - _event = None - - def start(self): - super(BlockingScheduler, self).start() - self._event = Event() - self._main_loop() - - def shutdown(self, wait=True): - super(BlockingScheduler, self).shutdown(wait) - self._event.set() - - def _main_loop(self): - while self.running: - wait_seconds = self._process_jobs() - self._event.wait(wait_seconds if wait_seconds is not None else self.MAX_WAIT_TIME) - self._event.clear() - - def wakeup(self): - self._event.set() diff --git a/lib/apscheduler/schedulers/gevent.py b/lib/apscheduler/schedulers/gevent.py deleted file mode 100644 index 9cce6589..00000000 --- a/lib/apscheduler/schedulers/gevent.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import absolute_import - -from apscheduler.schedulers.blocking import BlockingScheduler -from apscheduler.schedulers.base import BaseScheduler - -try: - from gevent.event import Event - from gevent.lock import RLock - import gevent -except ImportError: # pragma: nocover - raise ImportError('GeventScheduler requires gevent installed') - - -class GeventScheduler(BlockingScheduler): - """A scheduler that runs as a Gevent greenlet.""" - - _greenlet = None - - def start(self): - BaseScheduler.start(self) - self._event = Event() - self._greenlet = gevent.spawn(self._main_loop) - return self._greenlet - - def shutdown(self, wait=True): - super(GeventScheduler, self).shutdown(wait) - self._greenlet.join() - del self._greenlet - - def _create_lock(self): - return RLock() - - def _create_default_executor(self): - from apscheduler.executors.gevent import GeventExecutor - return GeventExecutor() diff --git a/lib/apscheduler/schedulers/qt.py b/lib/apscheduler/schedulers/qt.py deleted file mode 100644 index dde5afaa..00000000 --- a/lib/apscheduler/schedulers/qt.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import absolute_import - -from apscheduler.schedulers.base import BaseScheduler - -try: - from PyQt5.QtCore import QObject, QTimer -except ImportError: # pragma: nocover - try: - from PyQt4.QtCore import QObject, QTimer - except ImportError: - try: - from PySide.QtCore import QObject, QTimer # flake8: noqa - except ImportError: - raise ImportError('QtScheduler requires either PyQt5, PyQt4 or PySide installed') - - -class QtScheduler(BaseScheduler): - """A scheduler that runs in a Qt event loop.""" - - _timer = None - - def start(self): - super(QtScheduler, self).start() - self.wakeup() - - def shutdown(self, wait=True): - super(QtScheduler, self).shutdown(wait) - self._stop_timer() - - def _start_timer(self, wait_seconds): - self._stop_timer() - if wait_seconds is not None: - self._timer = QTimer.singleShot(wait_seconds * 1000, self._process_jobs) - - def _stop_timer(self): - if self._timer: - if self._timer.isActive(): - self._timer.stop() - del self._timer - - def wakeup(self): - self._start_timer(0) - - def _process_jobs(self): - wait_seconds = super(QtScheduler, self)._process_jobs() - self._start_timer(wait_seconds) diff --git a/lib/apscheduler/schedulers/tornado.py b/lib/apscheduler/schedulers/tornado.py deleted file mode 100644 index 78093308..00000000 --- a/lib/apscheduler/schedulers/tornado.py +++ /dev/null @@ -1,60 +0,0 @@ -from __future__ import absolute_import -from datetime import timedelta -from functools import wraps - -from apscheduler.schedulers.base import BaseScheduler -from apscheduler.util import maybe_ref - -try: - from tornado.ioloop import IOLoop -except ImportError: # pragma: nocover - raise ImportError('TornadoScheduler requires tornado installed') - - -def run_in_ioloop(func): - @wraps(func) - def wrapper(self, *args, **kwargs): - self._ioloop.add_callback(func, self, *args, **kwargs) - return wrapper - - -class TornadoScheduler(BaseScheduler): - """ - A scheduler that runs on a Tornado IOLoop. - - =========== =============================================================== - ``io_loop`` Tornado IOLoop instance to use (defaults to the global IO loop) - =========== =============================================================== - """ - - _ioloop = None - _timeout = None - - def start(self): - super(TornadoScheduler, self).start() - self.wakeup() - - @run_in_ioloop - def shutdown(self, wait=True): - super(TornadoScheduler, self).shutdown(wait) - self._stop_timer() - - def _configure(self, config): - self._ioloop = maybe_ref(config.pop('io_loop', None)) or IOLoop.current() - super(TornadoScheduler, self)._configure(config) - - def _start_timer(self, wait_seconds): - self._stop_timer() - if wait_seconds is not None: - self._timeout = self._ioloop.add_timeout(timedelta(seconds=wait_seconds), self.wakeup) - - def _stop_timer(self): - if self._timeout: - self._ioloop.remove_timeout(self._timeout) - del self._timeout - - @run_in_ioloop - def wakeup(self): - self._stop_timer() - wait_seconds = self._process_jobs() - self._start_timer(wait_seconds) diff --git a/lib/apscheduler/schedulers/twisted.py b/lib/apscheduler/schedulers/twisted.py deleted file mode 100644 index 166b6130..00000000 --- a/lib/apscheduler/schedulers/twisted.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import absolute_import -from functools import wraps - -from apscheduler.schedulers.base import BaseScheduler -from apscheduler.util import maybe_ref - -try: - from twisted.internet import reactor as default_reactor -except ImportError: # pragma: nocover - raise ImportError('TwistedScheduler requires Twisted installed') - - -def run_in_reactor(func): - @wraps(func) - def wrapper(self, *args, **kwargs): - self._reactor.callFromThread(func, self, *args, **kwargs) - return wrapper - - -class TwistedScheduler(BaseScheduler): - """ - A scheduler that runs on a Twisted reactor. - - Extra options: - - =========== ======================================================== - ``reactor`` Reactor instance to use (defaults to the global reactor) - =========== ======================================================== - """ - - _reactor = None - _delayedcall = None - - def _configure(self, config): - self._reactor = maybe_ref(config.pop('reactor', default_reactor)) - super(TwistedScheduler, self)._configure(config) - - def start(self): - super(TwistedScheduler, self).start() - self.wakeup() - - @run_in_reactor - def shutdown(self, wait=True): - super(TwistedScheduler, self).shutdown(wait) - self._stop_timer() - - def _start_timer(self, wait_seconds): - self._stop_timer() - if wait_seconds is not None: - self._delayedcall = self._reactor.callLater(wait_seconds, self.wakeup) - - def _stop_timer(self): - if self._delayedcall and self._delayedcall.active(): - self._delayedcall.cancel() - del self._delayedcall - - @run_in_reactor - def wakeup(self): - self._stop_timer() - wait_seconds = self._process_jobs() - self._start_timer(wait_seconds) - - def _create_default_executor(self): - from apscheduler.executors.twisted import TwistedExecutor - return TwistedExecutor() diff --git a/lib/apscheduler/triggers/__init__.py b/lib/apscheduler/triggers/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/apscheduler/triggers/base.py b/lib/apscheduler/triggers/base.py deleted file mode 100644 index 3520d316..00000000 --- a/lib/apscheduler/triggers/base.py +++ /dev/null @@ -1,16 +0,0 @@ -from abc import ABCMeta, abstractmethod - -import six - - -class BaseTrigger(six.with_metaclass(ABCMeta)): - """Abstract base class that defines the interface that every trigger must implement.""" - - @abstractmethod - def get_next_fire_time(self, previous_fire_time, now): - """ - Returns the next datetime to fire on, If no such datetime can be calculated, returns ``None``. - - :param datetime.datetime previous_fire_time: the previous time the trigger was fired - :param datetime.datetime now: current datetime - """ diff --git a/lib/apscheduler/triggers/cron/__init__.py b/lib/apscheduler/triggers/cron/__init__.py deleted file mode 100644 index 8df901ea..00000000 --- a/lib/apscheduler/triggers/cron/__init__.py +++ /dev/null @@ -1,176 +0,0 @@ -from datetime import datetime, timedelta - -from tzlocal import get_localzone -import six - -from apscheduler.triggers.base import BaseTrigger -from apscheduler.triggers.cron.fields import BaseField, WeekField, DayOfMonthField, DayOfWeekField, DEFAULT_VALUES -from apscheduler.util import datetime_ceil, convert_to_datetime, datetime_repr, astimezone - - -class CronTrigger(BaseTrigger): - """ - Triggers when current time matches all specified time constraints, similarly to how the UNIX cron scheduler works. - - :param int|str year: 4-digit year - :param int|str month: month (1-12) - :param int|str day: day of the (1-31) - :param int|str week: ISO week (1-53) - :param int|str day_of_week: number or name of weekday (0-6 or mon,tue,wed,thu,fri,sat,sun) - :param int|str hour: hour (0-23) - :param int|str minute: minute (0-59) - :param int|str second: second (0-59) - :param datetime|str start_date: earliest possible date/time to trigger on (inclusive) - :param datetime|str end_date: latest possible date/time to trigger on (inclusive) - :param datetime.tzinfo|str timezone: time zone to use for the date/time calculations - (defaults to scheduler timezone) - - .. note:: The first weekday is always **monday**. - """ - - FIELD_NAMES = ('year', 'month', 'day', 'week', 'day_of_week', 'hour', 'minute', 'second') - FIELDS_MAP = { - 'year': BaseField, - 'month': BaseField, - 'week': WeekField, - 'day': DayOfMonthField, - 'day_of_week': DayOfWeekField, - 'hour': BaseField, - 'minute': BaseField, - 'second': BaseField - } - - __slots__ = 'timezone', 'start_date', 'end_date', 'fields' - - def __init__(self, year=None, month=None, day=None, week=None, day_of_week=None, hour=None, minute=None, - second=None, start_date=None, end_date=None, timezone=None): - if timezone: - self.timezone = astimezone(timezone) - elif start_date and start_date.tzinfo: - self.timezone = start_date.tzinfo - elif end_date and end_date.tzinfo: - self.timezone = end_date.tzinfo - else: - self.timezone = get_localzone() - - self.start_date = convert_to_datetime(start_date, self.timezone, 'start_date') - self.end_date = convert_to_datetime(end_date, self.timezone, 'end_date') - - values = dict((key, value) for (key, value) in six.iteritems(locals()) - if key in self.FIELD_NAMES and value is not None) - self.fields = [] - assign_defaults = False - for field_name in self.FIELD_NAMES: - if field_name in values: - exprs = values.pop(field_name) - is_default = False - assign_defaults = not values - elif assign_defaults: - exprs = DEFAULT_VALUES[field_name] - is_default = True - else: - exprs = '*' - is_default = True - - field_class = self.FIELDS_MAP[field_name] - field = field_class(field_name, exprs, is_default) - self.fields.append(field) - - def _increment_field_value(self, dateval, fieldnum): - """ - Increments the designated field and resets all less significant fields to their minimum values. - - :type dateval: datetime - :type fieldnum: int - :return: a tuple containing the new date, and the number of the field that was actually incremented - :rtype: tuple - """ - - values = {} - i = 0 - while i < len(self.fields): - field = self.fields[i] - if not field.REAL: - if i == fieldnum: - fieldnum -= 1 - i -= 1 - else: - i += 1 - continue - - if i < fieldnum: - values[field.name] = field.get_value(dateval) - i += 1 - elif i > fieldnum: - values[field.name] = field.get_min(dateval) - i += 1 - else: - value = field.get_value(dateval) - maxval = field.get_max(dateval) - if value == maxval: - fieldnum -= 1 - i -= 1 - else: - values[field.name] = value + 1 - i += 1 - - difference = datetime(**values) - dateval.replace(tzinfo=None) - return self.timezone.normalize(dateval + difference), fieldnum - - def _set_field_value(self, dateval, fieldnum, new_value): - values = {} - for i, field in enumerate(self.fields): - if field.REAL: - if i < fieldnum: - values[field.name] = field.get_value(dateval) - elif i > fieldnum: - values[field.name] = field.get_min(dateval) - else: - values[field.name] = new_value - - difference = datetime(**values) - dateval.replace(tzinfo=None) - return self.timezone.normalize(dateval + difference) - - def get_next_fire_time(self, previous_fire_time, now): - if previous_fire_time: - start_date = max(now, previous_fire_time + timedelta(microseconds=1)) - else: - start_date = max(now, self.start_date) if self.start_date else now - - fieldnum = 0 - next_date = datetime_ceil(start_date).astimezone(self.timezone) - while 0 <= fieldnum < len(self.fields): - field = self.fields[fieldnum] - curr_value = field.get_value(next_date) - next_value = field.get_next_value(next_date) - - if next_value is None: - # No valid value was found - next_date, fieldnum = self._increment_field_value(next_date, fieldnum - 1) - elif next_value > curr_value: - # A valid, but higher than the starting value, was found - if field.REAL: - next_date = self._set_field_value(next_date, fieldnum, next_value) - fieldnum += 1 - else: - next_date, fieldnum = self._increment_field_value(next_date, fieldnum) - else: - # A valid value was found, no changes necessary - fieldnum += 1 - - # Return if the date has rolled past the end date - if self.end_date and next_date > self.end_date: - return None - - if fieldnum >= 0: - return next_date - - def __str__(self): - options = ["%s='%s'" % (f.name, f) for f in self.fields if not f.is_default] - return 'cron[%s]' % (', '.join(options)) - - def __repr__(self): - options = ["%s='%s'" % (f.name, f) for f in self.fields if not f.is_default] - if self.start_date: - options.append("start_date='%s'" % datetime_repr(self.start_date)) - return '<%s (%s)>' % (self.__class__.__name__, ', '.join(options)) diff --git a/lib/apscheduler/triggers/cron/expressions.py b/lib/apscheduler/triggers/cron/expressions.py deleted file mode 100644 index 55272db4..00000000 --- a/lib/apscheduler/triggers/cron/expressions.py +++ /dev/null @@ -1,188 +0,0 @@ -""" -This module contains the expressions applicable for CronTrigger's fields. -""" - -from calendar import monthrange -import re - -from apscheduler.util import asint - -__all__ = ('AllExpression', 'RangeExpression', 'WeekdayRangeExpression', 'WeekdayPositionExpression', - 'LastDayOfMonthExpression') - - -WEEKDAYS = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] - - -class AllExpression(object): - value_re = re.compile(r'\*(?:/(?P\d+))?$') - - def __init__(self, step=None): - self.step = asint(step) - if self.step == 0: - raise ValueError('Increment must be higher than 0') - - def get_next_value(self, date, field): - start = field.get_value(date) - minval = field.get_min(date) - maxval = field.get_max(date) - start = max(start, minval) - - if not self.step: - next = start - else: - distance_to_next = (self.step - (start - minval)) % self.step - next = start + distance_to_next - - if next <= maxval: - return next - - def __str__(self): - if self.step: - return '*/%d' % self.step - return '*' - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, self.step) - - -class RangeExpression(AllExpression): - value_re = re.compile( - r'(?P\d+)(?:-(?P\d+))?(?:/(?P\d+))?$') - - def __init__(self, first, last=None, step=None): - AllExpression.__init__(self, step) - first = asint(first) - last = asint(last) - if last is None and step is None: - last = first - if last is not None and first > last: - raise ValueError('The minimum value in a range must not be higher than the maximum') - self.first = first - self.last = last - - def get_next_value(self, date, field): - start = field.get_value(date) - minval = field.get_min(date) - maxval = field.get_max(date) - - # Apply range limits - minval = max(minval, self.first) - if self.last is not None: - maxval = min(maxval, self.last) - start = max(start, minval) - - if not self.step: - next = start - else: - distance_to_next = (self.step - (start - minval)) % self.step - next = start + distance_to_next - - if next <= maxval: - return next - - def __str__(self): - if self.last != self.first and self.last is not None: - range = '%d-%d' % (self.first, self.last) - else: - range = str(self.first) - - if self.step: - return '%s/%d' % (range, self.step) - return range - - def __repr__(self): - args = [str(self.first)] - if self.last != self.first and self.last is not None or self.step: - args.append(str(self.last)) - if self.step: - args.append(str(self.step)) - return "%s(%s)" % (self.__class__.__name__, ', '.join(args)) - - -class WeekdayRangeExpression(RangeExpression): - value_re = re.compile(r'(?P[a-z]+)(?:-(?P[a-z]+))?', re.IGNORECASE) - - def __init__(self, first, last=None): - try: - first_num = WEEKDAYS.index(first.lower()) - except ValueError: - raise ValueError('Invalid weekday name "%s"' % first) - - if last: - try: - last_num = WEEKDAYS.index(last.lower()) - except ValueError: - raise ValueError('Invalid weekday name "%s"' % last) - else: - last_num = None - - RangeExpression.__init__(self, first_num, last_num) - - def __str__(self): - if self.last != self.first and self.last is not None: - return '%s-%s' % (WEEKDAYS[self.first], WEEKDAYS[self.last]) - return WEEKDAYS[self.first] - - def __repr__(self): - args = ["'%s'" % WEEKDAYS[self.first]] - if self.last != self.first and self.last is not None: - args.append("'%s'" % WEEKDAYS[self.last]) - return "%s(%s)" % (self.__class__.__name__, ', '.join(args)) - - -class WeekdayPositionExpression(AllExpression): - options = ['1st', '2nd', '3rd', '4th', '5th', 'last'] - value_re = re.compile(r'(?P%s) +(?P(?:\d+|\w+))' % '|'.join(options), re.IGNORECASE) - - def __init__(self, option_name, weekday_name): - try: - self.option_num = self.options.index(option_name.lower()) - except ValueError: - raise ValueError('Invalid weekday position "%s"' % option_name) - - try: - self.weekday = WEEKDAYS.index(weekday_name.lower()) - except ValueError: - raise ValueError('Invalid weekday name "%s"' % weekday_name) - - def get_next_value(self, date, field): - # Figure out the weekday of the month's first day and the number - # of days in that month - first_day_wday, last_day = monthrange(date.year, date.month) - - # Calculate which day of the month is the first of the target weekdays - first_hit_day = self.weekday - first_day_wday + 1 - if first_hit_day <= 0: - first_hit_day += 7 - - # Calculate what day of the month the target weekday would be - if self.option_num < 5: - target_day = first_hit_day + self.option_num * 7 - else: - target_day = first_hit_day + ((last_day - first_hit_day) / 7) * 7 - - if target_day <= last_day and target_day >= date.day: - return target_day - - def __str__(self): - return '%s %s' % (self.options[self.option_num], WEEKDAYS[self.weekday]) - - def __repr__(self): - return "%s('%s', '%s')" % (self.__class__.__name__, self.options[self.option_num], WEEKDAYS[self.weekday]) - - -class LastDayOfMonthExpression(AllExpression): - value_re = re.compile(r'last', re.IGNORECASE) - - def __init__(self): - pass - - def get_next_value(self, date, field): - return monthrange(date.year, date.month)[1] - - def __str__(self): - return 'last' - - def __repr__(self): - return "%s()" % self.__class__.__name__ diff --git a/lib/apscheduler/triggers/cron/fields.py b/lib/apscheduler/triggers/cron/fields.py deleted file mode 100644 index e220599f..00000000 --- a/lib/apscheduler/triggers/cron/fields.py +++ /dev/null @@ -1,97 +0,0 @@ -""" -Fields represent CronTrigger options which map to :class:`~datetime.datetime` -fields. -""" - -from calendar import monthrange - -from apscheduler.triggers.cron.expressions import ( - AllExpression, RangeExpression, WeekdayPositionExpression, LastDayOfMonthExpression, WeekdayRangeExpression) - - -__all__ = ('MIN_VALUES', 'MAX_VALUES', 'DEFAULT_VALUES', 'BaseField', 'WeekField', 'DayOfMonthField', 'DayOfWeekField') - - -MIN_VALUES = {'year': 1970, 'month': 1, 'day': 1, 'week': 1, 'day_of_week': 0, 'hour': 0, 'minute': 0, 'second': 0} -MAX_VALUES = {'year': 2 ** 63, 'month': 12, 'day:': 31, 'week': 53, 'day_of_week': 6, 'hour': 23, 'minute': 59, - 'second': 59} -DEFAULT_VALUES = {'year': '*', 'month': 1, 'day': 1, 'week': '*', 'day_of_week': '*', 'hour': 0, 'minute': 0, - 'second': 0} - - -class BaseField(object): - REAL = True - COMPILERS = [AllExpression, RangeExpression] - - def __init__(self, name, exprs, is_default=False): - self.name = name - self.is_default = is_default - self.compile_expressions(exprs) - - def get_min(self, dateval): - return MIN_VALUES[self.name] - - def get_max(self, dateval): - return MAX_VALUES[self.name] - - def get_value(self, dateval): - return getattr(dateval, self.name) - - def get_next_value(self, dateval): - smallest = None - for expr in self.expressions: - value = expr.get_next_value(dateval, self) - if smallest is None or (value is not None and value < smallest): - smallest = value - - return smallest - - def compile_expressions(self, exprs): - self.expressions = [] - - # Split a comma-separated expression list, if any - exprs = str(exprs).strip() - if ',' in exprs: - for expr in exprs.split(','): - self.compile_expression(expr) - else: - self.compile_expression(exprs) - - def compile_expression(self, expr): - for compiler in self.COMPILERS: - match = compiler.value_re.match(expr) - if match: - compiled_expr = compiler(**match.groupdict()) - self.expressions.append(compiled_expr) - return - - raise ValueError('Unrecognized expression "%s" for field "%s"' % (expr, self.name)) - - def __str__(self): - expr_strings = (str(e) for e in self.expressions) - return ','.join(expr_strings) - - def __repr__(self): - return "%s('%s', '%s')" % (self.__class__.__name__, self.name, self) - - -class WeekField(BaseField): - REAL = False - - def get_value(self, dateval): - return dateval.isocalendar()[1] - - -class DayOfMonthField(BaseField): - COMPILERS = BaseField.COMPILERS + [WeekdayPositionExpression, LastDayOfMonthExpression] - - def get_max(self, dateval): - return monthrange(dateval.year, dateval.month)[1] - - -class DayOfWeekField(BaseField): - REAL = False - COMPILERS = BaseField.COMPILERS + [WeekdayRangeExpression] - - def get_value(self, dateval): - return dateval.weekday() diff --git a/lib/apscheduler/triggers/date.py b/lib/apscheduler/triggers/date.py deleted file mode 100644 index 237e6b4e..00000000 --- a/lib/apscheduler/triggers/date.py +++ /dev/null @@ -1,30 +0,0 @@ -from datetime import datetime - -from tzlocal import get_localzone - -from apscheduler.triggers.base import BaseTrigger -from apscheduler.util import convert_to_datetime, datetime_repr, astimezone - - -class DateTrigger(BaseTrigger): - """ - Triggers once on the given datetime. If ``run_date`` is left empty, current time is used. - - :param datetime|str run_date: the date/time to run the job at - :param datetime.tzinfo|str timezone: time zone for ``run_date`` if it doesn't have one already - """ - - __slots__ = 'timezone', 'run_date' - - def __init__(self, run_date=None, timezone=None): - timezone = astimezone(timezone) or get_localzone() - self.run_date = convert_to_datetime(run_date or datetime.now(), timezone, 'run_date') - - def get_next_fire_time(self, previous_fire_time, now): - return self.run_date if previous_fire_time is None else None - - def __str__(self): - return 'date[%s]' % datetime_repr(self.run_date) - - def __repr__(self): - return "<%s (run_date='%s')>" % (self.__class__.__name__, datetime_repr(self.run_date)) diff --git a/lib/apscheduler/triggers/interval.py b/lib/apscheduler/triggers/interval.py deleted file mode 100644 index df9e6fe7..00000000 --- a/lib/apscheduler/triggers/interval.py +++ /dev/null @@ -1,65 +0,0 @@ -from datetime import timedelta, datetime -from math import ceil - -from tzlocal import get_localzone - -from apscheduler.triggers.base import BaseTrigger -from apscheduler.util import convert_to_datetime, timedelta_seconds, datetime_repr, astimezone - - -class IntervalTrigger(BaseTrigger): - """ - Triggers on specified intervals, starting on ``start_date`` if specified, ``datetime.now()`` + interval - otherwise. - - :param int weeks: number of weeks to wait - :param int days: number of days to wait - :param int hours: number of hours to wait - :param int minutes: number of minutes to wait - :param int seconds: number of seconds to wait - :param datetime|str start_date: starting point for the interval calculation - :param datetime|str end_date: latest possible date/time to trigger on - :param datetime.tzinfo|str timezone: time zone to use for the date/time calculations - """ - - __slots__ = 'timezone', 'start_date', 'end_date', 'interval' - - def __init__(self, weeks=0, days=0, hours=0, minutes=0, seconds=0, start_date=None, end_date=None, timezone=None): - self.interval = timedelta(weeks=weeks, days=days, hours=hours, minutes=minutes, seconds=seconds) - self.interval_length = timedelta_seconds(self.interval) - if self.interval_length == 0: - self.interval = timedelta(seconds=1) - self.interval_length = 1 - - if timezone: - self.timezone = astimezone(timezone) - elif start_date and start_date.tzinfo: - self.timezone = start_date.tzinfo - elif end_date and end_date.tzinfo: - self.timezone = end_date.tzinfo - else: - self.timezone = get_localzone() - - start_date = start_date or (datetime.now(self.timezone) + self.interval) - self.start_date = convert_to_datetime(start_date, self.timezone, 'start_date') - self.end_date = convert_to_datetime(end_date, self.timezone, 'end_date') - - def get_next_fire_time(self, previous_fire_time, now): - if previous_fire_time: - next_fire_time = previous_fire_time + self.interval - elif self.start_date > now: - next_fire_time = self.start_date - else: - timediff_seconds = timedelta_seconds(now - self.start_date) - next_interval_num = int(ceil(timediff_seconds / self.interval_length)) - next_fire_time = self.start_date + self.interval * next_interval_num - - if not self.end_date or next_fire_time <= self.end_date: - return self.timezone.normalize(next_fire_time) - - def __str__(self): - return 'interval[%s]' % str(self.interval) - - def __repr__(self): - return "<%s (interval=%r, start_date='%s')>" % (self.__class__.__name__, self.interval, - datetime_repr(self.start_date)) diff --git a/lib/apscheduler/util.py b/lib/apscheduler/util.py deleted file mode 100644 index 988f9427..00000000 --- a/lib/apscheduler/util.py +++ /dev/null @@ -1,385 +0,0 @@ -"""This module contains several handy functions primarily meant for internal use.""" - -from __future__ import division -from datetime import date, datetime, time, timedelta, tzinfo -from inspect import isfunction, ismethod, getargspec -from calendar import timegm -import re - -from pytz import timezone, utc -import six - -try: - from inspect import signature -except ImportError: # pragma: nocover - try: - from funcsigs import signature - except ImportError: - signature = None - -__all__ = ('asint', 'asbool', 'astimezone', 'convert_to_datetime', 'datetime_to_utc_timestamp', - 'utc_timestamp_to_datetime', 'timedelta_seconds', 'datetime_ceil', 'get_callable_name', 'obj_to_ref', - 'ref_to_obj', 'maybe_ref', 'repr_escape', 'check_callable_args') - - -class _Undefined(object): - def __nonzero__(self): - return False - - def __bool__(self): - return False - - def __repr__(self): - return '' - -undefined = _Undefined() #: a unique object that only signifies that no value is defined - - -def asint(text): - """ - Safely converts a string to an integer, returning None if the string is None. - - :type text: str - :rtype: int - """ - - if text is not None: - return int(text) - - -def asbool(obj): - """ - Interprets an object as a boolean value. - - :rtype: bool - """ - - if isinstance(obj, str): - obj = obj.strip().lower() - if obj in ('true', 'yes', 'on', 'y', 't', '1'): - return True - if obj in ('false', 'no', 'off', 'n', 'f', '0'): - return False - raise ValueError('Unable to interpret value "%s" as boolean' % obj) - return bool(obj) - - -def astimezone(obj): - """ - Interprets an object as a timezone. - - :rtype: tzinfo - """ - - if isinstance(obj, six.string_types): - return timezone(obj) - if isinstance(obj, tzinfo): - if not hasattr(obj, 'localize') or not hasattr(obj, 'normalize'): - raise TypeError('Only timezones from the pytz library are supported') - if obj.zone == 'local': - raise ValueError('Unable to determine the name of the local timezone -- use an explicit timezone instead') - return obj - if obj is not None: - raise TypeError('Expected tzinfo, got %s instead' % obj.__class__.__name__) - - -_DATE_REGEX = re.compile( - r'(?P\d{4})-(?P\d{1,2})-(?P\d{1,2})' - r'(?: (?P\d{1,2}):(?P\d{1,2}):(?P\d{1,2})' - r'(?:\.(?P\d{1,6}))?)?') - - -def convert_to_datetime(input, tz, arg_name): - """ - Converts the given object to a timezone aware datetime object. - If a timezone aware datetime object is passed, it is returned unmodified. - If a native datetime object is passed, it is given the specified timezone. - If the input is a string, it is parsed as a datetime with the given timezone. - - Date strings are accepted in three different forms: date only (Y-m-d), - date with time (Y-m-d H:M:S) or with date+time with microseconds - (Y-m-d H:M:S.micro). - - :param str|datetime input: the datetime or string to convert to a timezone aware datetime - :param datetime.tzinfo tz: timezone to interpret ``input`` in - :param str arg_name: the name of the argument (used in an error message) - :rtype: datetime - """ - - if input is None: - return - elif isinstance(input, datetime): - datetime_ = input - elif isinstance(input, date): - datetime_ = datetime.combine(input, time()) - elif isinstance(input, six.string_types): - m = _DATE_REGEX.match(input) - if not m: - raise ValueError('Invalid date string') - values = [(k, int(v or 0)) for k, v in m.groupdict().items()] - values = dict(values) - datetime_ = datetime(**values) - else: - raise TypeError('Unsupported type for %s: %s' % (arg_name, input.__class__.__name__)) - - if datetime_.tzinfo is not None: - return datetime_ - if tz is None: - raise ValueError('The "tz" argument must be specified if %s has no timezone information' % arg_name) - if isinstance(tz, six.string_types): - tz = timezone(tz) - - try: - return tz.localize(datetime_, is_dst=None) - except AttributeError: - raise TypeError('Only pytz timezones are supported (need the localize() and normalize() methods)') - - -def datetime_to_utc_timestamp(timeval): - """ - Converts a datetime instance to a timestamp. - - :type timeval: datetime - :rtype: float - """ - - if timeval is not None: - return timegm(timeval.utctimetuple()) + timeval.microsecond / 1000000 - - -def utc_timestamp_to_datetime(timestamp): - """ - Converts the given timestamp to a datetime instance. - - :type timestamp: float - :rtype: datetime - """ - - if timestamp is not None: - return datetime.fromtimestamp(timestamp, utc) - - -def timedelta_seconds(delta): - """ - Converts the given timedelta to seconds. - - :type delta: timedelta - :rtype: float - """ - - return delta.days * 24 * 60 * 60 + delta.seconds + \ - delta.microseconds / 1000000.0 - - -def datetime_ceil(dateval): - """ - Rounds the given datetime object upwards. - - :type dateval: datetime - """ - - if dateval.microsecond > 0: - return dateval + timedelta(seconds=1, microseconds=-dateval.microsecond) - return dateval - - -def datetime_repr(dateval): - return dateval.strftime('%Y-%m-%d %H:%M:%S %Z') if dateval else 'None' - - -def get_callable_name(func): - """ - Returns the best available display name for the given function/callable. - - :rtype: str - """ - - # the easy case (on Python 3.3+) - if hasattr(func, '__qualname__'): - return func.__qualname__ - - # class methods, bound and unbound methods - f_self = getattr(func, '__self__', None) or getattr(func, 'im_self', None) - if f_self and hasattr(func, '__name__'): - f_class = f_self if isinstance(f_self, type) else f_self.__class__ - else: - f_class = getattr(func, 'im_class', None) - - if f_class and hasattr(func, '__name__'): - return '%s.%s' % (f_class.__name__, func.__name__) - - # class or class instance - if hasattr(func, '__call__'): - # class - if hasattr(func, '__name__'): - return func.__name__ - - # instance of a class with a __call__ method - return func.__class__.__name__ - - raise TypeError('Unable to determine a name for %r -- maybe it is not a callable?' % func) - - -def obj_to_ref(obj): - """ - Returns the path to the given object. - - :rtype: str - """ - - try: - ref = '%s:%s' % (obj.__module__, get_callable_name(obj)) - obj2 = ref_to_obj(ref) - if obj != obj2: - raise ValueError - except Exception: - raise ValueError('Cannot determine the reference to %r' % obj) - - return ref - - -def ref_to_obj(ref): - """ - Returns the object pointed to by ``ref``. - - :type ref: str - """ - - if not isinstance(ref, six.string_types): - raise TypeError('References must be strings') - if ':' not in ref: - raise ValueError('Invalid reference') - - modulename, rest = ref.split(':', 1) - try: - obj = __import__(modulename) - except ImportError: - raise LookupError('Error resolving reference %s: could not import module' % ref) - - try: - for name in modulename.split('.')[1:] + rest.split('.'): - obj = getattr(obj, name) - return obj - except Exception: - raise LookupError('Error resolving reference %s: error looking up object' % ref) - - -def maybe_ref(ref): - """ - Returns the object that the given reference points to, if it is indeed a reference. - If it is not a reference, the object is returned as-is. - """ - - if not isinstance(ref, str): - return ref - return ref_to_obj(ref) - - -if six.PY2: - def repr_escape(string): - if isinstance(string, six.text_type): - return string.encode('ascii', 'backslashreplace') - return string -else: - repr_escape = lambda string: string - - -def check_callable_args(func, args, kwargs): - """ - Ensures that the given callable can be called with the given arguments. - - :type args: tuple - :type kwargs: dict - """ - - pos_kwargs_conflicts = [] # parameters that have a match in both args and kwargs - positional_only_kwargs = [] # positional-only parameters that have a match in kwargs - unsatisfied_args = [] # parameters in signature that don't have a match in args or kwargs - unsatisfied_kwargs = [] # keyword-only arguments that don't have a match in kwargs - unmatched_args = list(args) # args that didn't match any of the parameters in the signature - unmatched_kwargs = list(kwargs) # kwargs that didn't match any of the parameters in the signature - has_varargs = has_var_kwargs = False # indicates if the signature defines *args and **kwargs respectively - - if signature: - try: - sig = signature(func) - except ValueError: - return # signature() doesn't work against every kind of callable - - for param in six.itervalues(sig.parameters): - if param.kind == param.POSITIONAL_OR_KEYWORD: - if param.name in unmatched_kwargs and unmatched_args: - pos_kwargs_conflicts.append(param.name) - elif unmatched_args: - del unmatched_args[0] - elif param.name in unmatched_kwargs: - unmatched_kwargs.remove(param.name) - elif param.default is param.empty: - unsatisfied_args.append(param.name) - elif param.kind == param.POSITIONAL_ONLY: - if unmatched_args: - del unmatched_args[0] - elif param.name in unmatched_kwargs: - unmatched_kwargs.remove(param.name) - positional_only_kwargs.append(param.name) - elif param.default is param.empty: - unsatisfied_args.append(param.name) - elif param.kind == param.KEYWORD_ONLY: - if param.name in unmatched_kwargs: - unmatched_kwargs.remove(param.name) - elif param.default is param.empty: - unsatisfied_kwargs.append(param.name) - elif param.kind == param.VAR_POSITIONAL: - has_varargs = True - elif param.kind == param.VAR_KEYWORD: - has_var_kwargs = True - else: - if not isfunction(func) and not ismethod(func) and hasattr(func, '__call__'): - func = func.__call__ - - try: - argspec = getargspec(func) - except TypeError: - return # getargspec() doesn't work certain callables - - argspec_args = argspec.args if not ismethod(func) else argspec.args[1:] - has_varargs = bool(argspec.varargs) - has_var_kwargs = bool(argspec.keywords) - for arg, default in six.moves.zip_longest(argspec_args, argspec.defaults or (), fillvalue=undefined): - if arg in unmatched_kwargs and unmatched_args: - pos_kwargs_conflicts.append(arg) - elif unmatched_args: - del unmatched_args[0] - elif arg in unmatched_kwargs: - unmatched_kwargs.remove(arg) - elif default is undefined: - unsatisfied_args.append(arg) - - # Make sure there are no conflicts between args and kwargs - if pos_kwargs_conflicts: - raise ValueError('The following arguments are supplied in both args and kwargs: %s' % - ', '.join(pos_kwargs_conflicts)) - - # Check if keyword arguments are being fed to positional-only parameters - if positional_only_kwargs: - raise ValueError('The following arguments cannot be given as keyword arguments: %s' % - ', '.join(positional_only_kwargs)) - - # Check that the number of positional arguments minus the number of matched kwargs matches the argspec - if unsatisfied_args: - raise ValueError('The following arguments have not been supplied: %s' % ', '.join(unsatisfied_args)) - - # Check that all keyword-only arguments have been supplied - if unsatisfied_kwargs: - raise ValueError('The following keyword-only arguments have not been supplied in kwargs: %s' % - ', '.join(unsatisfied_kwargs)) - - # Check that the callable can accept the given number of positional arguments - if not has_varargs and unmatched_args: - raise ValueError('The list of positional arguments is longer than the target callable can handle ' - '(allowed: %d, given in args: %d)' % (len(args) - len(unmatched_args), len(args))) - - # Check that the callable can accept the given keyword arguments - if not has_var_kwargs and unmatched_kwargs: - raise ValueError('The target callable does not accept the following keyword arguments: %s' % - ', '.join(unmatched_kwargs)) diff --git a/lib/argparse.py b/lib/argparse.py deleted file mode 100644 index f0cfe27e..00000000 --- a/lib/argparse.py +++ /dev/null @@ -1,2386 +0,0 @@ -# Author: Steven J. Bethard . - -"""Command-line parsing library - -This module is an optparse-inspired command-line parsing library that: - - - handles both optional and positional arguments - - produces highly informative usage messages - - supports parsers that dispatch to sub-parsers - -The following is a simple usage example that sums integers from the -command-line and writes the result to a file:: - - parser = argparse.ArgumentParser( - description='sum the integers at the command line') - parser.add_argument( - 'integers', metavar='int', nargs='+', type=int, - help='an integer to be summed') - parser.add_argument( - '--log', default=sys.stdout, type=argparse.FileType('w'), - help='the file where the sum should be written') - args = parser.parse_args() - args.log.write('%s' % sum(args.integers)) - args.log.close() - -The module contains the following public classes: - - - ArgumentParser -- The main entry point for command-line parsing. As the - example above shows, the add_argument() method is used to populate - the parser with actions for optional and positional arguments. Then - the parse_args() method is invoked to convert the args at the - command-line into an object with attributes. - - - ArgumentError -- The exception raised by ArgumentParser objects when - there are errors with the parser's actions. Errors raised while - parsing the command-line are caught by ArgumentParser and emitted - as command-line messages. - - - FileType -- A factory for defining types of files to be created. As the - example above shows, instances of FileType are typically passed as - the type= argument of add_argument() calls. - - - Action -- The base class for parser actions. Typically actions are - selected by passing strings like 'store_true' or 'append_const' to - the action= argument of add_argument(). However, for greater - customization of ArgumentParser actions, subclasses of Action may - be defined and passed as the action= argument. - - - HelpFormatter, RawDescriptionHelpFormatter, RawTextHelpFormatter, - ArgumentDefaultsHelpFormatter -- Formatter classes which - may be passed as the formatter_class= argument to the - ArgumentParser constructor. HelpFormatter is the default, - RawDescriptionHelpFormatter and RawTextHelpFormatter tell the parser - not to change the formatting for help text, and - ArgumentDefaultsHelpFormatter adds information about argument defaults - to the help. - -All other classes in this module are considered implementation details. -(Also note that HelpFormatter and RawDescriptionHelpFormatter are only -considered public as object names -- the API of the formatter objects is -still considered an implementation detail.) -""" - -__version__ = '1.1' -__all__ = [ - 'ArgumentParser', - 'ArgumentError', - 'ArgumentTypeError', - 'FileType', - 'HelpFormatter', - 'ArgumentDefaultsHelpFormatter', - 'RawDescriptionHelpFormatter', - 'RawTextHelpFormatter', - 'MetavarTypeHelpFormatter', - 'Namespace', - 'Action', - 'ONE_OR_MORE', - 'OPTIONAL', - 'PARSER', - 'REMAINDER', - 'SUPPRESS', - 'ZERO_OR_MORE', -] - - -import collections as _collections -import copy as _copy -import os as _os -import re as _re -import sys as _sys -import textwrap as _textwrap - -from gettext import gettext as _, ngettext - - -def _callable(obj): - return hasattr(obj, '__call__') or hasattr(obj, '__bases__') - - -SUPPRESS = '==SUPPRESS==' - -OPTIONAL = '?' -ZERO_OR_MORE = '*' -ONE_OR_MORE = '+' -PARSER = 'A...' -REMAINDER = '...' -_UNRECOGNIZED_ARGS_ATTR = '_unrecognized_args' - -# ============================= -# Utility functions and classes -# ============================= - -class _AttributeHolder(object): - """Abstract base class that provides __repr__. - - The __repr__ method returns a string in the format:: - ClassName(attr=name, attr=name, ...) - The attributes are determined either by a class-level attribute, - '_kwarg_names', or by inspecting the instance __dict__. - """ - - def __repr__(self): - type_name = type(self).__name__ - arg_strings = [] - for arg in self._get_args(): - arg_strings.append(repr(arg)) - for name, value in self._get_kwargs(): - arg_strings.append('%s=%r' % (name, value)) - return '%s(%s)' % (type_name, ', '.join(arg_strings)) - - def _get_kwargs(self): - return sorted(self.__dict__.items()) - - def _get_args(self): - return [] - - -def _ensure_value(namespace, name, value): - if getattr(namespace, name, None) is None: - setattr(namespace, name, value) - return getattr(namespace, name) - - -# =============== -# Formatting Help -# =============== - -class HelpFormatter(object): - """Formatter for generating usage messages and argument help strings. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. - """ - - def __init__(self, - prog, - indent_increment=2, - max_help_position=24, - width=None): - - # default setting for width - if width is None: - try: - width = int(_os.environ['COLUMNS']) - except (KeyError, ValueError): - width = 80 - width -= 2 - - self._prog = prog - self._indent_increment = indent_increment - self._max_help_position = max_help_position - self._width = width - - self._current_indent = 0 - self._level = 0 - self._action_max_length = 0 - - self._root_section = self._Section(self, None) - self._current_section = self._root_section - - self._whitespace_matcher = _re.compile(r'\s+') - self._long_break_matcher = _re.compile(r'\n\n\n+') - - # =============================== - # Section and indentation methods - # =============================== - def _indent(self): - self._current_indent += self._indent_increment - self._level += 1 - - def _dedent(self): - self._current_indent -= self._indent_increment - assert self._current_indent >= 0, 'Indent decreased below 0.' - self._level -= 1 - - class _Section(object): - - def __init__(self, formatter, parent, heading=None): - self.formatter = formatter - self.parent = parent - self.heading = heading - self.items = [] - - def format_help(self): - # format the indented section - if self.parent is not None: - self.formatter._indent() - join = self.formatter._join_parts - for func, args in self.items: - func(*args) - item_help = join([func(*args) for func, args in self.items]) - if self.parent is not None: - self.formatter._dedent() - - # return nothing if the section was empty - if not item_help: - return '' - - # add the heading if the section was non-empty - if self.heading is not SUPPRESS and self.heading is not None: - current_indent = self.formatter._current_indent - heading = '%*s%s:\n' % (current_indent, '', self.heading) - else: - heading = '' - - # join the section-initial newline, the heading and the help - return join(['\n', heading, item_help, '\n']) - - def _add_item(self, func, args): - self._current_section.items.append((func, args)) - - # ======================== - # Message building methods - # ======================== - def start_section(self, heading): - self._indent() - section = self._Section(self, self._current_section, heading) - self._add_item(section.format_help, []) - self._current_section = section - - def end_section(self): - self._current_section = self._current_section.parent - self._dedent() - - def add_text(self, text): - if text is not SUPPRESS and text is not None: - self._add_item(self._format_text, [text]) - - def add_usage(self, usage, actions, groups, prefix=None): - if usage is not SUPPRESS: - args = usage, actions, groups, prefix - self._add_item(self._format_usage, args) - - def add_argument(self, action): - if action.help is not SUPPRESS: - - # find all invocations - get_invocation = self._format_action_invocation - invocations = [get_invocation(action)] - for subaction in self._iter_indented_subactions(action): - invocations.append(get_invocation(subaction)) - - # update the maximum item length - invocation_length = max([len(s) for s in invocations]) - action_length = invocation_length + self._current_indent - self._action_max_length = max(self._action_max_length, - action_length) - - # add the item to the list - self._add_item(self._format_action, [action]) - - def add_arguments(self, actions): - for action in actions: - self.add_argument(action) - - # ======================= - # Help-formatting methods - # ======================= - def format_help(self): - help = self._root_section.format_help() - if help: - help = self._long_break_matcher.sub('\n\n', help) - help = help.strip('\n') + '\n' - return help - - def _join_parts(self, part_strings): - return ''.join([part - for part in part_strings - if part and part is not SUPPRESS]) - - def _format_usage(self, usage, actions, groups, prefix): - if prefix is None: - prefix = _('usage: ') - - # if usage is specified, use that - if usage is not None: - usage = usage % dict(prog=self._prog) - - # if no optionals or positionals are available, usage is just prog - elif usage is None and not actions: - usage = '%(prog)s' % dict(prog=self._prog) - - # if optionals and positionals are available, calculate usage - elif usage is None: - prog = '%(prog)s' % dict(prog=self._prog) - - # split optionals from positionals - optionals = [] - positionals = [] - for action in actions: - if action.option_strings: - optionals.append(action) - else: - positionals.append(action) - - # build full usage string - format = self._format_actions_usage - action_usage = format(optionals + positionals, groups) - usage = ' '.join([s for s in [prog, action_usage] if s]) - - # wrap the usage parts if it's too long - text_width = self._width - self._current_indent - if len(prefix) + len(usage) > text_width: - - # break usage into wrappable parts - part_regexp = r'\(.*?\)+|\[.*?\]+|\S+' - opt_usage = format(optionals, groups) - pos_usage = format(positionals, groups) - opt_parts = _re.findall(part_regexp, opt_usage) - pos_parts = _re.findall(part_regexp, pos_usage) - assert ' '.join(opt_parts) == opt_usage - assert ' '.join(pos_parts) == pos_usage - - # helper for wrapping lines - def get_lines(parts, indent, prefix=None): - lines = [] - line = [] - if prefix is not None: - line_len = len(prefix) - 1 - else: - line_len = len(indent) - 1 - for part in parts: - if line_len + 1 + len(part) > text_width: - lines.append(indent + ' '.join(line)) - line = [] - line_len = len(indent) - 1 - line.append(part) - line_len += len(part) + 1 - if line: - lines.append(indent + ' '.join(line)) - if prefix is not None: - lines[0] = lines[0][len(indent):] - return lines - - # if prog is short, follow it with optionals or positionals - if len(prefix) + len(prog) <= 0.75 * text_width: - indent = ' ' * (len(prefix) + len(prog) + 1) - if opt_parts: - lines = get_lines([prog] + opt_parts, indent, prefix) - lines.extend(get_lines(pos_parts, indent)) - elif pos_parts: - lines = get_lines([prog] + pos_parts, indent, prefix) - else: - lines = [prog] - - # if prog is long, put it on its own line - else: - indent = ' ' * len(prefix) - parts = opt_parts + pos_parts - lines = get_lines(parts, indent) - if len(lines) > 1: - lines = [] - lines.extend(get_lines(opt_parts, indent)) - lines.extend(get_lines(pos_parts, indent)) - lines = [prog] + lines - - # join lines into usage - usage = '\n'.join(lines) - - # prefix with 'usage:' - return '%s%s\n\n' % (prefix, usage) - - def _format_actions_usage(self, actions, groups): - # find group indices and identify actions in groups - group_actions = set() - inserts = {} - for group in groups: - try: - start = actions.index(group._group_actions[0]) - except ValueError: - continue - else: - end = start + len(group._group_actions) - if actions[start:end] == group._group_actions: - for action in group._group_actions: - group_actions.add(action) - if not group.required: - if start in inserts: - inserts[start] += ' [' - else: - inserts[start] = '[' - inserts[end] = ']' - else: - if start in inserts: - inserts[start] += ' (' - else: - inserts[start] = '(' - inserts[end] = ')' - for i in range(start + 1, end): - inserts[i] = '|' - - # collect all actions format strings - parts = [] - for i, action in enumerate(actions): - - # suppressed arguments are marked with None - # remove | separators for suppressed arguments - if action.help is SUPPRESS: - parts.append(None) - if inserts.get(i) == '|': - inserts.pop(i) - elif inserts.get(i + 1) == '|': - inserts.pop(i + 1) - - # produce all arg strings - elif not action.option_strings: - default = self._get_default_metavar_for_positional(action) - part = self._format_args(action, default) - - # if it's in a group, strip the outer [] - if action in group_actions: - if part[0] == '[' and part[-1] == ']': - part = part[1:-1] - - # add the action string to the list - parts.append(part) - - # produce the first way to invoke the option in brackets - else: - option_string = action.option_strings[0] - - # if the Optional doesn't take a value, format is: - # -s or --long - if action.nargs == 0: - part = '%s' % option_string - - # if the Optional takes a value, format is: - # -s ARGS or --long ARGS - else: - default = self._get_default_metavar_for_optional(action) - args_string = self._format_args(action, default) - part = '%s %s' % (option_string, args_string) - - # make it look optional if it's not required or in a group - if not action.required and action not in group_actions: - part = '[%s]' % part - - # add the action string to the list - parts.append(part) - - # insert things at the necessary indices - for i in sorted(inserts, reverse=True): - parts[i:i] = [inserts[i]] - - # join all the action items with spaces - text = ' '.join([item for item in parts if item is not None]) - - # clean up separators for mutually exclusive groups - open = r'[\[(]' - close = r'[\])]' - text = _re.sub(r'(%s) ' % open, r'\1', text) - text = _re.sub(r' (%s)' % close, r'\1', text) - text = _re.sub(r'%s *%s' % (open, close), r'', text) - text = _re.sub(r'\(([^|]*)\)', r'\1', text) - text = text.strip() - - # return the text - return text - - def _format_text(self, text): - if '%(prog)' in text: - text = text % dict(prog=self._prog) - text_width = self._width - self._current_indent - indent = ' ' * self._current_indent - return self._fill_text(text, text_width, indent) + '\n\n' - - def _format_action(self, action): - # determine the required width and the entry label - help_position = min(self._action_max_length + 2, - self._max_help_position) - help_width = self._width - help_position - action_width = help_position - self._current_indent - 2 - action_header = self._format_action_invocation(action) - - # ho nelp; start on same line and add a final newline - if not action.help: - tup = self._current_indent, '', action_header - action_header = '%*s%s\n' % tup - - # short action name; start on the same line and pad two spaces - elif len(action_header) <= action_width: - tup = self._current_indent, '', action_width, action_header - action_header = '%*s%-*s ' % tup - indent_first = 0 - - # long action name; start on the next line - else: - tup = self._current_indent, '', action_header - action_header = '%*s%s\n' % tup - indent_first = help_position - - # collect the pieces of the action help - parts = [action_header] - - # if there was help for the action, add lines of help text - if action.help: - help_text = self._expand_help(action) - help_lines = self._split_lines(help_text, help_width) - parts.append('%*s%s\n' % (indent_first, '', help_lines[0])) - for line in help_lines[1:]: - parts.append('%*s%s\n' % (help_position, '', line)) - - # or add a newline if the description doesn't end with one - elif not action_header.endswith('\n'): - parts.append('\n') - - # if there are any sub-actions, add their help as well - for subaction in self._iter_indented_subactions(action): - parts.append(self._format_action(subaction)) - - # return a single string - return self._join_parts(parts) - - def _format_action_invocation(self, action): - if not action.option_strings: - default = self._get_default_metavar_for_positional(action) - metavar, = self._metavar_formatter(action, default)(1) - return metavar - - else: - parts = [] - - # if the Optional doesn't take a value, format is: - # -s, --long - if action.nargs == 0: - parts.extend(action.option_strings) - - # if the Optional takes a value, format is: - # -s ARGS, --long ARGS - else: - default = self._get_default_metavar_for_optional(action) - args_string = self._format_args(action, default) - for option_string in action.option_strings: - parts.append('%s %s' % (option_string, args_string)) - - return ', '.join(parts) - - def _metavar_formatter(self, action, default_metavar): - if action.metavar is not None: - result = action.metavar - elif action.choices is not None: - choice_strs = [str(choice) for choice in action.choices] - result = '{%s}' % ','.join(choice_strs) - else: - result = default_metavar - - def format(tuple_size): - if isinstance(result, tuple): - return result - else: - return (result, ) * tuple_size - return format - - def _format_args(self, action, default_metavar): - get_metavar = self._metavar_formatter(action, default_metavar) - if action.nargs is None: - result = '%s' % get_metavar(1) - elif action.nargs == OPTIONAL: - result = '[%s]' % get_metavar(1) - elif action.nargs == ZERO_OR_MORE: - result = '[%s [%s ...]]' % get_metavar(2) - elif action.nargs == ONE_OR_MORE: - result = '%s [%s ...]' % get_metavar(2) - elif action.nargs == REMAINDER: - result = '...' - elif action.nargs == PARSER: - result = '%s ...' % get_metavar(1) - else: - formats = ['%s' for _ in range(action.nargs)] - result = ' '.join(formats) % get_metavar(action.nargs) - return result - - def _expand_help(self, action): - params = dict(vars(action), prog=self._prog) - for name in list(params): - if params[name] is SUPPRESS: - del params[name] - for name in list(params): - if hasattr(params[name], '__name__'): - params[name] = params[name].__name__ - if params.get('choices') is not None: - choices_str = ', '.join([str(c) for c in params['choices']]) - params['choices'] = choices_str - return self._get_help_string(action) % params - - def _iter_indented_subactions(self, action): - try: - get_subactions = action._get_subactions - except AttributeError: - pass - else: - self._indent() - for subaction in get_subactions(): - yield subaction - self._dedent() - - def _split_lines(self, text, width): - text = self._whitespace_matcher.sub(' ', text).strip() - return _textwrap.wrap(text, width) - - def _fill_text(self, text, width, indent): - text = self._whitespace_matcher.sub(' ', text).strip() - return _textwrap.fill(text, width, initial_indent=indent, - subsequent_indent=indent) - - def _get_help_string(self, action): - return action.help - - def _get_default_metavar_for_optional(self, action): - return action.dest.upper() - - def _get_default_metavar_for_positional(self, action): - return action.dest - - -class RawDescriptionHelpFormatter(HelpFormatter): - """Help message formatter which retains any formatting in descriptions. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. - """ - - def _fill_text(self, text, width, indent): - return ''.join([indent + line for line in text.splitlines(True)]) - - -class RawTextHelpFormatter(RawDescriptionHelpFormatter): - """Help message formatter which retains formatting of all help text. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. - """ - - def _split_lines(self, text, width): - return text.splitlines() - - -class ArgumentDefaultsHelpFormatter(HelpFormatter): - """Help message formatter which adds default values to argument help. - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. - """ - - def _get_help_string(self, action): - help = action.help - if '%(default)' not in action.help: - if action.default is not SUPPRESS: - defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] - if action.option_strings or action.nargs in defaulting_nargs: - help += ' (default: %(default)s)' - return help - - -class MetavarTypeHelpFormatter(HelpFormatter): - """Help message formatter which uses the argument 'type' as the default - metavar value (instead of the argument 'dest') - - Only the name of this class is considered a public API. All the methods - provided by the class are considered an implementation detail. - """ - - def _get_default_metavar_for_optional(self, action): - return action.type.__name__ - - def _get_default_metavar_for_positional(self, action): - return action.type.__name__ - - - -# ===================== -# Options and Arguments -# ===================== - -def _get_action_name(argument): - if argument is None: - return None - elif argument.option_strings: - return '/'.join(argument.option_strings) - elif argument.metavar not in (None, SUPPRESS): - return argument.metavar - elif argument.dest not in (None, SUPPRESS): - return argument.dest - else: - return None - - -class ArgumentError(Exception): - """An error from creating or using an argument (optional or positional). - - The string value of this exception is the message, augmented with - information about the argument that caused it. - """ - - def __init__(self, argument, message): - self.argument_name = _get_action_name(argument) - self.message = message - - def __str__(self): - if self.argument_name is None: - format = '%(message)s' - else: - format = 'argument %(argument_name)s: %(message)s' - return format % dict(message=self.message, - argument_name=self.argument_name) - - -class ArgumentTypeError(Exception): - """An error from trying to convert a command line string to a type.""" - pass - - -# ============== -# Action classes -# ============== - -class Action(_AttributeHolder): - """Information about how to convert command line strings to Python objects. - - Action objects are used by an ArgumentParser to represent the information - needed to parse a single argument from one or more strings from the - command line. The keyword arguments to the Action constructor are also - all attributes of Action instances. - - Keyword Arguments: - - - option_strings -- A list of command-line option strings which - should be associated with this action. - - - dest -- The name of the attribute to hold the created object(s) - - - nargs -- The number of command-line arguments that should be - consumed. By default, one argument will be consumed and a single - value will be produced. Other values include: - - N (an integer) consumes N arguments (and produces a list) - - '?' consumes zero or one arguments - - '*' consumes zero or more arguments (and produces a list) - - '+' consumes one or more arguments (and produces a list) - Note that the difference between the default and nargs=1 is that - with the default, a single value will be produced, while with - nargs=1, a list containing a single value will be produced. - - - const -- The value to be produced if the option is specified and the - option uses an action that takes no values. - - - default -- The value to be produced if the option is not specified. - - - type -- The type which the command-line arguments should be converted - to, should be one of 'string', 'int', 'float', 'complex' or a - callable object that accepts a single string argument. If None, - 'string' is assumed. - - - choices -- A container of values that should be allowed. If not None, - after a command-line argument has been converted to the appropriate - type, an exception will be raised if it is not a member of this - collection. - - - required -- True if the action must always be specified at the - command line. This is only meaningful for optional command-line - arguments. - - - help -- The help string describing the argument. - - - metavar -- The name to be used for the option's argument with the - help string. If None, the 'dest' value will be used as the name. - """ - - def __init__(self, - option_strings, - dest, - nargs=None, - const=None, - default=None, - type=None, - choices=None, - required=False, - help=None, - metavar=None): - self.option_strings = option_strings - self.dest = dest - self.nargs = nargs - self.const = const - self.default = default - self.type = type - self.choices = choices - self.required = required - self.help = help - self.metavar = metavar - - def _get_kwargs(self): - names = [ - 'option_strings', - 'dest', - 'nargs', - 'const', - 'default', - 'type', - 'choices', - 'help', - 'metavar', - ] - return [(name, getattr(self, name)) for name in names] - - def __call__(self, parser, namespace, values, option_string=None): - raise NotImplementedError(_('.__call__() not defined')) - - -class _StoreAction(Action): - - def __init__(self, - option_strings, - dest, - nargs=None, - const=None, - default=None, - type=None, - choices=None, - required=False, - help=None, - metavar=None): - if nargs == 0: - raise ValueError('nargs for store actions must be > 0; if you ' - 'have nothing to store, actions such as store ' - 'true or store const may be more appropriate') - if const is not None and nargs != OPTIONAL: - raise ValueError('nargs must be %r to supply const' % OPTIONAL) - super(_StoreAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=nargs, - const=const, - default=default, - type=type, - choices=choices, - required=required, - help=help, - metavar=metavar) - - def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, values) - - -class _StoreConstAction(Action): - - def __init__(self, - option_strings, - dest, - const, - default=None, - required=False, - help=None, - metavar=None): - super(_StoreConstAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=0, - const=const, - default=default, - required=required, - help=help) - - def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, self.const) - - -class _StoreTrueAction(_StoreConstAction): - - def __init__(self, - option_strings, - dest, - default=False, - required=False, - help=None): - super(_StoreTrueAction, self).__init__( - option_strings=option_strings, - dest=dest, - const=True, - default=default, - required=required, - help=help) - - -class _StoreFalseAction(_StoreConstAction): - - def __init__(self, - option_strings, - dest, - default=True, - required=False, - help=None): - super(_StoreFalseAction, self).__init__( - option_strings=option_strings, - dest=dest, - const=False, - default=default, - required=required, - help=help) - - -class _AppendAction(Action): - - def __init__(self, - option_strings, - dest, - nargs=None, - const=None, - default=None, - type=None, - choices=None, - required=False, - help=None, - metavar=None): - if nargs == 0: - raise ValueError('nargs for append actions must be > 0; if arg ' - 'strings are not supplying the value to append, ' - 'the append const action may be more appropriate') - if const is not None and nargs != OPTIONAL: - raise ValueError('nargs must be %r to supply const' % OPTIONAL) - super(_AppendAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=nargs, - const=const, - default=default, - type=type, - choices=choices, - required=required, - help=help, - metavar=metavar) - - def __call__(self, parser, namespace, values, option_string=None): - items = _copy.copy(_ensure_value(namespace, self.dest, [])) - items.append(values) - setattr(namespace, self.dest, items) - - -class _AppendConstAction(Action): - - def __init__(self, - option_strings, - dest, - const, - default=None, - required=False, - help=None, - metavar=None): - super(_AppendConstAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=0, - const=const, - default=default, - required=required, - help=help, - metavar=metavar) - - def __call__(self, parser, namespace, values, option_string=None): - items = _copy.copy(_ensure_value(namespace, self.dest, [])) - items.append(self.const) - setattr(namespace, self.dest, items) - - -class _CountAction(Action): - - def __init__(self, - option_strings, - dest, - default=None, - required=False, - help=None): - super(_CountAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=0, - default=default, - required=required, - help=help) - - def __call__(self, parser, namespace, values, option_string=None): - new_count = _ensure_value(namespace, self.dest, 0) + 1 - setattr(namespace, self.dest, new_count) - - -class _HelpAction(Action): - - def __init__(self, - option_strings, - dest=SUPPRESS, - default=SUPPRESS, - help=None): - super(_HelpAction, self).__init__( - option_strings=option_strings, - dest=dest, - default=default, - nargs=0, - help=help) - - def __call__(self, parser, namespace, values, option_string=None): - parser.print_help() - parser.exit() - - -class _VersionAction(Action): - - def __init__(self, - option_strings, - version=None, - dest=SUPPRESS, - default=SUPPRESS, - help="show program's version number and exit"): - super(_VersionAction, self).__init__( - option_strings=option_strings, - dest=dest, - default=default, - nargs=0, - help=help) - self.version = version - - def __call__(self, parser, namespace, values, option_string=None): - version = self.version - if version is None: - version = parser.version - formatter = parser._get_formatter() - formatter.add_text(version) - parser.exit(message=formatter.format_help()) - - -class _SubParsersAction(Action): - - class _ChoicesPseudoAction(Action): - - def __init__(self, name, aliases, help): - metavar = dest = name - if aliases: - metavar += ' (%s)' % ', '.join(aliases) - sup = super(_SubParsersAction._ChoicesPseudoAction, self) - sup.__init__(option_strings=[], dest=dest, help=help, - metavar=metavar) - - def __init__(self, - option_strings, - prog, - parser_class, - dest=SUPPRESS, - help=None, - metavar=None): - - self._prog_prefix = prog - self._parser_class = parser_class - self._name_parser_map = _collections.OrderedDict() - self._choices_actions = [] - - super(_SubParsersAction, self).__init__( - option_strings=option_strings, - dest=dest, - nargs=PARSER, - choices=self._name_parser_map, - help=help, - metavar=metavar) - - def add_parser(self, name, **kwargs): - # set prog from the existing prefix - if kwargs.get('prog') is None: - kwargs['prog'] = '%s %s' % (self._prog_prefix, name) - - aliases = kwargs.pop('aliases', ()) - - # create a pseudo-action to hold the choice help - if 'help' in kwargs: - help = kwargs.pop('help') - choice_action = self._ChoicesPseudoAction(name, aliases, help) - self._choices_actions.append(choice_action) - - # create the parser and add it to the map - parser = self._parser_class(**kwargs) - self._name_parser_map[name] = parser - - # make parser available under aliases also - for alias in aliases: - self._name_parser_map[alias] = parser - - return parser - - def _get_subactions(self): - return self._choices_actions - - def __call__(self, parser, namespace, values, option_string=None): - parser_name = values[0] - arg_strings = values[1:] - - # set the parser name if requested - if self.dest is not SUPPRESS: - setattr(namespace, self.dest, parser_name) - - # select the parser - try: - parser = self._name_parser_map[parser_name] - except KeyError: - args = {'parser_name': parser_name, - 'choices': ', '.join(self._name_parser_map)} - msg = _('unknown parser %(parser_name)r (choices: %(choices)s)') % args - raise ArgumentError(self, msg) - - # parse all the remaining options into the namespace - # store any unrecognized options on the object, so that the top - # level parser can decide what to do with them - namespace, arg_strings = parser.parse_known_args(arg_strings, namespace) - if arg_strings: - vars(namespace).setdefault(_UNRECOGNIZED_ARGS_ATTR, []) - getattr(namespace, _UNRECOGNIZED_ARGS_ATTR).extend(arg_strings) - - -# ============== -# Type classes -# ============== - -class FileType(object): - """Factory for creating file object types - - Instances of FileType are typically passed as type= arguments to the - ArgumentParser add_argument() method. - - Keyword Arguments: - - mode -- A string indicating how the file is to be opened. Accepts the - same values as the builtin open() function. - - bufsize -- The file's desired buffer size. Accepts the same values as - the builtin open() function. - """ - - def __init__(self, mode='r', bufsize=-1): - self._mode = mode - self._bufsize = bufsize - - def __call__(self, string): - # the special argument "-" means sys.std{in,out} - if string == '-': - if 'r' in self._mode: - return _sys.stdin - elif 'w' in self._mode: - return _sys.stdout - else: - msg = _('argument "-" with mode %r') % self._mode - raise ValueError(msg) - - # all other arguments are used as file names - try: - return open(string, self._mode, self._bufsize) - except IOError as e: - message = _("can't open '%s': %s") - raise ArgumentTypeError(message % (string, e)) - - def __repr__(self): - args = self._mode, self._bufsize - args_str = ', '.join(repr(arg) for arg in args if arg != -1) - return '%s(%s)' % (type(self).__name__, args_str) - -# =========================== -# Optional and Positional Parsing -# =========================== - -class Namespace(_AttributeHolder): - """Simple object for storing attributes. - - Implements equality by attribute names and values, and provides a simple - string representation. - """ - - def __init__(self, **kwargs): - for name in kwargs: - setattr(self, name, kwargs[name]) - - def __eq__(self, other): - return vars(self) == vars(other) - - def __ne__(self, other): - return not (self == other) - - def __contains__(self, key): - return key in self.__dict__ - - -class _ActionsContainer(object): - - def __init__(self, - description, - prefix_chars, - argument_default, - conflict_handler): - super(_ActionsContainer, self).__init__() - - self.description = description - self.argument_default = argument_default - self.prefix_chars = prefix_chars - self.conflict_handler = conflict_handler - - # set up registries - self._registries = {} - - # register actions - self.register('action', None, _StoreAction) - self.register('action', 'store', _StoreAction) - self.register('action', 'store_const', _StoreConstAction) - self.register('action', 'store_true', _StoreTrueAction) - self.register('action', 'store_false', _StoreFalseAction) - self.register('action', 'append', _AppendAction) - self.register('action', 'append_const', _AppendConstAction) - self.register('action', 'count', _CountAction) - self.register('action', 'help', _HelpAction) - self.register('action', 'version', _VersionAction) - self.register('action', 'parsers', _SubParsersAction) - - # raise an exception if the conflict handler is invalid - self._get_handler() - - # action storage - self._actions = [] - self._option_string_actions = {} - - # groups - self._action_groups = [] - self._mutually_exclusive_groups = [] - - # defaults storage - self._defaults = {} - - # determines whether an "option" looks like a negative number - self._negative_number_matcher = _re.compile(r'^-\d+$|^-\d*\.\d+$') - - # whether or not there are any optionals that look like negative - # numbers -- uses a list so it can be shared and edited - self._has_negative_number_optionals = [] - - # ==================== - # Registration methods - # ==================== - def register(self, registry_name, value, object): - registry = self._registries.setdefault(registry_name, {}) - registry[value] = object - - def _registry_get(self, registry_name, value, default=None): - return self._registries[registry_name].get(value, default) - - # ================================== - # Namespace default accessor methods - # ================================== - def set_defaults(self, **kwargs): - self._defaults.update(kwargs) - - # if these defaults match any existing arguments, replace - # the previous default on the object with the new one - for action in self._actions: - if action.dest in kwargs: - action.default = kwargs[action.dest] - - def get_default(self, dest): - for action in self._actions: - if action.dest == dest and action.default is not None: - return action.default - return self._defaults.get(dest, None) - - - # ======================= - # Adding argument actions - # ======================= - def add_argument(self, *args, **kwargs): - """ - add_argument(dest, ..., name=value, ...) - add_argument(option_string, option_string, ..., name=value, ...) - """ - - # if no positional args are supplied or only one is supplied and - # it doesn't look like an option string, parse a positional - # argument - chars = self.prefix_chars - if not args or len(args) == 1 and args[0][0] not in chars: - if args and 'dest' in kwargs: - raise ValueError('dest supplied twice for positional argument') - kwargs = self._get_positional_kwargs(*args, **kwargs) - - # otherwise, we're adding an optional argument - else: - kwargs = self._get_optional_kwargs(*args, **kwargs) - - # if no default was supplied, use the parser-level default - if 'default' not in kwargs: - dest = kwargs['dest'] - if dest in self._defaults: - kwargs['default'] = self._defaults[dest] - elif self.argument_default is not None: - kwargs['default'] = self.argument_default - - # create the action object, and add it to the parser - action_class = self._pop_action_class(kwargs) - if not _callable(action_class): - raise ValueError('unknown action "%s"' % (action_class,)) - action = action_class(**kwargs) - - # raise an error if the action type is not callable - type_func = self._registry_get('type', action.type, action.type) - if not _callable(type_func): - raise ValueError('%r is not callable' % (type_func,)) - - # raise an error if the metavar does not match the type - if hasattr(self, "_get_formatter"): - try: - self._get_formatter()._format_args(action, None) - except TypeError: - raise ValueError("length of metavar tuple does not match nargs") - - return self._add_action(action) - - def add_argument_group(self, *args, **kwargs): - group = _ArgumentGroup(self, *args, **kwargs) - self._action_groups.append(group) - return group - - def add_mutually_exclusive_group(self, **kwargs): - group = _MutuallyExclusiveGroup(self, **kwargs) - self._mutually_exclusive_groups.append(group) - return group - - def _add_action(self, action): - # resolve any conflicts - self._check_conflict(action) - - # add to actions list - self._actions.append(action) - action.container = self - - # index the action by any option strings it has - for option_string in action.option_strings: - self._option_string_actions[option_string] = action - - # set the flag if any option strings look like negative numbers - for option_string in action.option_strings: - if self._negative_number_matcher.match(option_string): - if not self._has_negative_number_optionals: - self._has_negative_number_optionals.append(True) - - # return the created action - return action - - def _remove_action(self, action): - self._actions.remove(action) - - def _add_container_actions(self, container): - # collect groups by titles - title_group_map = {} - for group in self._action_groups: - if group.title in title_group_map: - msg = _('cannot merge actions - two groups are named %r') - raise ValueError(msg % (group.title)) - title_group_map[group.title] = group - - # map each action to its group - group_map = {} - for group in container._action_groups: - - # if a group with the title exists, use that, otherwise - # create a new group matching the container's group - if group.title not in title_group_map: - title_group_map[group.title] = self.add_argument_group( - title=group.title, - description=group.description, - conflict_handler=group.conflict_handler) - - # map the actions to their new group - for action in group._group_actions: - group_map[action] = title_group_map[group.title] - - # add container's mutually exclusive groups - # NOTE: if add_mutually_exclusive_group ever gains title= and - # description= then this code will need to be expanded as above - for group in container._mutually_exclusive_groups: - mutex_group = self.add_mutually_exclusive_group( - required=group.required) - - # map the actions to their new mutex group - for action in group._group_actions: - group_map[action] = mutex_group - - # add all actions to this container or their group - for action in container._actions: - group_map.get(action, self)._add_action(action) - - def _get_positional_kwargs(self, dest, **kwargs): - # make sure required is not specified - if 'required' in kwargs: - msg = _("'required' is an invalid argument for positionals") - raise TypeError(msg) - - # mark positional arguments as required if at least one is - # always required - if kwargs.get('nargs') not in [OPTIONAL, ZERO_OR_MORE]: - kwargs['required'] = True - if kwargs.get('nargs') == ZERO_OR_MORE and 'default' not in kwargs: - kwargs['required'] = True - - # return the keyword arguments with no option strings - return dict(kwargs, dest=dest, option_strings=[]) - - def _get_optional_kwargs(self, *args, **kwargs): - # determine short and long option strings - option_strings = [] - long_option_strings = [] - for option_string in args: - # error on strings that don't start with an appropriate prefix - if not option_string[0] in self.prefix_chars: - args = {'option': option_string, - 'prefix_chars': self.prefix_chars} - msg = _('invalid option string %(option)r: ' - 'must start with a character %(prefix_chars)r') - raise ValueError(msg % args) - - # strings starting with two prefix characters are long options - option_strings.append(option_string) - if option_string[0] in self.prefix_chars: - if len(option_string) > 1: - if option_string[1] in self.prefix_chars: - long_option_strings.append(option_string) - - # infer destination, '--foo-bar' -> 'foo_bar' and '-x' -> 'x' - dest = kwargs.pop('dest', None) - if dest is None: - if long_option_strings: - dest_option_string = long_option_strings[0] - else: - dest_option_string = option_strings[0] - dest = dest_option_string.lstrip(self.prefix_chars) - if not dest: - msg = _('dest= is required for options like %r') - raise ValueError(msg % option_string) - dest = dest.replace('-', '_') - - # return the updated keyword arguments - return dict(kwargs, dest=dest, option_strings=option_strings) - - def _pop_action_class(self, kwargs, default=None): - action = kwargs.pop('action', default) - return self._registry_get('action', action, action) - - def _get_handler(self): - # determine function from conflict handler string - handler_func_name = '_handle_conflict_%s' % self.conflict_handler - try: - return getattr(self, handler_func_name) - except AttributeError: - msg = _('invalid conflict_resolution value: %r') - raise ValueError(msg % self.conflict_handler) - - def _check_conflict(self, action): - - # find all options that conflict with this option - confl_optionals = [] - for option_string in action.option_strings: - if option_string in self._option_string_actions: - confl_optional = self._option_string_actions[option_string] - confl_optionals.append((option_string, confl_optional)) - - # resolve any conflicts - if confl_optionals: - conflict_handler = self._get_handler() - conflict_handler(action, confl_optionals) - - def _handle_conflict_error(self, action, conflicting_actions): - message = ngettext('conflicting option string: %s', - 'conflicting option strings: %s', - len(conflicting_actions)) - conflict_string = ', '.join([option_string - for option_string, action - in conflicting_actions]) - raise ArgumentError(action, message % conflict_string) - - def _handle_conflict_resolve(self, action, conflicting_actions): - - # remove all conflicting options - for option_string, action in conflicting_actions: - - # remove the conflicting option - action.option_strings.remove(option_string) - self._option_string_actions.pop(option_string, None) - - # if the option now has no option string, remove it from the - # container holding it - if not action.option_strings: - action.container._remove_action(action) - - -class _ArgumentGroup(_ActionsContainer): - - def __init__(self, container, title=None, description=None, **kwargs): - # add any missing keyword arguments by checking the container - update = kwargs.setdefault - update('conflict_handler', container.conflict_handler) - update('prefix_chars', container.prefix_chars) - update('argument_default', container.argument_default) - super_init = super(_ArgumentGroup, self).__init__ - super_init(description=description, **kwargs) - - # group attributes - self.title = title - self._group_actions = [] - - # share most attributes with the container - self._registries = container._registries - self._actions = container._actions - self._option_string_actions = container._option_string_actions - self._defaults = container._defaults - self._has_negative_number_optionals = \ - container._has_negative_number_optionals - self._mutually_exclusive_groups = container._mutually_exclusive_groups - - def _add_action(self, action): - action = super(_ArgumentGroup, self)._add_action(action) - self._group_actions.append(action) - return action - - def _remove_action(self, action): - super(_ArgumentGroup, self)._remove_action(action) - self._group_actions.remove(action) - - -class _MutuallyExclusiveGroup(_ArgumentGroup): - - def __init__(self, container, required=False): - super(_MutuallyExclusiveGroup, self).__init__(container) - self.required = required - self._container = container - - def _add_action(self, action): - if action.required: - msg = _('mutually exclusive arguments must be optional') - raise ValueError(msg) - action = self._container._add_action(action) - self._group_actions.append(action) - return action - - def _remove_action(self, action): - self._container._remove_action(action) - self._group_actions.remove(action) - - -class ArgumentParser(_AttributeHolder, _ActionsContainer): - """Object for parsing command line strings into Python objects. - - Keyword Arguments: - - prog -- The name of the program (default: sys.argv[0]) - - usage -- A usage message (default: auto-generated from arguments) - - description -- A description of what the program does - - epilog -- Text following the argument descriptions - - parents -- Parsers whose arguments should be copied into this one - - formatter_class -- HelpFormatter class for printing help messages - - prefix_chars -- Characters that prefix optional arguments - - fromfile_prefix_chars -- Characters that prefix files containing - additional arguments - - argument_default -- The default value for all arguments - - conflict_handler -- String indicating how to handle conflicts - - add_help -- Add a -h/-help option - """ - - def __init__(self, - prog=None, - usage=None, - description=None, - epilog=None, - version=None, - parents=[], - formatter_class=HelpFormatter, - prefix_chars='-', - fromfile_prefix_chars=None, - argument_default=None, - conflict_handler='error', - add_help=True): - - if version is not None: - import warnings - warnings.warn( - """The "version" argument to ArgumentParser is deprecated. """ - """Please use """ - """"add_argument(..., action='version', version="N", ...)" """ - """instead""", DeprecationWarning) - - superinit = super(ArgumentParser, self).__init__ - superinit(description=description, - prefix_chars=prefix_chars, - argument_default=argument_default, - conflict_handler=conflict_handler) - - # default setting for prog - if prog is None: - prog = _os.path.basename(_sys.argv[0]) - - self.prog = prog - self.usage = usage - self.epilog = epilog - self.version = version - self.formatter_class = formatter_class - self.fromfile_prefix_chars = fromfile_prefix_chars - self.add_help = add_help - - add_group = self.add_argument_group - self._positionals = add_group(_('positional arguments')) - self._optionals = add_group(_('optional arguments')) - self._subparsers = None - - # register types - def identity(string): - return string - self.register('type', None, identity) - - # add help and version arguments if necessary - # (using explicit default to override global argument_default) - default_prefix = '-' if '-' in prefix_chars else prefix_chars[0] - if self.add_help: - self.add_argument( - default_prefix+'h', default_prefix*2+'help', - action='help', default=SUPPRESS, - help=_('show this help message and exit')) - if self.version: - self.add_argument( - default_prefix+'v', default_prefix*2+'version', - action='version', default=SUPPRESS, - version=self.version, - help=_("show program's version number and exit")) - - # add parent arguments and defaults - for parent in parents: - self._add_container_actions(parent) - try: - defaults = parent._defaults - except AttributeError: - pass - else: - self._defaults.update(defaults) - - # ======================= - # Pretty __repr__ methods - # ======================= - def _get_kwargs(self): - names = [ - 'prog', - 'usage', - 'description', - 'version', - 'formatter_class', - 'conflict_handler', - 'add_help', - ] - return [(name, getattr(self, name)) for name in names] - - # ================================== - # Optional/Positional adding methods - # ================================== - def add_subparsers(self, **kwargs): - if self._subparsers is not None: - self.error(_('cannot have multiple subparser arguments')) - - # add the parser class to the arguments if it's not present - kwargs.setdefault('parser_class', type(self)) - - if 'title' in kwargs or 'description' in kwargs: - title = _(kwargs.pop('title', 'subcommands')) - description = _(kwargs.pop('description', None)) - self._subparsers = self.add_argument_group(title, description) - else: - self._subparsers = self._positionals - - # prog defaults to the usage message of this parser, skipping - # optional arguments and with no "usage:" prefix - if kwargs.get('prog') is None: - formatter = self._get_formatter() - positionals = self._get_positional_actions() - groups = self._mutually_exclusive_groups - formatter.add_usage(self.usage, positionals, groups, '') - kwargs['prog'] = formatter.format_help().strip() - - # create the parsers action and add it to the positionals list - parsers_class = self._pop_action_class(kwargs, 'parsers') - action = parsers_class(option_strings=[], **kwargs) - self._subparsers._add_action(action) - - # return the created parsers action - return action - - def _add_action(self, action): - if action.option_strings: - self._optionals._add_action(action) - else: - self._positionals._add_action(action) - return action - - def _get_optional_actions(self): - return [action - for action in self._actions - if action.option_strings] - - def _get_positional_actions(self): - return [action - for action in self._actions - if not action.option_strings] - - # ===================================== - # Command line argument parsing methods - # ===================================== - def parse_args(self, args=None, namespace=None): - args, argv = self.parse_known_args(args, namespace) - if argv: - msg = _('unrecognized arguments: %s') - self.error(msg % ' '.join(argv)) - return args - - def parse_known_args(self, args=None, namespace=None): - # args default to the system args - if args is None: - args = _sys.argv[1:] - - # default Namespace built from parser defaults - if namespace is None: - namespace = Namespace() - - # add any action defaults that aren't present - for action in self._actions: - if action.dest is not SUPPRESS: - if not hasattr(namespace, action.dest): - if action.default is not SUPPRESS: - default = action.default - if isinstance(action.default, str): - default = self._get_value(action, default) - setattr(namespace, action.dest, default) - - # add any parser defaults that aren't present - for dest in self._defaults: - if not hasattr(namespace, dest): - setattr(namespace, dest, self._defaults[dest]) - - # parse the arguments and exit if there are any errors - try: - namespace, args = self._parse_known_args(args, namespace) - if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): - args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) - delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) - return namespace, args - except ArgumentError: - err = _sys.exc_info()[1] - self.error(str(err)) - - def _parse_known_args(self, arg_strings, namespace): - # replace arg strings that are file references - if self.fromfile_prefix_chars is not None: - arg_strings = self._read_args_from_files(arg_strings) - - # map all mutually exclusive arguments to the other arguments - # they can't occur with - action_conflicts = {} - for mutex_group in self._mutually_exclusive_groups: - group_actions = mutex_group._group_actions - for i, mutex_action in enumerate(mutex_group._group_actions): - conflicts = action_conflicts.setdefault(mutex_action, []) - conflicts.extend(group_actions[:i]) - conflicts.extend(group_actions[i + 1:]) - - # find all option indices, and determine the arg_string_pattern - # which has an 'O' if there is an option at an index, - # an 'A' if there is an argument, or a '-' if there is a '--' - option_string_indices = {} - arg_string_pattern_parts = [] - arg_strings_iter = iter(arg_strings) - for i, arg_string in enumerate(arg_strings_iter): - - # all args after -- are non-options - if arg_string == '--': - arg_string_pattern_parts.append('-') - for arg_string in arg_strings_iter: - arg_string_pattern_parts.append('A') - - # otherwise, add the arg to the arg strings - # and note the index if it was an option - else: - option_tuple = self._parse_optional(arg_string) - if option_tuple is None: - pattern = 'A' - else: - option_string_indices[i] = option_tuple - pattern = 'O' - arg_string_pattern_parts.append(pattern) - - # join the pieces together to form the pattern - arg_strings_pattern = ''.join(arg_string_pattern_parts) - - # converts arg strings to the appropriate and then takes the action - seen_actions = set() - seen_non_default_actions = set() - - def take_action(action, argument_strings, option_string=None): - seen_actions.add(action) - argument_values = self._get_values(action, argument_strings) - - # error if this argument is not allowed with other previously - # seen arguments, assuming that actions that use the default - # value don't really count as "present" - if argument_values is not action.default: - seen_non_default_actions.add(action) - for conflict_action in action_conflicts.get(action, []): - if conflict_action in seen_non_default_actions: - msg = _('not allowed with argument %s') - action_name = _get_action_name(conflict_action) - raise ArgumentError(action, msg % action_name) - - # take the action if we didn't receive a SUPPRESS value - # (e.g. from a default) - if argument_values is not SUPPRESS: - action(self, namespace, argument_values, option_string) - - # function to convert arg_strings into an optional action - def consume_optional(start_index): - - # get the optional identified at this index - option_tuple = option_string_indices[start_index] - action, option_string, explicit_arg = option_tuple - - # identify additional optionals in the same arg string - # (e.g. -xyz is the same as -x -y -z if no args are required) - match_argument = self._match_argument - action_tuples = [] - while True: - - # if we found no optional action, skip it - if action is None: - extras.append(arg_strings[start_index]) - return start_index + 1 - - # if there is an explicit argument, try to match the - # optional's string arguments to only this - if explicit_arg is not None: - arg_count = match_argument(action, 'A') - - # if the action is a single-dash option and takes no - # arguments, try to parse more single-dash options out - # of the tail of the option string - chars = self.prefix_chars - if arg_count == 0 and option_string[1] not in chars: - action_tuples.append((action, [], option_string)) - char = option_string[0] - option_string = char + explicit_arg[0] - new_explicit_arg = explicit_arg[1:] or None - optionals_map = self._option_string_actions - if option_string in optionals_map: - action = optionals_map[option_string] - explicit_arg = new_explicit_arg - else: - msg = _('ignored explicit argument %r') - raise ArgumentError(action, msg % explicit_arg) - - # if the action expect exactly one argument, we've - # successfully matched the option; exit the loop - elif arg_count == 1: - stop = start_index + 1 - args = [explicit_arg] - action_tuples.append((action, args, option_string)) - break - - # error if a double-dash option did not use the - # explicit argument - else: - msg = _('ignored explicit argument %r') - raise ArgumentError(action, msg % explicit_arg) - - # if there is no explicit argument, try to match the - # optional's string arguments with the following strings - # if successful, exit the loop - else: - start = start_index + 1 - selected_patterns = arg_strings_pattern[start:] - arg_count = match_argument(action, selected_patterns) - stop = start + arg_count - args = arg_strings[start:stop] - action_tuples.append((action, args, option_string)) - break - - # add the Optional to the list and return the index at which - # the Optional's string args stopped - assert action_tuples - for action, args, option_string in action_tuples: - take_action(action, args, option_string) - return stop - - # the list of Positionals left to be parsed; this is modified - # by consume_positionals() - positionals = self._get_positional_actions() - - # function to convert arg_strings into positional actions - def consume_positionals(start_index): - # match as many Positionals as possible - match_partial = self._match_arguments_partial - selected_pattern = arg_strings_pattern[start_index:] - arg_counts = match_partial(positionals, selected_pattern) - - # slice off the appropriate arg strings for each Positional - # and add the Positional and its args to the list - for action, arg_count in zip(positionals, arg_counts): - args = arg_strings[start_index: start_index + arg_count] - start_index += arg_count - take_action(action, args) - - # slice off the Positionals that we just parsed and return the - # index at which the Positionals' string args stopped - positionals[:] = positionals[len(arg_counts):] - return start_index - - # consume Positionals and Optionals alternately, until we have - # passed the last option string - extras = [] - start_index = 0 - if option_string_indices: - max_option_string_index = max(option_string_indices) - else: - max_option_string_index = -1 - while start_index <= max_option_string_index: - - # consume any Positionals preceding the next option - next_option_string_index = min([ - index - for index in option_string_indices - if index >= start_index]) - if start_index != next_option_string_index: - positionals_end_index = consume_positionals(start_index) - - # only try to parse the next optional if we didn't consume - # the option string during the positionals parsing - if positionals_end_index > start_index: - start_index = positionals_end_index - continue - else: - start_index = positionals_end_index - - # if we consumed all the positionals we could and we're not - # at the index of an option string, there were extra arguments - if start_index not in option_string_indices: - strings = arg_strings[start_index:next_option_string_index] - extras.extend(strings) - start_index = next_option_string_index - - # consume the next optional and any arguments for it - start_index = consume_optional(start_index) - - # consume any positionals following the last Optional - stop_index = consume_positionals(start_index) - - # if we didn't consume all the argument strings, there were extras - extras.extend(arg_strings[stop_index:]) - - # make sure all required actions were present - required_actions = [_get_action_name(action) for action in self._actions - if action.required and action not in seen_actions] - if required_actions: - self.error(_('the following arguments are required: %s') % - ', '.join(required_actions)) - - # make sure all required groups had one option present - for group in self._mutually_exclusive_groups: - if group.required: - for action in group._group_actions: - if action in seen_non_default_actions: - break - - # if no actions were used, report the error - else: - names = [_get_action_name(action) - for action in group._group_actions - if action.help is not SUPPRESS] - msg = _('one of the arguments %s is required') - self.error(msg % ' '.join(names)) - - # return the updated namespace and the extra arguments - return namespace, extras - - def _read_args_from_files(self, arg_strings): - # expand arguments referencing files - new_arg_strings = [] - for arg_string in arg_strings: - - # for regular arguments, just add them back into the list - if arg_string[0] not in self.fromfile_prefix_chars: - new_arg_strings.append(arg_string) - - # replace arguments referencing files with the file content - else: - try: - args_file = open(arg_string[1:]) - try: - arg_strings = [] - for arg_line in args_file.read().splitlines(): - for arg in self.convert_arg_line_to_args(arg_line): - arg_strings.append(arg) - arg_strings = self._read_args_from_files(arg_strings) - new_arg_strings.extend(arg_strings) - finally: - args_file.close() - except IOError: - err = _sys.exc_info()[1] - self.error(str(err)) - - # return the modified argument list - return new_arg_strings - - def convert_arg_line_to_args(self, arg_line): - return [arg_line] - - def _match_argument(self, action, arg_strings_pattern): - # match the pattern for this action to the arg strings - nargs_pattern = self._get_nargs_pattern(action) - match = _re.match(nargs_pattern, arg_strings_pattern) - - # raise an exception if we weren't able to find a match - if match is None: - nargs_errors = { - None: _('expected one argument'), - OPTIONAL: _('expected at most one argument'), - ONE_OR_MORE: _('expected at least one argument'), - } - default = ngettext('expected %s argument', - 'expected %s arguments', - action.nargs) % action.nargs - msg = nargs_errors.get(action.nargs, default) - raise ArgumentError(action, msg) - - # return the number of arguments matched - return len(match.group(1)) - - def _match_arguments_partial(self, actions, arg_strings_pattern): - # progressively shorten the actions list by slicing off the - # final actions until we find a match - result = [] - for i in range(len(actions), 0, -1): - actions_slice = actions[:i] - pattern = ''.join([self._get_nargs_pattern(action) - for action in actions_slice]) - match = _re.match(pattern, arg_strings_pattern) - if match is not None: - result.extend([len(string) for string in match.groups()]) - break - - # return the list of arg string counts - return result - - def _parse_optional(self, arg_string): - # if it's an empty string, it was meant to be a positional - if not arg_string: - return None - - # if it doesn't start with a prefix, it was meant to be positional - if not arg_string[0] in self.prefix_chars: - return None - - # if the option string is present in the parser, return the action - if arg_string in self._option_string_actions: - action = self._option_string_actions[arg_string] - return action, arg_string, None - - # if it's just a single character, it was meant to be positional - if len(arg_string) == 1: - return None - - # if the option string before the "=" is present, return the action - if '=' in arg_string: - option_string, explicit_arg = arg_string.split('=', 1) - if option_string in self._option_string_actions: - action = self._option_string_actions[option_string] - return action, option_string, explicit_arg - - # search through all possible prefixes of the option string - # and all actions in the parser for possible interpretations - option_tuples = self._get_option_tuples(arg_string) - - # if multiple actions match, the option string was ambiguous - if len(option_tuples) > 1: - options = ', '.join([option_string - for action, option_string, explicit_arg in option_tuples]) - args = {'option': arg_string, 'matches': options} - msg = _('ambiguous option: %(option)s could match %(matches)s') - self.error(msg % args) - - # if exactly one action matched, this segmentation is good, - # so return the parsed action - elif len(option_tuples) == 1: - option_tuple, = option_tuples - return option_tuple - - # if it was not found as an option, but it looks like a negative - # number, it was meant to be positional - # unless there are negative-number-like options - if self._negative_number_matcher.match(arg_string): - if not self._has_negative_number_optionals: - return None - - # if it contains a space, it was meant to be a positional - if ' ' in arg_string: - return None - - # it was meant to be an optional but there is no such option - # in this parser (though it might be a valid option in a subparser) - return None, arg_string, None - - def _get_option_tuples(self, option_string): - result = [] - - # option strings starting with two prefix characters are only - # split at the '=' - chars = self.prefix_chars - if option_string[0] in chars and option_string[1] in chars: - if '=' in option_string: - option_prefix, explicit_arg = option_string.split('=', 1) - else: - option_prefix = option_string - explicit_arg = None - for option_string in self._option_string_actions: - if option_string.startswith(option_prefix): - action = self._option_string_actions[option_string] - tup = action, option_string, explicit_arg - result.append(tup) - - # single character options can be concatenated with their arguments - # but multiple character options always have to have their argument - # separate - elif option_string[0] in chars and option_string[1] not in chars: - option_prefix = option_string - explicit_arg = None - short_option_prefix = option_string[:2] - short_explicit_arg = option_string[2:] - - for option_string in self._option_string_actions: - if option_string == short_option_prefix: - action = self._option_string_actions[option_string] - tup = action, option_string, short_explicit_arg - result.append(tup) - elif option_string.startswith(option_prefix): - action = self._option_string_actions[option_string] - tup = action, option_string, explicit_arg - result.append(tup) - - # shouldn't ever get here - else: - self.error(_('unexpected option string: %s') % option_string) - - # return the collected option tuples - return result - - def _get_nargs_pattern(self, action): - # in all examples below, we have to allow for '--' args - # which are represented as '-' in the pattern - nargs = action.nargs - - # the default (None) is assumed to be a single argument - if nargs is None: - nargs_pattern = '(-*A-*)' - - # allow zero or one arguments - elif nargs == OPTIONAL: - nargs_pattern = '(-*A?-*)' - - # allow zero or more arguments - elif nargs == ZERO_OR_MORE: - nargs_pattern = '(-*[A-]*)' - - # allow one or more arguments - elif nargs == ONE_OR_MORE: - nargs_pattern = '(-*A[A-]*)' - - # allow any number of options or arguments - elif nargs == REMAINDER: - nargs_pattern = '([-AO]*)' - - # allow one argument followed by any number of options or arguments - elif nargs == PARSER: - nargs_pattern = '(-*A[-AO]*)' - - # all others should be integers - else: - nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs) - - # if this is an optional action, -- is not allowed - if action.option_strings: - nargs_pattern = nargs_pattern.replace('-*', '') - nargs_pattern = nargs_pattern.replace('-', '') - - # return the pattern - return nargs_pattern - - # ======================== - # Value conversion methods - # ======================== - def _get_values(self, action, arg_strings): - # for everything but PARSER args, strip out '--' - if action.nargs not in [PARSER, REMAINDER]: - arg_strings = [s for s in arg_strings if s != '--'] - - # optional argument produces a default when not present - if not arg_strings and action.nargs == OPTIONAL: - if action.option_strings: - value = action.const - else: - value = action.default - if isinstance(value, str): - value = self._get_value(action, value) - self._check_value(action, value) - - # when nargs='*' on a positional, if there were no command-line - # args, use the default if it is anything other than None - elif (not arg_strings and action.nargs == ZERO_OR_MORE and - not action.option_strings): - if action.default is not None: - value = action.default - else: - value = arg_strings - self._check_value(action, value) - - # single argument or optional argument produces a single value - elif len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]: - arg_string, = arg_strings - value = self._get_value(action, arg_string) - self._check_value(action, value) - - # REMAINDER arguments convert all values, checking none - elif action.nargs == REMAINDER: - value = [self._get_value(action, v) for v in arg_strings] - - # PARSER arguments convert all values, but check only the first - elif action.nargs == PARSER: - value = [self._get_value(action, v) for v in arg_strings] - self._check_value(action, value[0]) - - # all other types of nargs produce a list - else: - value = [self._get_value(action, v) for v in arg_strings] - for v in value: - self._check_value(action, v) - - # return the converted value - return value - - def _get_value(self, action, arg_string): - type_func = self._registry_get('type', action.type, action.type) - if not _callable(type_func): - msg = _('%r is not callable') - raise ArgumentError(action, msg % type_func) - - # convert the value to the appropriate type - try: - result = type_func(arg_string) - - # ArgumentTypeErrors indicate errors - except ArgumentTypeError: - name = getattr(action.type, '__name__', repr(action.type)) - msg = str(_sys.exc_info()[1]) - raise ArgumentError(action, msg) - - # TypeErrors or ValueErrors also indicate errors - except (TypeError, ValueError): - name = getattr(action.type, '__name__', repr(action.type)) - args = {'type': name, 'value': arg_string} - msg = _('invalid %(type)s value: %(value)r') - raise ArgumentError(action, msg % args) - - # return the converted value - return result - - def _check_value(self, action, value): - # converted value must be one of the choices (if specified) - if action.choices is not None and value not in action.choices: - args = {'value': value, - 'choices': ', '.join(map(repr, action.choices))} - msg = _('invalid choice: %(value)r (choose from %(choices)s)') - raise ArgumentError(action, msg % args) - - # ======================= - # Help-formatting methods - # ======================= - def format_usage(self): - formatter = self._get_formatter() - formatter.add_usage(self.usage, self._actions, - self._mutually_exclusive_groups) - return formatter.format_help() - - def format_help(self): - formatter = self._get_formatter() - - # usage - formatter.add_usage(self.usage, self._actions, - self._mutually_exclusive_groups) - - # description - formatter.add_text(self.description) - - # positionals, optionals and user-defined groups - for action_group in self._action_groups: - formatter.start_section(action_group.title) - formatter.add_text(action_group.description) - formatter.add_arguments(action_group._group_actions) - formatter.end_section() - - # epilog - formatter.add_text(self.epilog) - - # determine help from format above - return formatter.format_help() - - def format_version(self): - import warnings - warnings.warn( - 'The format_version method is deprecated -- the "version" ' - 'argument to ArgumentParser is no longer supported.', - DeprecationWarning) - formatter = self._get_formatter() - formatter.add_text(self.version) - return formatter.format_help() - - def _get_formatter(self): - return self.formatter_class(prog=self.prog) - - # ===================== - # Help-printing methods - # ===================== - def print_usage(self, file=None): - if file is None: - file = _sys.stdout - self._print_message(self.format_usage(), file) - - def print_help(self, file=None): - if file is None: - file = _sys.stdout - self._print_message(self.format_help(), file) - - def print_version(self, file=None): - import warnings - warnings.warn( - 'The print_version method is deprecated -- the "version" ' - 'argument to ArgumentParser is no longer supported.', - DeprecationWarning) - self._print_message(self.format_version(), file) - - def _print_message(self, message, file=None): - if message: - if file is None: - file = _sys.stderr - file.write(message) - - # =============== - # Exiting methods - # =============== - def exit(self, status=0, message=None): - if message: - self._print_message(message, _sys.stderr) - _sys.exit(status) - - def error(self, message): - """error(message: string) - - Prints a usage message incorporating the message to stderr and - exits. - - If you override this in a subclass, it should not return -- it - should either exit or raise an exception. - """ - self.print_usage(_sys.stderr) - args = {'prog': self.prog, 'message': message} - self.exit(2, _('%(prog)s: error: %(message)s\n') % args) diff --git a/lib/beets/LICENSE b/lib/beets/LICENSE deleted file mode 100644 index cddcf990..00000000 --- a/lib/beets/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License - -Copyright (c) 2010-2014 Adrian Sampson - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/lib/beets/README.rst b/lib/beets/README.rst deleted file mode 100644 index 8c64e244..00000000 --- a/lib/beets/README.rst +++ /dev/null @@ -1,94 +0,0 @@ -.. image:: https://travis-ci.org/sampsyo/beets.svg?branch=master - :target: https://travis-ci.org/sampsyo/beets - -.. image:: http://img.shields.io/coveralls/sampsyo/beets.svg - :target: https://coveralls.io/r/sampsyo/beets - -.. image:: http://img.shields.io/pypi/v/beets.svg - :target: https://pypi.python.org/pypi/beets - -Beets is the media library management system for obsessive-compulsive music -geeks. - -The purpose of beets is to get your music collection right once and for all. -It catalogs your collection, automatically improving its metadata as it goes. -It then provides a bouquet of tools for manipulating and accessing your music. - -Here's an example of beets' brainy tag corrector doing its thing:: - - $ beet import ~/music/ladytron - Tagging: - Ladytron - Witching Hour - (Similarity: 98.4%) - * Last One Standing -> The Last One Standing - * Beauty -> Beauty*2 - * White Light Generation -> Whitelightgenerator - * All the Way -> All the Way... - -Because beets is designed as a library, it can do almost anything you can -imagine for your music collection. Via `plugins`_, beets becomes a panacea: - -- Fetch or calculate all the metadata you could possibly need: `album art`_, - `lyrics`_, `genres`_, `tempos`_, `ReplayGain`_ levels, or `acoustic - fingerprints`_. -- Get metadata from `MusicBrainz`_, `Discogs`_, or `Beatport`_. Or guess - metadata using songs' filenames or their acoustic fingerprints. -- `Transcode audio`_ to any format you like. -- Check your library for `duplicate tracks and albums`_ or for `albums that - are missing tracks`_. -- Clean up crufty tags left behind by other, less-awesome tools. -- Embed and extract album art from files' metadata. -- Browse your music library graphically through a Web browser and play it in any - browser that supports `HTML5 Audio`_. -- Analyze music files' metadata from the command line. -- Listen to your library with a music player that speaks the `MPD`_ protocol - and works with a staggering variety of interfaces. - -If beets doesn't do what you want yet, `writing your own plugin`_ is -shockingly simple if you know a little Python. - -.. _plugins: http://beets.readthedocs.org/page/plugins/ -.. _MPD: http://www.musicpd.org/ -.. _MusicBrainz music collection: http://musicbrainz.org/doc/Collections/ -.. _writing your own plugin: - http://beets.readthedocs.org/page/dev/plugins.html -.. _HTML5 Audio: - http://www.w3.org/TR/html-markup/audio.html -.. _albums that are missing tracks: - http://beets.readthedocs.org/page/plugins/missing.html -.. _duplicate tracks and albums: - http://beets.readthedocs.org/page/plugins/duplicates.html -.. _Transcode audio: - http://beets.readthedocs.org/page/plugins/convert.html -.. _Beatport: http://www.beatport.com/ -.. _Discogs: http://www.discogs.com/ -.. _acoustic fingerprints: - http://beets.readthedocs.org/page/plugins/chroma.html -.. _ReplayGain: http://beets.readthedocs.org/page/plugins/replaygain.html -.. _tempos: http://beets.readthedocs.org/page/plugins/echonest.html -.. _genres: http://beets.readthedocs.org/page/plugins/lastgenre.html -.. _album art: http://beets.readthedocs.org/page/plugins/fetchart.html -.. _lyrics: http://beets.readthedocs.org/page/plugins/lyrics.html -.. _MusicBrainz: http://musicbrainz.org/ - -Read More ---------- - -Learn more about beets at `its Web site`_. Follow `@b33ts`_ on Twitter for -news and updates. - -You can install beets by typing ``pip install beets``. Then check out the -`Getting Started`_ guide. - -.. _its Web site: http://beets.radbox.org/ -.. _Getting Started: http://beets.readthedocs.org/page/guides/main.html -.. _@b33ts: http://twitter.com/b33ts/ - -Authors -------- - -Beets is by `Adrian Sampson`_ with a supporting cast of thousands. For help, -please contact the `mailing list`_. - -.. _mailing list: https://groups.google.com/forum/#!forum/beets-users -.. _Adrian Sampson: http://homes.cs.washington.edu/~asampson/ diff --git a/lib/beets/__init__.py b/lib/beets/__init__.py deleted file mode 100644 index d050a028..00000000 --- a/lib/beets/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -# This file is part of beets. -# Copyright 2014, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -# This particular version has been slightly modified to work with Headphones -# https://github.com/rembo10/headphones - -__version__ = '1.3.10-headphones' -__author__ = 'Adrian Sampson ' - -import os - -import beets.library -from beets.util import confit - -Library = beets.library.Library - -config = confit.LazyConfig(os.path.dirname(__file__), __name__) diff --git a/lib/beets/autotag/__init__.py b/lib/beets/autotag/__init__.py deleted file mode 100644 index 7c517c60..00000000 --- a/lib/beets/autotag/__init__.py +++ /dev/null @@ -1,136 +0,0 @@ -# This file is part of beets. -# Copyright 2013, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Facilities for automatically determining files' correct metadata. -""" -import logging - -from beets import config - -# Parts of external interface. -from .hooks import AlbumInfo, TrackInfo, AlbumMatch, TrackMatch # noqa -from .match import tag_item, tag_album # noqa -from .match import Recommendation # noqa - -# Global logger. -log = logging.getLogger('beets') - - -# Additional utilities for the main interface. - -def apply_item_metadata(item, track_info): - """Set an item's metadata from its matched TrackInfo object. - """ - item.artist = track_info.artist - item.artist_sort = track_info.artist_sort - item.artist_credit = track_info.artist_credit - item.title = track_info.title - item.mb_trackid = track_info.track_id - if track_info.artist_id: - item.mb_artistid = track_info.artist_id - # At the moment, the other metadata is left intact (including album - # and track number). Perhaps these should be emptied? - - -def apply_metadata(album_info, mapping): - """Set the items' metadata to match an AlbumInfo object using a - mapping from Items to TrackInfo objects. - """ - for item, track_info in mapping.iteritems(): - # Album, artist, track count. - if track_info.artist: - item.artist = track_info.artist - else: - item.artist = album_info.artist - item.albumartist = album_info.artist - item.album = album_info.album - - # Artist sort and credit names. - item.artist_sort = track_info.artist_sort or album_info.artist_sort - item.artist_credit = (track_info.artist_credit or - album_info.artist_credit) - item.albumartist_sort = album_info.artist_sort - item.albumartist_credit = album_info.artist_credit - - # Release date. - for prefix in '', 'original_': - if config['original_date'] and not prefix: - # Ignore specific release date. - continue - - for suffix in 'year', 'month', 'day': - key = prefix + suffix - value = getattr(album_info, key) or 0 - - # If we don't even have a year, apply nothing. - if suffix == 'year' and not value: - break - - # Otherwise, set the fetched value (or 0 for the month - # and day if not available). - item[key] = value - - # If we're using original release date for both fields, - # also set item.year = info.original_year, etc. - if config['original_date']: - item[suffix] = value - - # Title. - item.title = track_info.title - - if config['per_disc_numbering']: - item.track = track_info.medium_index or track_info.index - item.tracktotal = track_info.medium_total or len(album_info.tracks) - else: - item.track = track_info.index - item.tracktotal = len(album_info.tracks) - - # Disc and disc count. - item.disc = track_info.medium - item.disctotal = album_info.mediums - - # MusicBrainz IDs. - item.mb_trackid = track_info.track_id - item.mb_albumid = album_info.album_id - if track_info.artist_id: - item.mb_artistid = track_info.artist_id - else: - item.mb_artistid = album_info.artist_id - item.mb_albumartistid = album_info.artist_id - item.mb_releasegroupid = album_info.releasegroup_id - - # Compilation flag. - item.comp = album_info.va - - # Miscellaneous metadata. - for field in ('albumtype', - 'label', - 'asin', - 'catalognum', - 'script', - 'language', - 'country', - 'albumstatus', - 'albumdisambig'): - value = getattr(album_info, field) - if value is not None: - item[field] = value - if track_info.disctitle is not None: - item.disctitle = track_info.disctitle - - if track_info.media is not None: - item.media = track_info.media - - # Headphones seal of approval - item.comments = 'tagged by headphones/beets' \ No newline at end of file diff --git a/lib/beets/autotag/hooks.py b/lib/beets/autotag/hooks.py deleted file mode 100644 index beb3bd91..00000000 --- a/lib/beets/autotag/hooks.py +++ /dev/null @@ -1,579 +0,0 @@ -# This file is part of beets. -# Copyright 2013, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Glue between metadata sources and the matching logic.""" -import logging -from collections import namedtuple -import re - -from beets import plugins -from beets import config -from beets.autotag import mb -from beets.util import levenshtein -from unidecode import unidecode - -log = logging.getLogger('beets') - - -# Classes used to represent candidate options. - -class AlbumInfo(object): - """Describes a canonical release that may be used to match a release - in the library. Consists of these data members: - - - ``album``: the release title - - ``album_id``: MusicBrainz ID; UUID fragment only - - ``artist``: name of the release's primary artist - - ``artist_id`` - - ``tracks``: list of TrackInfo objects making up the release - - ``asin``: Amazon ASIN - - ``albumtype``: string describing the kind of release - - ``va``: boolean: whether the release has "various artists" - - ``year``: release year - - ``month``: release month - - ``day``: release day - - ``label``: music label responsible for the release - - ``mediums``: the number of discs in this release - - ``artist_sort``: name of the release's artist for sorting - - ``releasegroup_id``: MBID for the album's release group - - ``catalognum``: the label's catalog number for the release - - ``script``: character set used for metadata - - ``language``: human language of the metadata - - ``country``: the release country - - ``albumstatus``: MusicBrainz release status (Official, etc.) - - ``media``: delivery mechanism (Vinyl, etc.) - - ``albumdisambig``: MusicBrainz release disambiguation comment - - ``artist_credit``: Release-specific artist name - - ``data_source``: The original data source (MusicBrainz, Discogs, etc.) - - ``data_url``: The data source release URL. - - The fields up through ``tracks`` are required. The others are - optional and may be None. - """ - def __init__(self, album, album_id, artist, artist_id, tracks, asin=None, - albumtype=None, va=False, year=None, month=None, day=None, - label=None, mediums=None, artist_sort=None, - releasegroup_id=None, catalognum=None, script=None, - language=None, country=None, albumstatus=None, media=None, - albumdisambig=None, artist_credit=None, original_year=None, - original_month=None, original_day=None, data_source=None, - data_url=None): - self.album = album - self.album_id = album_id - self.artist = artist - self.artist_id = artist_id - self.tracks = tracks - self.asin = asin - self.albumtype = albumtype - self.va = va - self.year = year - self.month = month - self.day = day - self.label = label - self.mediums = mediums - self.artist_sort = artist_sort - self.releasegroup_id = releasegroup_id - self.catalognum = catalognum - self.script = script - self.language = language - self.country = country - self.albumstatus = albumstatus - self.media = media - self.albumdisambig = albumdisambig - self.artist_credit = artist_credit - self.original_year = original_year - self.original_month = original_month - self.original_day = original_day - self.data_source = data_source - self.data_url = data_url - - # Work around a bug in python-musicbrainz-ngs that causes some - # strings to be bytes rather than Unicode. - # https://github.com/alastair/python-musicbrainz-ngs/issues/85 - def decode(self, codec='utf8'): - """Ensure that all string attributes on this object, and the - constituent `TrackInfo` objects, are decoded to Unicode. - """ - for fld in ['album', 'artist', 'albumtype', 'label', 'artist_sort', - 'catalognum', 'script', 'language', 'country', - 'albumstatus', 'albumdisambig', 'artist_credit', 'media']: - value = getattr(self, fld) - if isinstance(value, str): - setattr(self, fld, value.decode(codec, 'ignore')) - - if self.tracks: - for track in self.tracks: - track.decode(codec) - - -class TrackInfo(object): - """Describes a canonical track present on a release. Appears as part - of an AlbumInfo's ``tracks`` list. Consists of these data members: - - - ``title``: name of the track - - ``track_id``: MusicBrainz ID; UUID fragment only - - ``artist``: individual track artist name - - ``artist_id`` - - ``length``: float: duration of the track in seconds - - ``index``: position on the entire release - - ``media``: delivery mechanism (Vinyl, etc.) - - ``medium``: the disc number this track appears on in the album - - ``medium_index``: the track's position on the disc - - ``medium_total``: the number of tracks on the item's disc - - ``artist_sort``: name of the track artist for sorting - - ``disctitle``: name of the individual medium (subtitle) - - ``artist_credit``: Recording-specific artist name - - Only ``title`` and ``track_id`` are required. The rest of the fields - may be None. The indices ``index``, ``medium``, and ``medium_index`` - are all 1-based. - """ - def __init__(self, title, track_id, artist=None, artist_id=None, - length=None, index=None, medium=None, medium_index=None, - medium_total=None, artist_sort=None, disctitle=None, - artist_credit=None, data_source=None, data_url=None, - media=None): - self.title = title - self.track_id = track_id - self.artist = artist - self.artist_id = artist_id - self.length = length - self.index = index - self.media = media - self.medium = medium - self.medium_index = medium_index - self.medium_total = medium_total - self.artist_sort = artist_sort - self.disctitle = disctitle - self.artist_credit = artist_credit - self.data_source = data_source - self.data_url = data_url - - # As above, work around a bug in python-musicbrainz-ngs. - def decode(self, codec='utf8'): - """Ensure that all string attributes on this object are decoded - to Unicode. - """ - for fld in ['title', 'artist', 'medium', 'artist_sort', 'disctitle', - 'artist_credit', 'media']: - value = getattr(self, fld) - if isinstance(value, str): - setattr(self, fld, value.decode(codec, 'ignore')) - - -# Candidate distance scoring. - -# Parameters for string distance function. -# Words that can be moved to the end of a string using a comma. -SD_END_WORDS = ['the', 'a', 'an'] -# Reduced weights for certain portions of the string. -SD_PATTERNS = [ - (r'^the ', 0.1), - (r'[\[\(]?(ep|single)[\]\)]?', 0.0), - (r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1), - (r'\(.*?\)', 0.3), - (r'\[.*?\]', 0.3), - (r'(, )?(pt\.|part) .+', 0.2), -] -# Replacements to use before testing distance. -SD_REPLACE = [ - (r'&', 'and'), -] - - -def _string_dist_basic(str1, str2): - """Basic edit distance between two strings, ignoring - non-alphanumeric characters and case. Comparisons are based on a - transliteration/lowering to ASCII characters. Normalized by string - length. - """ - str1 = unidecode(str1) - str2 = unidecode(str2) - str1 = re.sub(r'[^a-z0-9]', '', str1.lower()) - str2 = re.sub(r'[^a-z0-9]', '', str2.lower()) - if not str1 and not str2: - return 0.0 - return levenshtein(str1, str2) / float(max(len(str1), len(str2))) - - -def string_dist(str1, str2): - """Gives an "intuitive" edit distance between two strings. This is - an edit distance, normalized by the string length, with a number of - tweaks that reflect intuition about text. - """ - if str1 is None and str2 is None: - return 0.0 - if str1 is None or str2 is None: - return 1.0 - - str1 = str1.lower() - str2 = str2.lower() - - # Don't penalize strings that move certain words to the end. For - # example, "the something" should be considered equal to - # "something, the". - for word in SD_END_WORDS: - if str1.endswith(', %s' % word): - str1 = '%s %s' % (word, str1[:-len(word) - 2]) - if str2.endswith(', %s' % word): - str2 = '%s %s' % (word, str2[:-len(word) - 2]) - - # Perform a couple of basic normalizing substitutions. - for pat, repl in SD_REPLACE: - str1 = re.sub(pat, repl, str1) - str2 = re.sub(pat, repl, str2) - - # Change the weight for certain string portions matched by a set - # of regular expressions. We gradually change the strings and build - # up penalties associated with parts of the string that were - # deleted. - base_dist = _string_dist_basic(str1, str2) - penalty = 0.0 - for pat, weight in SD_PATTERNS: - # Get strings that drop the pattern. - case_str1 = re.sub(pat, '', str1) - case_str2 = re.sub(pat, '', str2) - - if case_str1 != str1 or case_str2 != str2: - # If the pattern was present (i.e., it is deleted in the - # the current case), recalculate the distances for the - # modified strings. - case_dist = _string_dist_basic(case_str1, case_str2) - case_delta = max(0.0, base_dist - case_dist) - if case_delta == 0.0: - continue - - # Shift our baseline strings down (to avoid rematching the - # same part of the string) and add a scaled distance - # amount to the penalties. - str1 = case_str1 - str2 = case_str2 - base_dist = case_dist - penalty += weight * case_delta - - return base_dist + penalty - - -class LazyClassProperty(object): - """A decorator implementing a read-only property that is *lazy* in - the sense that the getter is only invoked once. Subsequent accesses - through *any* instance use the cached result. - """ - def __init__(self, getter): - self.getter = getter - self.computed = False - - def __get__(self, obj, owner): - if not self.computed: - self.value = self.getter(owner) - self.computed = True - return self.value - - -class Distance(object): - """Keeps track of multiple distance penalties. Provides a single - weighted distance for all penalties as well as a weighted distance - for each individual penalty. - """ - def __init__(self): - self._penalties = {} - - @LazyClassProperty - def _weights(cls): - """A dictionary from keys to floating-point weights. - """ - weights_view = config['match']['distance_weights'] - weights = {} - for key in weights_view.keys(): - weights[key] = weights_view[key].as_number() - return weights - - # Access the components and their aggregates. - - @property - def distance(self): - """Return a weighted and normalized distance across all - penalties. - """ - dist_max = self.max_distance - if dist_max: - return self.raw_distance / self.max_distance - return 0.0 - - @property - def max_distance(self): - """Return the maximum distance penalty (normalization factor). - """ - dist_max = 0.0 - for key, penalty in self._penalties.iteritems(): - dist_max += len(penalty) * self._weights[key] - return dist_max - - @property - def raw_distance(self): - """Return the raw (denormalized) distance. - """ - dist_raw = 0.0 - for key, penalty in self._penalties.iteritems(): - dist_raw += sum(penalty) * self._weights[key] - return dist_raw - - def items(self): - """Return a list of (key, dist) pairs, with `dist` being the - weighted distance, sorted from highest to lowest. Does not - include penalties with a zero value. - """ - list_ = [] - for key in self._penalties: - dist = self[key] - if dist: - list_.append((key, dist)) - # Convert distance into a negative float we can sort items in - # ascending order (for keys, when the penalty is equal) and - # still get the items with the biggest distance first. - return sorted(list_, key=lambda (key, dist): (0 - dist, key)) - - # Behave like a float. - - def __cmp__(self, other): - return cmp(self.distance, other) - - def __float__(self): - return self.distance - - def __sub__(self, other): - return self.distance - other - - def __rsub__(self, other): - return other - self.distance - - # Behave like a dict. - - def __getitem__(self, key): - """Returns the weighted distance for a named penalty. - """ - dist = sum(self._penalties[key]) * self._weights[key] - dist_max = self.max_distance - if dist_max: - return dist / dist_max - return 0.0 - - def __iter__(self): - return iter(self.items()) - - def __len__(self): - return len(self.items()) - - def keys(self): - return [key for key, _ in self.items()] - - def update(self, dist): - """Adds all the distance penalties from `dist`. - """ - if not isinstance(dist, Distance): - raise ValueError( - '`dist` must be a Distance object, not {0}'.format(type(dist)) - ) - for key, penalties in dist._penalties.iteritems(): - self._penalties.setdefault(key, []).extend(penalties) - - # Adding components. - - def _eq(self, value1, value2): - """Returns True if `value1` is equal to `value2`. `value1` may - be a compiled regular expression, in which case it will be - matched against `value2`. - """ - if isinstance(value1, re._pattern_type): - return bool(value1.match(value2)) - return value1 == value2 - - def add(self, key, dist): - """Adds a distance penalty. `key` must correspond with a - configured weight setting. `dist` must be a float between 0.0 - and 1.0, and will be added to any existing distance penalties - for the same key. - """ - if not 0.0 <= dist <= 1.0: - raise ValueError( - '`dist` must be between 0.0 and 1.0, not {0}'.format(dist) - ) - self._penalties.setdefault(key, []).append(dist) - - def add_equality(self, key, value, options): - """Adds a distance penalty of 1.0 if `value` doesn't match any - of the values in `options`. If an option is a compiled regular - expression, it will be considered equal if it matches against - `value`. - """ - if not isinstance(options, (list, tuple)): - options = [options] - for opt in options: - if self._eq(opt, value): - dist = 0.0 - break - else: - dist = 1.0 - self.add(key, dist) - - def add_expr(self, key, expr): - """Adds a distance penalty of 1.0 if `expr` evaluates to True, - or 0.0. - """ - if expr: - self.add(key, 1.0) - else: - self.add(key, 0.0) - - def add_number(self, key, number1, number2): - """Adds a distance penalty of 1.0 for each number of difference - between `number1` and `number2`, or 0.0 when there is no - difference. Use this when there is no upper limit on the - difference between the two numbers. - """ - diff = abs(number1 - number2) - if diff: - for i in range(diff): - self.add(key, 1.0) - else: - self.add(key, 0.0) - - def add_priority(self, key, value, options): - """Adds a distance penalty that corresponds to the position at - which `value` appears in `options`. A distance penalty of 0.0 - for the first option, or 1.0 if there is no matching option. If - an option is a compiled regular expression, it will be - considered equal if it matches against `value`. - """ - if not isinstance(options, (list, tuple)): - options = [options] - unit = 1.0 / (len(options) or 1) - for i, opt in enumerate(options): - if self._eq(opt, value): - dist = i * unit - break - else: - dist = 1.0 - self.add(key, dist) - - def add_ratio(self, key, number1, number2): - """Adds a distance penalty for `number1` as a ratio of `number2`. - `number1` is bound at 0 and `number2`. - """ - number = float(max(min(number1, number2), 0)) - if number2: - dist = number / number2 - else: - dist = 0.0 - self.add(key, dist) - - def add_string(self, key, str1, str2): - """Adds a distance penalty based on the edit distance between - `str1` and `str2`. - """ - dist = string_dist(str1, str2) - self.add(key, dist) - - -# Structures that compose all the information for a candidate match. - -AlbumMatch = namedtuple('AlbumMatch', ['distance', 'info', 'mapping', - 'extra_items', 'extra_tracks']) - -TrackMatch = namedtuple('TrackMatch', ['distance', 'info']) - - -# Aggregation of sources. - -def album_for_mbid(release_id): - """Get an AlbumInfo object for a MusicBrainz release ID. Return None - if the ID is not found. - """ - try: - return mb.album_for_id(release_id) - except mb.MusicBrainzAPIError as exc: - exc.log(log) - - -def track_for_mbid(recording_id): - """Get a TrackInfo object for a MusicBrainz recording ID. Return None - if the ID is not found. - """ - try: - return mb.track_for_id(recording_id) - except mb.MusicBrainzAPIError as exc: - exc.log(log) - - -def albums_for_id(album_id): - """Get a list of albums for an ID.""" - candidates = [album_for_mbid(album_id)] - candidates.extend(plugins.album_for_id(album_id)) - return filter(None, candidates) - - -def tracks_for_id(track_id): - """Get a list of tracks for an ID.""" - candidates = [track_for_mbid(track_id)] - candidates.extend(plugins.track_for_id(track_id)) - return filter(None, candidates) - - -def album_candidates(items, artist, album, va_likely): - """Search for album matches. ``items`` is a list of Item objects - that make up the album. ``artist`` and ``album`` are the respective - names (strings), which may be derived from the item list or may be - entered by the user. ``va_likely`` is a boolean indicating whether - the album is likely to be a "various artists" release. - """ - out = [] - - # Base candidates if we have album and artist to match. - if artist and album: - try: - out.extend(mb.match_album(artist, album, len(items))) - except mb.MusicBrainzAPIError as exc: - exc.log(log) - - # Also add VA matches from MusicBrainz where appropriate. - if va_likely and album: - try: - out.extend(mb.match_album(None, album, len(items))) - except mb.MusicBrainzAPIError as exc: - exc.log(log) - - # Candidates from plugins. - out.extend(plugins.candidates(items, artist, album, va_likely)) - - return out - - -def item_candidates(item, artist, title): - """Search for item matches. ``item`` is the Item to be matched. - ``artist`` and ``title`` are strings and either reflect the item or - are specified by the user. - """ - out = [] - - # MusicBrainz candidates. - if artist and title: - try: - out.extend(mb.match_track(artist, title)) - except mb.MusicBrainzAPIError as exc: - exc.log(log) - - # Plugin candidates. - out.extend(plugins.item_candidates(item, artist, title)) - - return out diff --git a/lib/beets/autotag/match.py b/lib/beets/autotag/match.py deleted file mode 100644 index 2d1f2007..00000000 --- a/lib/beets/autotag/match.py +++ /dev/null @@ -1,492 +0,0 @@ -# This file is part of beets. -# Copyright 2013, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Matches existing metadata with canonical information to identify -releases and tracks. -""" -from __future__ import division - -import datetime -import logging -import re -from munkres import Munkres - -from beets import plugins -from beets import config -from beets.util import plurality -from beets.autotag import hooks -from beets.util.enumeration import OrderedEnum - -# Artist signals that indicate "various artists". These are used at the -# album level to determine whether a given release is likely a VA -# release and also on the track level to to remove the penalty for -# differing artists. -VA_ARTISTS = (u'', u'various artists', u'various', u'va', u'unknown') - -# Global logger. -log = logging.getLogger('beets') - - -# Recommendation enumeration. - -class Recommendation(OrderedEnum): - """Indicates a qualitative suggestion to the user about what should - be done with a given match. - """ - none = 0 - low = 1 - medium = 2 - strong = 3 - - -# Primary matching functionality. - -def current_metadata(items): - """Extract the likely current metadata for an album given a list of its - items. Return two dictionaries: - - The most common value for each field. - - Whether each field's value was unanimous (values are booleans). - """ - assert items # Must be nonempty. - - likelies = {} - consensus = {} - fields = ['artist', 'album', 'albumartist', 'year', 'disctotal', - 'mb_albumid', 'label', 'catalognum', 'country', 'media', - 'albumdisambig'] - for field in fields: - values = [item[field] for item in items if item] - likelies[field], freq = plurality(values) - consensus[field] = (freq == len(values)) - - # If there's an album artist consensus, use this for the artist. - if consensus['albumartist'] and likelies['albumartist']: - likelies['artist'] = likelies['albumartist'] - - return likelies, consensus - - -def assign_items(items, tracks): - """Given a list of Items and a list of TrackInfo objects, find the - best mapping between them. Returns a mapping from Items to TrackInfo - objects, a set of extra Items, and a set of extra TrackInfo - objects. These "extra" objects occur when there is an unequal number - of objects of the two types. - """ - # Construct the cost matrix. - costs = [] - for item in items: - row = [] - for i, track in enumerate(tracks): - row.append(track_distance(item, track)) - costs.append(row) - - # Find a minimum-cost bipartite matching. - matching = Munkres().compute(costs) - - # Produce the output matching. - mapping = dict((items[i], tracks[j]) for (i, j) in matching) - extra_items = list(set(items) - set(mapping.keys())) - extra_items.sort(key=lambda i: (i.disc, i.track, i.title)) - extra_tracks = list(set(tracks) - set(mapping.values())) - extra_tracks.sort(key=lambda t: (t.index, t.title)) - return mapping, extra_items, extra_tracks - - -def track_index_changed(item, track_info): - """Returns True if the item and track info index is different. Tolerates - per disc and per release numbering. - """ - return item.track not in (track_info.medium_index, track_info.index) - - -def track_distance(item, track_info, incl_artist=False): - """Determines the significance of a track metadata change. Returns a - Distance object. `incl_artist` indicates that a distance component should - be included for the track artist (i.e., for various-artist releases). - """ - dist = hooks.Distance() - - # Length. - if track_info.length: - diff = abs(item.length - track_info.length) - \ - config['match']['track_length_grace'].as_number() - dist.add_ratio('track_length', diff, - config['match']['track_length_max'].as_number()) - - # Title. - dist.add_string('track_title', item.title, track_info.title) - - # Artist. Only check if there is actually an artist in the track data. - if incl_artist and track_info.artist and \ - item.artist.lower() not in VA_ARTISTS: - dist.add_string('track_artist', item.artist, track_info.artist) - - # Track index. - if track_info.index and item.track: - dist.add_expr('track_index', track_index_changed(item, track_info)) - - # Track ID. - if item.mb_trackid: - dist.add_expr('track_id', item.mb_trackid != track_info.track_id) - - # Plugins. - dist.update(plugins.track_distance(item, track_info)) - - return dist - - -def distance(items, album_info, mapping): - """Determines how "significant" an album metadata change would be. - Returns a Distance object. `album_info` is an AlbumInfo object - reflecting the album to be compared. `items` is a sequence of all - Item objects that will be matched (order is not important). - `mapping` is a dictionary mapping Items to TrackInfo objects; the - keys are a subset of `items` and the values are a subset of - `album_info.tracks`. - """ - likelies, _ = current_metadata(items) - - dist = hooks.Distance() - - # Artist, if not various. - if not album_info.va: - dist.add_string('artist', likelies['artist'], album_info.artist) - - # Album. - dist.add_string('album', likelies['album'], album_info.album) - - # Current or preferred media. - if album_info.media: - # Preferred media options. - patterns = config['match']['preferred']['media'].as_str_seq() - options = [re.compile(r'(\d+x)?(%s)' % pat, re.I) for pat in patterns] - if options: - dist.add_priority('media', album_info.media, options) - # Current media. - elif likelies['media']: - dist.add_equality('media', album_info.media, likelies['media']) - - # Mediums. - if likelies['disctotal'] and album_info.mediums: - dist.add_number('mediums', likelies['disctotal'], album_info.mediums) - - # Prefer earliest release. - if album_info.year and config['match']['preferred']['original_year']: - # Assume 1889 (earliest first gramophone discs) if we don't know the - # original year. - original = album_info.original_year or 1889 - diff = abs(album_info.year - original) - diff_max = abs(datetime.date.today().year - original) - dist.add_ratio('year', diff, diff_max) - # Year. - elif likelies['year'] and album_info.year: - if likelies['year'] in (album_info.year, album_info.original_year): - # No penalty for matching release or original year. - dist.add('year', 0.0) - elif album_info.original_year: - # Prefer matchest closest to the release year. - diff = abs(likelies['year'] - album_info.year) - diff_max = abs(datetime.date.today().year - - album_info.original_year) - dist.add_ratio('year', diff, diff_max) - else: - # Full penalty when there is no original year. - dist.add('year', 1.0) - - # Preferred countries. - patterns = config['match']['preferred']['countries'].as_str_seq() - options = [re.compile(pat, re.I) for pat in patterns] - if album_info.country and options: - dist.add_priority('country', album_info.country, options) - # Country. - elif likelies['country'] and album_info.country: - dist.add_string('country', likelies['country'], album_info.country) - - # Label. - if likelies['label'] and album_info.label: - dist.add_string('label', likelies['label'], album_info.label) - - # Catalog number. - if likelies['catalognum'] and album_info.catalognum: - dist.add_string('catalognum', likelies['catalognum'], - album_info.catalognum) - - # Disambiguation. - if likelies['albumdisambig'] and album_info.albumdisambig: - dist.add_string('albumdisambig', likelies['albumdisambig'], - album_info.albumdisambig) - - # Album ID. - if likelies['mb_albumid']: - dist.add_equality('album_id', likelies['mb_albumid'], - album_info.album_id) - - # Tracks. - dist.tracks = {} - for item, track in mapping.iteritems(): - dist.tracks[track] = track_distance(item, track, album_info.va) - dist.add('tracks', dist.tracks[track].distance) - - # Missing tracks. - for i in range(len(album_info.tracks) - len(mapping)): - dist.add('missing_tracks', 1.0) - - # Unmatched tracks. - for i in range(len(items) - len(mapping)): - dist.add('unmatched_tracks', 1.0) - - # Plugins. - dist.update(plugins.album_distance(items, album_info, mapping)) - - return dist - - -def match_by_id(items): - """If the items are tagged with a MusicBrainz album ID, returns an - AlbumInfo object for the corresponding album. Otherwise, returns - None. - """ - # Is there a consensus on the MB album ID? - albumids = [item.mb_albumid for item in items if item.mb_albumid] - if not albumids: - log.debug(u'No album IDs found.') - return None - - # If all album IDs are equal, look up the album. - if bool(reduce(lambda x, y: x if x == y else (), albumids)): - albumid = albumids[0] - log.debug(u'Searching for discovered album ID: {0}'.format(albumid)) - return hooks.album_for_mbid(albumid) - else: - log.debug(u'No album ID consensus.') - - -def _recommendation(results): - """Given a sorted list of AlbumMatch or TrackMatch objects, return a - recommendation based on the results' distances. - - If the recommendation is higher than the configured maximum for - an applied penalty, the recommendation will be downgraded to the - configured maximum for that penalty. - """ - if not results: - # No candidates: no recommendation. - return Recommendation.none - - # Basic distance thresholding. - min_dist = results[0].distance - if min_dist < config['match']['strong_rec_thresh'].as_number(): - # Strong recommendation level. - rec = Recommendation.strong - elif min_dist <= config['match']['medium_rec_thresh'].as_number(): - # Medium recommendation level. - rec = Recommendation.medium - elif len(results) == 1: - # Only a single candidate. - rec = Recommendation.low - elif results[1].distance - min_dist >= \ - config['match']['rec_gap_thresh'].as_number(): - # Gap between first two candidates is large. - rec = Recommendation.low - else: - # No conclusion. Return immediately. Can't be downgraded any further. - return Recommendation.none - - # Downgrade to the max rec if it is lower than the current rec for an - # applied penalty. - keys = set(min_dist.keys()) - if isinstance(results[0], hooks.AlbumMatch): - for track_dist in min_dist.tracks.values(): - keys.update(track_dist.keys()) - max_rec_view = config['match']['max_rec'] - for key in keys: - if key in max_rec_view.keys(): - max_rec = max_rec_view[key].as_choice({ - 'strong': Recommendation.strong, - 'medium': Recommendation.medium, - 'low': Recommendation.low, - 'none': Recommendation.none, - }) - rec = min(rec, max_rec) - - return rec - - -def _add_candidate(items, results, info): - """Given a candidate AlbumInfo object, attempt to add the candidate - to the output dictionary of AlbumMatch objects. This involves - checking the track count, ordering the items, checking for - duplicates, and calculating the distance. - """ - log.debug(u'Candidate: {0} - {1}'.format(info.artist, info.album)) - - # Discard albums with zero tracks. - if not info.tracks: - log.debug('No tracks.') - return - - # Don't duplicate. - if info.album_id in results: - log.debug(u'Duplicate.') - return - - # Discard matches without required tags. - for req_tag in config['match']['required'].as_str_seq(): - if getattr(info, req_tag) is None: - log.debug(u'Ignored. Missing required tag: {0}'.format(req_tag)) - return - - # Find mapping between the items and the track info. - mapping, extra_items, extra_tracks = assign_items(items, info.tracks) - - # Get the change distance. - dist = distance(items, info, mapping) - - # Skip matches with ignored penalties. - penalties = [key for _, key in dist] - for penalty in config['match']['ignored'].as_str_seq(): - if penalty in penalties: - log.debug(u'Ignored. Penalty: {0}'.format(penalty)) - return - - log.debug(u'Success. Distance: {0}'.format(dist)) - results[info.album_id] = hooks.AlbumMatch(dist, info, mapping, - extra_items, extra_tracks) - - -def tag_album(items, search_artist=None, search_album=None, - search_id=None): - """Return a tuple of a artist name, an album name, a list of - `AlbumMatch` candidates from the metadata backend, and a - `Recommendation`. - - The artist and album are the most common values of these fields - among `items`. - - The `AlbumMatch` objects are generated by searching the metadata - backends. By default, the metadata of the items is used for the - search. This can be customized by setting the parameters. The - `mapping` field of the album has the matched `items` as keys. - - The recommendation is calculated from the match qualitiy of the - candidates. - """ - # Get current metadata. - likelies, consensus = current_metadata(items) - cur_artist = likelies['artist'] - cur_album = likelies['album'] - log.debug(u'Tagging {0} - {1}'.format(cur_artist, cur_album)) - - # The output result (distance, AlbumInfo) tuples (keyed by MB album - # ID). - candidates = {} - - # Search by explicit ID. - if search_id is not None: - log.debug(u'Searching for album ID: {0}'.format(search_id)) - search_cands = hooks.albums_for_id(search_id) - - # Use existing metadata or text search. - else: - # Try search based on current ID. - id_info = match_by_id(items) - if id_info: - _add_candidate(items, candidates, id_info) - rec = _recommendation(candidates.values()) - log.debug(u'Album ID match recommendation is {0}'.format(str(rec))) - if candidates and not config['import']['timid']: - # If we have a very good MBID match, return immediately. - # Otherwise, this match will compete against metadata-based - # matches. - if rec == Recommendation.strong: - log.debug(u'ID match.') - return cur_artist, cur_album, candidates.values(), rec - - # Search terms. - if not (search_artist and search_album): - # No explicit search terms -- use current metadata. - search_artist, search_album = cur_artist, cur_album - log.debug(u'Search terms: {0} - {1}'.format(search_artist, - search_album)) - - # Is this album likely to be a "various artist" release? - va_likely = ((not consensus['artist']) or - (search_artist.lower() in VA_ARTISTS) or - any(item.comp for item in items)) - log.debug(u'Album might be VA: {0}'.format(str(va_likely))) - - # Get the results from the data sources. - search_cands = hooks.album_candidates(items, search_artist, - search_album, va_likely) - - log.debug(u'Evaluating {0} candidates.'.format(len(search_cands))) - for info in search_cands: - _add_candidate(items, candidates, info) - - # Sort and get the recommendation. - candidates = sorted(candidates.itervalues()) - rec = _recommendation(candidates) - return cur_artist, cur_album, candidates, rec - - -def tag_item(item, search_artist=None, search_title=None, - search_id=None): - """Attempts to find metadata for a single track. Returns a - `(candidates, recommendation)` pair where `candidates` is a list of - TrackMatch objects. `search_artist` and `search_title` may be used - to override the current metadata for the purposes of the MusicBrainz - title; likewise `search_id`. - """ - # Holds candidates found so far: keys are MBIDs; values are - # (distance, TrackInfo) pairs. - candidates = {} - - # First, try matching by MusicBrainz ID. - trackid = search_id or item.mb_trackid - if trackid: - log.debug(u'Searching for track ID: {0}'.format(trackid)) - for track_info in hooks.tracks_for_id(trackid): - dist = track_distance(item, track_info, incl_artist=True) - candidates[track_info.track_id] = \ - hooks.TrackMatch(dist, track_info) - # If this is a good match, then don't keep searching. - rec = _recommendation(candidates.values()) - if rec == Recommendation.strong and not config['import']['timid']: - log.debug(u'Track ID match.') - return candidates.values(), rec - - # If we're searching by ID, don't proceed. - if search_id is not None: - if candidates: - return candidates.values(), rec - else: - return [], Recommendation.none - - # Search terms. - if not (search_artist and search_title): - search_artist, search_title = item.artist, item.title - log.debug(u'Item search terms: {0} - {1}'.format(search_artist, - search_title)) - - # Get and evaluate candidate metadata. - for track_info in hooks.item_candidates(item, search_artist, search_title): - dist = track_distance(item, track_info, incl_artist=True) - candidates[track_info.track_id] = hooks.TrackMatch(dist, track_info) - - # Sort by distance and return with recommendation. - log.debug(u'Found {0} candidates.'.format(len(candidates))) - candidates = sorted(candidates.itervalues()) - rec = _recommendation(candidates) - return candidates, rec diff --git a/lib/beets/autotag/mb.py b/lib/beets/autotag/mb.py deleted file mode 100644 index d063f627..00000000 --- a/lib/beets/autotag/mb.py +++ /dev/null @@ -1,407 +0,0 @@ -# This file is part of beets. -# Copyright 2013, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Searches for albums in the MusicBrainz database. -""" -import logging -import musicbrainzngs -import re -import traceback -from urlparse import urljoin - -import beets.autotag.hooks -import beets -from beets import util -from beets import config - -SEARCH_LIMIT = 5 -VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377' -BASE_URL = 'http://musicbrainz.org/' - -musicbrainzngs.set_useragent('beets', beets.__version__, - 'http://beets.radbox.org/') - - -class MusicBrainzAPIError(util.HumanReadableException): - """An error while talking to MusicBrainz. The `query` field is the - parameter to the action and may have any type. - """ - def __init__(self, reason, verb, query, tb=None): - self.query = query - super(MusicBrainzAPIError, self).__init__(reason, verb, tb) - - def get_message(self): - return u'{0} in {1} with query {2}'.format( - self._reasonstr(), self.verb, repr(self.query) - ) - -log = logging.getLogger('beets') - -RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups', - 'labels', 'artist-credits', 'aliases'] -TRACK_INCLUDES = ['artists', 'aliases'] - - -def track_url(trackid): - return urljoin(BASE_URL, 'recording/' + trackid) - - -def album_url(albumid): - return urljoin(BASE_URL, 'release/' + albumid) - - -def configure(): - """Set up the python-musicbrainz-ngs module according to settings - from the beets configuration. This should be called at startup. - """ - musicbrainzngs.set_hostname(config['musicbrainz']['host'].get(unicode)) - musicbrainzngs.set_rate_limit( - config['musicbrainz']['ratelimit_interval'].as_number(), - config['musicbrainz']['ratelimit'].get(int), - ) - - -def _preferred_alias(aliases): - """Given an list of alias structures for an artist credit, select - and return the user's preferred alias alias or None if no matching - alias is found. - """ - if not aliases: - return - - # Only consider aliases that have locales set. - aliases = [a for a in aliases if 'locale' in a] - - # Search configured locales in order. - for locale in config['import']['languages'].as_str_seq(): - # Find matching primary aliases for this locale. - matches = [a for a in aliases - if a['locale'] == locale and 'primary' in a] - # Skip to the next locale if we have no matches - if not matches: - continue - - return matches[0] - - -def _flatten_artist_credit(credit): - """Given a list representing an ``artist-credit`` block, flatten the - data into a triple of joined artist name strings: canonical, sort, and - credit. - """ - artist_parts = [] - artist_sort_parts = [] - artist_credit_parts = [] - for el in credit: - if isinstance(el, basestring): - # Join phrase. - artist_parts.append(el) - artist_credit_parts.append(el) - artist_sort_parts.append(el) - - else: - alias = _preferred_alias(el['artist'].get('alias-list', ())) - - # An artist. - if alias: - cur_artist_name = alias['alias'] - else: - cur_artist_name = el['artist']['name'] - artist_parts.append(cur_artist_name) - - # Artist sort name. - if alias: - artist_sort_parts.append(alias['sort-name']) - elif 'sort-name' in el['artist']: - artist_sort_parts.append(el['artist']['sort-name']) - else: - artist_sort_parts.append(cur_artist_name) - - # Artist credit. - if 'name' in el: - artist_credit_parts.append(el['name']) - else: - artist_credit_parts.append(cur_artist_name) - - return ( - ''.join(artist_parts), - ''.join(artist_sort_parts), - ''.join(artist_credit_parts), - ) - - -def track_info(recording, index=None, medium=None, medium_index=None, - medium_total=None): - """Translates a MusicBrainz recording result dictionary into a beets - ``TrackInfo`` object. Three parameters are optional and are used - only for tracks that appear on releases (non-singletons): ``index``, - the overall track number; ``medium``, the disc number; - ``medium_index``, the track's index on its medium; ``medium_total``, - the number of tracks on the medium. Each number is a 1-based index. - """ - info = beets.autotag.hooks.TrackInfo( - recording['title'], - recording['id'], - index=index, - medium=medium, - medium_index=medium_index, - medium_total=medium_total, - data_url=track_url(recording['id']), - ) - - if recording.get('artist-credit'): - # Get the artist names. - info.artist, info.artist_sort, info.artist_credit = \ - _flatten_artist_credit(recording['artist-credit']) - - # Get the ID and sort name of the first artist. - artist = recording['artist-credit'][0]['artist'] - info.artist_id = artist['id'] - - if recording.get('length'): - info.length = int(recording['length']) / (1000.0) - - info.decode() - return info - - -def _set_date_str(info, date_str, original=False): - """Given a (possibly partial) YYYY-MM-DD string and an AlbumInfo - object, set the object's release date fields appropriately. If - `original`, then set the original_year, etc., fields. - """ - if date_str: - date_parts = date_str.split('-') - for key in ('year', 'month', 'day'): - if date_parts: - date_part = date_parts.pop(0) - try: - date_num = int(date_part) - except ValueError: - continue - - if original: - key = 'original_' + key - setattr(info, key, date_num) - - -def album_info(release): - """Takes a MusicBrainz release result dictionary and returns a beets - AlbumInfo object containing the interesting data about that release. - """ - # Get artist name using join phrases. - artist_name, artist_sort_name, artist_credit_name = \ - _flatten_artist_credit(release['artist-credit']) - - # Basic info. - track_infos = [] - index = 0 - for medium in release['medium-list']: - disctitle = medium.get('title') - format = medium.get('format') - for track in medium['track-list']: - # Basic information from the recording. - index += 1 - ti = track_info( - track['recording'], - index, - int(medium['position']), - int(track['position']), - len(medium['track-list']), - ) - ti.disctitle = disctitle - ti.media = format - - # Prefer track data, where present, over recording data. - if track.get('title'): - ti.title = track['title'] - if track.get('artist-credit'): - # Get the artist names. - ti.artist, ti.artist_sort, ti.artist_credit = \ - _flatten_artist_credit(track['artist-credit']) - ti.artist_id = track['artist-credit'][0]['artist']['id'] - if track.get('length'): - ti.length = int(track['length']) / (1000.0) - - track_infos.append(ti) - - info = beets.autotag.hooks.AlbumInfo( - release['title'], - release['id'], - artist_name, - release['artist-credit'][0]['artist']['id'], - track_infos, - mediums=len(release['medium-list']), - artist_sort=artist_sort_name, - artist_credit=artist_credit_name, - data_source='MusicBrainz', - data_url=album_url(release['id']), - ) - info.va = info.artist_id == VARIOUS_ARTISTS_ID - info.asin = release.get('asin') - info.releasegroup_id = release['release-group']['id'] - info.country = release.get('country') - info.albumstatus = release.get('status') - - # Build up the disambiguation string from the release group and release. - disambig = [] - if release['release-group'].get('disambiguation'): - disambig.append(release['release-group'].get('disambiguation')) - if release.get('disambiguation'): - disambig.append(release.get('disambiguation')) - info.albumdisambig = u', '.join(disambig) - - # Release type not always populated. - if 'type' in release['release-group']: - reltype = release['release-group']['type'] - if reltype: - info.albumtype = reltype.lower() - - # Release dates. - release_date = release.get('date') - release_group_date = release['release-group'].get('first-release-date') - if not release_date: - # Fall back if release-specific date is not available. - release_date = release_group_date - _set_date_str(info, release_date, False) - _set_date_str(info, release_group_date, True) - - # Label name. - if release.get('label-info-list'): - label_info = release['label-info-list'][0] - if label_info.get('label'): - label = label_info['label']['name'] - if label != '[no label]': - info.label = label - info.catalognum = label_info.get('catalog-number') - - # Text representation data. - if release.get('text-representation'): - rep = release['text-representation'] - info.script = rep.get('script') - info.language = rep.get('language') - - # Media (format). - if release['medium-list']: - first_medium = release['medium-list'][0] - info.media = first_medium.get('format') - - info.decode() - return info - - -def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT): - """Searches for a single album ("release" in MusicBrainz parlance) - and returns an iterator over AlbumInfo objects. May raise a - MusicBrainzAPIError. - - The query consists of an artist name, an album name, and, - optionally, a number of tracks on the album. - """ - # Build search criteria. - criteria = {'release': album.lower().strip()} - if artist is not None: - criteria['artist'] = artist.lower().strip() - else: - # Various Artists search. - criteria['arid'] = VARIOUS_ARTISTS_ID - if tracks is not None: - criteria['tracks'] = str(tracks) - - # Abort if we have no search terms. - if not any(criteria.itervalues()): - return - - try: - res = musicbrainzngs.search_releases(limit=limit, **criteria) - except musicbrainzngs.MusicBrainzError as exc: - raise MusicBrainzAPIError(exc, 'release search', criteria, - traceback.format_exc()) - for release in res['release-list']: - # The search result is missing some data (namely, the tracks), - # so we just use the ID and fetch the rest of the information. - albuminfo = album_for_id(release['id']) - if albuminfo is not None: - yield albuminfo - - -def match_track(artist, title, limit=SEARCH_LIMIT): - """Searches for a single track and returns an iterable of TrackInfo - objects. May raise a MusicBrainzAPIError. - """ - criteria = { - 'artist': artist.lower().strip(), - 'recording': title.lower().strip(), - } - - if not any(criteria.itervalues()): - return - - try: - res = musicbrainzngs.search_recordings(limit=limit, **criteria) - except musicbrainzngs.MusicBrainzError as exc: - raise MusicBrainzAPIError(exc, 'recording search', criteria, - traceback.format_exc()) - for recording in res['recording-list']: - yield track_info(recording) - - -def _parse_id(s): - """Search for a MusicBrainz ID in the given string and return it. If - no ID can be found, return None. - """ - # Find the first thing that looks like a UUID/MBID. - match = re.search('[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}', s) - if match: - return match.group() - - -def album_for_id(releaseid): - """Fetches an album by its MusicBrainz ID and returns an AlbumInfo - object or None if the album is not found. May raise a - MusicBrainzAPIError. - """ - albumid = _parse_id(releaseid) - if not albumid: - log.debug(u'Invalid MBID ({0}).'.format(releaseid)) - return - try: - res = musicbrainzngs.get_release_by_id(albumid, - RELEASE_INCLUDES) - except musicbrainzngs.ResponseError: - log.debug(u'Album ID match failed.') - return None - except musicbrainzngs.MusicBrainzError as exc: - raise MusicBrainzAPIError(exc, 'get release by ID', albumid, - traceback.format_exc()) - return album_info(res['release']) - - -def track_for_id(releaseid): - """Fetches a track by its MusicBrainz ID. Returns a TrackInfo object - or None if no track is found. May raise a MusicBrainzAPIError. - """ - trackid = _parse_id(releaseid) - if not trackid: - log.debug(u'Invalid MBID ({0}).'.format(releaseid)) - return - try: - res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES) - except musicbrainzngs.ResponseError: - log.debug(u'Track ID match failed.') - return None - except musicbrainzngs.MusicBrainzError as exc: - raise MusicBrainzAPIError(exc, 'get recording by ID', trackid, - traceback.format_exc()) - return track_info(res['recording']) diff --git a/lib/beets/config_default.yaml b/lib/beets/config_default.yaml deleted file mode 100644 index 78f16d05..00000000 --- a/lib/beets/config_default.yaml +++ /dev/null @@ -1,109 +0,0 @@ -library: library.db -directory: ~/Music - -import: - write: yes - copy: yes - move: no - link: no - delete: no - resume: ask - incremental: no - quiet_fallback: skip - none_rec_action: ask - timid: no - log: - autotag: yes - quiet: no - singletons: no - default_action: apply - languages: [] - detail: no - flat: no - group_albums: no - pretend: false - -clutter: ["Thumbs.DB", ".DS_Store"] -ignore: [".*", "*~", "System Volume Information"] -replace: - '[\\/]': _ - '^\.': _ - '[\x00-\x1f]': _ - '[<>:"\?\*\|]': _ - '\.$': _ - '\s+$': '' - '^\s+': '' -path_sep_replace: _ -asciify_paths: false -art_filename: cover -max_filename_length: 0 - -plugins: [] -pluginpath: [] -threaded: yes -color: yes -timeout: 5.0 -per_disc_numbering: no -verbose: no -terminal_encoding: utf8 -original_date: no -id3v23: no - -ui: - terminal_width: 80 - length_diff_thresh: 10.0 - -list_format_item: $artist - $album - $title -list_format_album: $albumartist - $album -time_format: '%Y-%m-%d %H:%M:%S' - -sort_album: albumartist+ album+ -sort_item: artist+ album+ disc+ track+ - -paths: - default: $albumartist/$album%aunique{}/$track $title - singleton: Non-Album/$artist/$title - comp: Compilations/$album%aunique{}/$track $title - -statefile: state.pickle - -musicbrainz: - host: musicbrainz.org - ratelimit: 1 - ratelimit_interval: 1.0 - -match: - strong_rec_thresh: 0.04 - medium_rec_thresh: 0.25 - rec_gap_thresh: 0.25 - max_rec: - missing_tracks: medium - unmatched_tracks: medium - distance_weights: - source: 2.0 - artist: 3.0 - album: 3.0 - media: 1.0 - mediums: 1.0 - year: 1.0 - country: 0.5 - label: 0.5 - catalognum: 0.5 - albumdisambig: 0.5 - album_id: 5.0 - tracks: 2.0 - missing_tracks: 0.9 - unmatched_tracks: 0.6 - track_title: 3.0 - track_artist: 2.0 - track_index: 1.0 - track_length: 2.0 - track_id: 5.0 - preferred: - countries: [] - media: [] - original_year: no - ignored: [] - required: [] - track_length_grace: 10 - track_length_max: 30 diff --git a/lib/beets/dbcore/__init__.py b/lib/beets/dbcore/__init__.py deleted file mode 100644 index c364fdfc..00000000 --- a/lib/beets/dbcore/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# This file is part of beets. -# Copyright 2014, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""DBCore is an abstract database package that forms the basis for beets' -Library. -""" -from .db import Model, Database -from .query import Query, FieldQuery, MatchQuery, AndQuery, OrQuery -from .types import Type -from .queryparse import query_from_strings -from .queryparse import sort_from_strings -from .queryparse import parse_sorted_query - -# flake8: noqa diff --git a/lib/beets/dbcore/db.py b/lib/beets/dbcore/db.py deleted file mode 100644 index 0c786daa..00000000 --- a/lib/beets/dbcore/db.py +++ /dev/null @@ -1,820 +0,0 @@ -# This file is part of beets. -# Copyright 2014, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""The central Model and Database constructs for DBCore. -""" -import time -import os -from collections import defaultdict -import threading -import sqlite3 -import contextlib -import collections - -import beets -from beets.util.functemplate import Template -from beets.dbcore import types -from .query import MatchQuery, NullSort, TrueQuery - - -class FormattedMapping(collections.Mapping): - """A `dict`-like formatted view of a model. - - The accessor `mapping[key]` returns the formated version of - `model[key]` as a unicode string. - - If `for_path` is true, all path separators in the formatted values - are replaced. - """ - - def __init__(self, model, for_path=False): - self.for_path = for_path - self.model = model - self.model_keys = model.keys(True) - - def __getitem__(self, key): - if key in self.model_keys: - return self._get_formatted(self.model, key) - else: - raise KeyError(key) - - def __iter__(self): - return iter(self.model_keys) - - def __len__(self): - return len(self.model_keys) - - def get(self, key, default=None): - if default is None: - default = self.model._type(key).format(None) - return super(FormattedMapping, self).get(key, default) - - def _get_formatted(self, model, key): - value = model._type(key).format(model.get(key)) - if isinstance(value, bytes): - value = value.decode('utf8', 'ignore') - - if self.for_path: - sep_repl = beets.config['path_sep_replace'].get(unicode) - for sep in (os.path.sep, os.path.altsep): - if sep: - value = value.replace(sep, sep_repl) - - return value - - -# Abstract base for model classes. - -class Model(object): - """An abstract object representing an object in the database. Model - objects act like dictionaries (i.e., the allow subscript access like - ``obj['field']``). The same field set is available via attribute - access as a shortcut (i.e., ``obj.field``). Three kinds of attributes are - available: - - * **Fixed attributes** come from a predetermined list of field - names. These fields correspond to SQLite table columns and are - thus fast to read, write, and query. - * **Flexible attributes** are free-form and do not need to be listed - ahead of time. - * **Computed attributes** are read-only fields computed by a getter - function provided by a plugin. - - Access to all three field types is uniform: ``obj.field`` works the - same regardless of whether ``field`` is fixed, flexible, or - computed. - - Model objects can optionally be associated with a `Library` object, - in which case they can be loaded and stored from the database. Dirty - flags are used to track which fields need to be stored. - """ - - # Abstract components (to be provided by subclasses). - - _table = None - """The main SQLite table name. - """ - - _flex_table = None - """The flex field SQLite table name. - """ - - _fields = {} - """A mapping indicating available "fixed" fields on this type. The - keys are field names and the values are `Type` objects. - """ - - _search_fields = () - """The fields that should be queried by default by unqualified query - terms. - """ - - _types = {} - """Optional Types for non-fixed (i.e., flexible and computed) fields. - """ - - _sorts = {} - """Optional named sort criteria. The keys are strings and the values - are subclasses of `Sort`. - """ - - _always_dirty = False - """By default, fields only become "dirty" when their value actually - changes. Enabling this flag marks fields as dirty even when the new - value is the same as the old value (e.g., `o.f = o.f`). - """ - - @classmethod - def _getters(cls): - """Return a mapping from field names to getter functions. - """ - # We could cache this if it becomes a performance problem to - # gather the getter mapping every time. - raise NotImplementedError() - - def _template_funcs(self): - """Return a mapping from function names to text-transformer - functions. - """ - # As above: we could consider caching this result. - raise NotImplementedError() - - # Basic operation. - - def __init__(self, db=None, **values): - """Create a new object with an optional Database association and - initial field values. - """ - self._db = db - self._dirty = set() - self._values_fixed = {} - self._values_flex = {} - - # Initial contents. - self.update(values) - self.clear_dirty() - - @classmethod - def _awaken(cls, db=None, fixed_values={}, flex_values={}): - """Create an object with values drawn from the database. - - This is a performance optimization: the checks involved with - ordinary construction are bypassed. - """ - obj = cls(db) - for key, value in fixed_values.iteritems(): - obj._values_fixed[key] = cls._type(key).from_sql(value) - for key, value in flex_values.iteritems(): - obj._values_flex[key] = cls._type(key).from_sql(value) - return obj - - def __repr__(self): - return '{0}({1})'.format( - type(self).__name__, - ', '.join('{0}={1!r}'.format(k, v) for k, v in dict(self).items()), - ) - - def clear_dirty(self): - """Mark all fields as *clean* (i.e., not needing to be stored to - the database). - """ - self._dirty = set() - - def _check_db(self, need_id=True): - """Ensure that this object is associated with a database row: it - has a reference to a database (`_db`) and an id. A ValueError - exception is raised otherwise. - """ - if not self._db: - raise ValueError('{0} has no database'.format(type(self).__name__)) - if need_id and not self.id: - raise ValueError('{0} has no id'.format(type(self).__name__)) - - # Essential field accessors. - - @classmethod - def _type(self, key): - """Get the type of a field, a `Type` instance. - - If the field has no explicit type, it is given the base `Type`, - which does no conversion. - """ - return self._fields.get(key) or self._types.get(key) or types.DEFAULT - - def __getitem__(self, key): - """Get the value for a field. Raise a KeyError if the field is - not available. - """ - getters = self._getters() - if key in getters: # Computed. - return getters[key](self) - elif key in self._fields: # Fixed. - return self._values_fixed.get(key) - elif key in self._values_flex: # Flexible. - return self._values_flex[key] - else: - raise KeyError(key) - - def __setitem__(self, key, value): - """Assign the value for a field. - """ - # Choose where to place the value. - if key in self._fields: - source = self._values_fixed - else: - source = self._values_flex - - # If the field has a type, filter the value. - value = self._type(key).normalize(value) - - # Assign value and possibly mark as dirty. - old_value = source.get(key) - source[key] = value - if self._always_dirty or old_value != value: - self._dirty.add(key) - - def __delitem__(self, key): - """Remove a flexible attribute from the model. - """ - if key in self._values_flex: # Flexible. - del self._values_flex[key] - self._dirty.add(key) # Mark for dropping on store. - elif key in self._getters(): # Computed. - raise KeyError('computed field {0} cannot be deleted'.format(key)) - elif key in self._fields: # Fixed. - raise KeyError('fixed field {0} cannot be deleted'.format(key)) - else: - raise KeyError('no such field {0}'.format(key)) - - def keys(self, computed=False): - """Get a list of available field names for this object. The - `computed` parameter controls whether computed (plugin-provided) - fields are included in the key list. - """ - base_keys = list(self._fields) + self._values_flex.keys() - if computed: - return base_keys + self._getters().keys() - else: - return base_keys - - # Act like a dictionary. - - def update(self, values): - """Assign all values in the given dict. - """ - for key, value in values.items(): - self[key] = value - - def items(self): - """Iterate over (key, value) pairs that this object contains. - Computed fields are not included. - """ - for key in self: - yield key, self[key] - - def get(self, key, default=None): - """Get the value for a given key or `default` if it does not - exist. - """ - if key in self: - return self[key] - else: - return default - - def __contains__(self, key): - """Determine whether `key` is an attribute on this object. - """ - return key in self.keys(True) - - def __iter__(self): - """Iterate over the available field names (excluding computed - fields). - """ - return iter(self.keys()) - - # Convenient attribute access. - - def __getattr__(self, key): - if key.startswith('_'): - raise AttributeError('model has no attribute {0!r}'.format(key)) - else: - try: - return self[key] - except KeyError: - raise AttributeError('no such field {0!r}'.format(key)) - - def __setattr__(self, key, value): - if key.startswith('_'): - super(Model, self).__setattr__(key, value) - else: - self[key] = value - - def __delattr__(self, key): - if key.startswith('_'): - super(Model, self).__delattr__(key) - else: - del self[key] - - # Database interaction (CRUD methods). - - def store(self): - """Save the object's metadata into the library database. - """ - self._check_db() - - # Build assignments for query. - assignments = [] - subvars = [] - for key in self._fields: - if key != 'id' and key in self._dirty: - self._dirty.remove(key) - assignments.append(key + '=?') - value = self._type(key).to_sql(self[key]) - subvars.append(value) - assignments = ','.join(assignments) - - with self._db.transaction() as tx: - # Main table update. - if assignments: - query = 'UPDATE {0} SET {1} WHERE id=?'.format( - self._table, assignments - ) - subvars.append(self.id) - tx.mutate(query, subvars) - - # Modified/added flexible attributes. - for key, value in self._values_flex.items(): - if key in self._dirty: - self._dirty.remove(key) - tx.mutate( - 'INSERT INTO {0} ' - '(entity_id, key, value) ' - 'VALUES (?, ?, ?);'.format(self._flex_table), - (self.id, key, value), - ) - - # Deleted flexible attributes. - for key in self._dirty: - tx.mutate( - 'DELETE FROM {0} ' - 'WHERE entity_id=? AND key=?'.format(self._flex_table), - (self.id, key) - ) - - self.clear_dirty() - - def load(self): - """Refresh the object's metadata from the library database. - """ - self._check_db() - stored_obj = self._db._get(type(self), self.id) - assert stored_obj is not None, "object {0} not in DB".format(self.id) - self._values_fixed = {} - self._values_flex = {} - self.update(dict(stored_obj)) - self.clear_dirty() - - def remove(self): - """Remove the object's associated rows from the database. - """ - self._check_db() - with self._db.transaction() as tx: - tx.mutate( - 'DELETE FROM {0} WHERE id=?'.format(self._table), - (self.id,) - ) - tx.mutate( - 'DELETE FROM {0} WHERE entity_id=?'.format(self._flex_table), - (self.id,) - ) - - def add(self, db=None): - """Add the object to the library database. This object must be - associated with a database; you can provide one via the `db` - parameter or use the currently associated database. - - The object's `id` and `added` fields are set along with any - current field values. - """ - if db: - self._db = db - self._check_db(False) - - with self._db.transaction() as tx: - new_id = tx.mutate( - 'INSERT INTO {0} DEFAULT VALUES'.format(self._table) - ) - self.id = new_id - self.added = time.time() - - # Mark every non-null field as dirty and store. - for key in self: - if self[key] is not None: - self._dirty.add(key) - self.store() - - # Formatting and templating. - - _formatter = FormattedMapping - - def formatted(self, for_path=False): - """Get a mapping containing all values on this object formatted - as human-readable unicode strings. - """ - return self._formatter(self, for_path) - - def evaluate_template(self, template, for_path=False): - """Evaluate a template (a string or a `Template` object) using - the object's fields. If `for_path` is true, then no new path - separators will be added to the template. - """ - # Perform substitution. - if isinstance(template, basestring): - template = Template(template) - return template.substitute(self.formatted(for_path), - self._template_funcs()) - - # Parsing. - - @classmethod - def _parse(cls, key, string): - """Parse a string as a value for the given key. - """ - if not isinstance(string, basestring): - raise TypeError("_parse() argument must be a string") - - return cls._type(key).parse(string) - - -# Database controller and supporting interfaces. - -class Results(object): - """An item query result set. Iterating over the collection lazily - constructs LibModel objects that reflect database rows. - """ - def __init__(self, model_class, rows, db, query=None, sort=None): - """Create a result set that will construct objects of type - `model_class`. - - `model_class` is a subclass of `LibModel` that will be - constructed. `rows` is a query result: a list of mappings. The - new objects will be associated with the database `db`. - - If `query` is provided, it is used as a predicate to filter the - results for a "slow query" that cannot be evaluated by the - database directly. If `sort` is provided, it is used to sort the - full list of results before returning. This means it is a "slow - sort" and all objects must be built before returning the first - one. - """ - self.model_class = model_class - self.rows = rows - self.db = db - self.query = query - self.sort = sort - - # We keep a queue of rows we haven't yet consumed for - # materialization. We preserve the original total number of - # rows. - self._rows = rows - self._row_count = len(rows) - - # The materialized objects corresponding to rows that have been - # consumed. - self._objects = [] - - def _get_objects(self): - """Construct and generate Model objects for they query. The - objects are returned in the order emitted from the database; no - slow sort is applied. - - For performance, this generator caches materialized objects to - avoid constructing them more than once. This way, iterating over - a `Results` object a second time should be much faster than the - first. - """ - index = 0 # Position in the materialized objects. - while index < len(self._objects) or self._rows: - # Are there previously-materialized objects to produce? - if index < len(self._objects): - yield self._objects[index] - index += 1 - - # Otherwise, we consume another row, materialize its object - # and produce it. - else: - while self._rows: - row = self._rows.pop(0) - obj = self._make_model(row) - # If there is a slow-query predicate, ensurer that the - # object passes it. - if not self.query or self.query.match(obj): - self._objects.append(obj) - index += 1 - yield obj - break - - def __iter__(self): - """Construct and generate Model objects for all matching - objects, in sorted order. - """ - if self.sort: - # Slow sort. Must build the full list first. - objects = self.sort.sort(list(self._get_objects())) - return iter(objects) - - else: - # Objects are pre-sorted (i.e., by the database). - return self._get_objects() - - def _make_model(self, row): - # Get the flexible attributes for the object. - with self.db.transaction() as tx: - flex_rows = tx.query( - 'SELECT * FROM {0} WHERE entity_id=?'.format( - self.model_class._flex_table - ), - (row['id'],) - ) - - cols = dict(row) - values = dict((k, v) for (k, v) in cols.items() - if not k[:4] == 'flex') - flex_values = dict((row['key'], row['value']) for row in flex_rows) - - # Construct the Python object - obj = self.model_class._awaken(self.db, values, flex_values) - return obj - - def __len__(self): - """Get the number of matching objects. - """ - if not self._rows: - # Fully materialized. Just count the objects. - return len(self._objects) - - elif self.query: - # A slow query. Fall back to testing every object. - count = 0 - for obj in self: - count += 1 - return count - - else: - # A fast query. Just count the rows. - return self._row_count - - def __nonzero__(self): - """Does this result contain any objects? - """ - return bool(len(self)) - - def __getitem__(self, n): - """Get the nth item in this result set. This is inefficient: all - items up to n are materialized and thrown away. - """ - if not self._rows and not self.sort: - # Fully materialized and already in order. Just look up the - # object. - return self._objects[n] - - it = iter(self) - try: - for i in range(n): - it.next() - return it.next() - except StopIteration: - raise IndexError('result index {0} out of range'.format(n)) - - def get(self): - """Return the first matching object, or None if no objects - match. - """ - it = iter(self) - try: - return it.next() - except StopIteration: - return None - - -class Transaction(object): - """A context manager for safe, concurrent access to the database. - All SQL commands should be executed through a transaction. - """ - def __init__(self, db): - self.db = db - - def __enter__(self): - """Begin a transaction. This transaction may be created while - another is active in a different thread. - """ - with self.db._tx_stack() as stack: - first = not stack - stack.append(self) - if first: - # Beginning a "root" transaction, which corresponds to an - # SQLite transaction. - self.db._db_lock.acquire() - return self - - def __exit__(self, exc_type, exc_value, traceback): - """Complete a transaction. This must be the most recently - entered but not yet exited transaction. If it is the last active - transaction, the database updates are committed. - """ - with self.db._tx_stack() as stack: - assert stack.pop() is self - empty = not stack - if empty: - # Ending a "root" transaction. End the SQLite transaction. - self.db._connection().commit() - self.db._db_lock.release() - - def query(self, statement, subvals=()): - """Execute an SQL statement with substitution values and return - a list of rows from the database. - """ - cursor = self.db._connection().execute(statement, subvals) - return cursor.fetchall() - - def mutate(self, statement, subvals=()): - """Execute an SQL statement with substitution values and return - the row ID of the last affected row. - """ - cursor = self.db._connection().execute(statement, subvals) - return cursor.lastrowid - - def script(self, statements): - """Execute a string containing multiple SQL statements.""" - self.db._connection().executescript(statements) - - -class Database(object): - """A container for Model objects that wraps an SQLite database as - the backend. - """ - _models = () - """The Model subclasses representing tables in this database. - """ - - def __init__(self, path): - self.path = path - - self._connections = {} - self._tx_stacks = defaultdict(list) - - # A lock to protect the _connections and _tx_stacks maps, which - # both map thread IDs to private resources. - self._shared_map_lock = threading.Lock() - - # A lock to protect access to the database itself. SQLite does - # allow multiple threads to access the database at the same - # time, but many users were experiencing crashes related to this - # capability: where SQLite was compiled without HAVE_USLEEP, its - # backoff algorithm in the case of contention was causing - # whole-second sleeps (!) that would trigger its internal - # timeout. Using this lock ensures only one SQLite transaction - # is active at a time. - self._db_lock = threading.Lock() - - # Set up database schema. - for model_cls in self._models: - self._make_table(model_cls._table, model_cls._fields) - self._make_attribute_table(model_cls._flex_table) - - # Primitive access control: connections and transactions. - - def _connection(self): - """Get a SQLite connection object to the underlying database. - One connection object is created per thread. - """ - thread_id = threading.current_thread().ident - with self._shared_map_lock: - if thread_id in self._connections: - return self._connections[thread_id] - else: - # Make a new connection. - conn = sqlite3.connect( - self.path, - timeout=beets.config['timeout'].as_number(), - ) - - # Access SELECT results like dictionaries. - conn.row_factory = sqlite3.Row - - self._connections[thread_id] = conn - return conn - - @contextlib.contextmanager - def _tx_stack(self): - """A context manager providing access to the current thread's - transaction stack. The context manager synchronizes access to - the stack map. Transactions should never migrate across threads. - """ - thread_id = threading.current_thread().ident - with self._shared_map_lock: - yield self._tx_stacks[thread_id] - - def transaction(self): - """Get a :class:`Transaction` object for interacting directly - with the underlying SQLite database. - """ - return Transaction(self) - - # Schema setup and migration. - - def _make_table(self, table, fields): - """Set up the schema of the database. `fields` is a mapping - from field names to `Type`s. Columns are added if necessary. - """ - # Get current schema. - with self.transaction() as tx: - rows = tx.query('PRAGMA table_info(%s)' % table) - current_fields = set([row[1] for row in rows]) - - field_names = set(fields.keys()) - if current_fields.issuperset(field_names): - # Table exists and has all the required columns. - return - - if not current_fields: - # No table exists. - columns = [] - for name, typ in fields.items(): - columns.append('{0} {1}'.format(name, typ.sql)) - setup_sql = 'CREATE TABLE {0} ({1});\n'.format(table, - ', '.join(columns)) - - else: - # Table exists does not match the field set. - setup_sql = '' - for name, typ in fields.items(): - if name in current_fields: - continue - setup_sql += 'ALTER TABLE {0} ADD COLUMN {1} {2};\n'.format( - table, name, typ.sql - ) - - with self.transaction() as tx: - tx.script(setup_sql) - - def _make_attribute_table(self, flex_table): - """Create a table and associated index for flexible attributes - for the given entity (if they don't exist). - """ - with self.transaction() as tx: - tx.script(""" - CREATE TABLE IF NOT EXISTS {0} ( - id INTEGER PRIMARY KEY, - entity_id INTEGER, - key TEXT, - value TEXT, - UNIQUE(entity_id, key) ON CONFLICT REPLACE); - CREATE INDEX IF NOT EXISTS {0}_by_entity - ON {0} (entity_id); - """.format(flex_table)) - - # Querying. - - def _fetch(self, model_cls, query=None, sort=None): - """Fetch the objects of type `model_cls` matching the given - query. The query may be given as a string, string sequence, a - Query object, or None (to fetch everything). `sort` is an - `Sort` object. - """ - query = query or TrueQuery() # A null query. - sort = sort or NullSort() # Unsorted. - where, subvals = query.clause() - order_by = sort.order_clause() - - sql = ("SELECT * FROM {0} WHERE {1} {2}").format( - model_cls._table, - where or '1', - "ORDER BY {0}".format(order_by) if order_by else '', - ) - - with self.transaction() as tx: - rows = tx.query(sql, subvals) - - return Results( - model_cls, rows, self, - None if where else query, # Slow query component. - sort if sort.is_slow() else None, # Slow sort component. - ) - - def _get(self, model_cls, id): - """Get a Model object by its id or None if the id does not - exist. - """ - return self._fetch(model_cls, MatchQuery('id', id)).get() diff --git a/lib/beets/dbcore/query.py b/lib/beets/dbcore/query.py deleted file mode 100644 index 5a116eb2..00000000 --- a/lib/beets/dbcore/query.py +++ /dev/null @@ -1,654 +0,0 @@ -# This file is part of beets. -# Copyright 2014, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""The Query type hierarchy for DBCore. -""" -import re -from operator import attrgetter -from beets import util -from datetime import datetime, timedelta - - -class Query(object): - """An abstract class representing a query into the item database. - """ - def clause(self): - """Generate an SQLite expression implementing the query. - Return a clause string, a sequence of substitution values for - the clause, and a Query object representing the "remainder" - Returns (clause, subvals) where clause is a valid sqlite - WHERE clause implementing the query and subvals is a list of - items to be substituted for ?s in the clause. - """ - return None, () - - def match(self, item): - """Check whether this query matches a given Item. Can be used to - perform queries on arbitrary sets of Items. - """ - raise NotImplementedError - - -class FieldQuery(Query): - """An abstract query that searches in a specific field for a - pattern. Subclasses must provide a `value_match` class method, which - determines whether a certain pattern string matches a certain value - string. Subclasses may also provide `col_clause` to implement the - same matching functionality in SQLite. - """ - def __init__(self, field, pattern, fast=True): - self.field = field - self.pattern = pattern - self.fast = fast - - def col_clause(self): - return None, () - - def clause(self): - if self.fast: - return self.col_clause() - else: - # Matching a flexattr. This is a slow query. - return None, () - - @classmethod - def value_match(cls, pattern, value): - """Determine whether the value matches the pattern. Both - arguments are strings. - """ - raise NotImplementedError() - - def match(self, item): - return self.value_match(self.pattern, item.get(self.field)) - - -class MatchQuery(FieldQuery): - """A query that looks for exact matches in an item field.""" - def col_clause(self): - return self.field + " = ?", [self.pattern] - - @classmethod - def value_match(cls, pattern, value): - return pattern == value - - -class NoneQuery(FieldQuery): - - def __init__(self, field, fast=True): - self.field = field - self.fast = fast - - def col_clause(self): - return self.field + " IS NULL", () - - @classmethod - def match(self, item): - try: - return item[self.field] is None - except KeyError: - return True - - -class StringFieldQuery(FieldQuery): - """A FieldQuery that converts values to strings before matching - them. - """ - @classmethod - def value_match(cls, pattern, value): - """Determine whether the value matches the pattern. The value - may have any type. - """ - return cls.string_match(pattern, util.as_string(value)) - - @classmethod - def string_match(cls, pattern, value): - """Determine whether the value matches the pattern. Both - arguments are strings. Subclasses implement this method. - """ - raise NotImplementedError() - - -class SubstringQuery(StringFieldQuery): - """A query that matches a substring in a specific item field.""" - def col_clause(self): - pattern = (self.pattern - .replace('\\', '\\\\') - .replace('%', '\\%') - .replace('_', '\\_')) - search = '%' + pattern + '%' - clause = self.field + " like ? escape '\\'" - subvals = [search] - return clause, subvals - - @classmethod - def string_match(cls, pattern, value): - return pattern.lower() in value.lower() - - -class RegexpQuery(StringFieldQuery): - """A query that matches a regular expression in a specific item - field. - """ - @classmethod - def string_match(cls, pattern, value): - try: - res = re.search(pattern, value) - except re.error: - # Invalid regular expression. - return False - return res is not None - - -class BooleanQuery(MatchQuery): - """Matches a boolean field. Pattern should either be a boolean or a - string reflecting a boolean. - """ - def __init__(self, field, pattern, fast=True): - super(BooleanQuery, self).__init__(field, pattern, fast) - if isinstance(pattern, basestring): - self.pattern = util.str2bool(pattern) - self.pattern = int(self.pattern) - - -class BytesQuery(MatchQuery): - """Match a raw bytes field (i.e., a path). This is a necessary hack - to work around the `sqlite3` module's desire to treat `str` and - `unicode` equivalently in Python 2. Always use this query instead of - `MatchQuery` when matching on BLOB values. - """ - def __init__(self, field, pattern): - super(BytesQuery, self).__init__(field, pattern) - - # Use a buffer representation of the pattern for SQLite - # matching. This instructs SQLite to treat the blob as binary - # rather than encoded Unicode. - if isinstance(self.pattern, basestring): - # Implicitly coerce Unicode strings to their bytes - # equivalents. - if isinstance(self.pattern, unicode): - self.pattern = self.pattern.encode('utf8') - self.buf_pattern = buffer(self.pattern) - elif isinstance(self.pattern, buffer): - self.buf_pattern = self.pattern - self.pattern = bytes(self.pattern) - - def col_clause(self): - return self.field + " = ?", [self.buf_pattern] - - -class NumericQuery(FieldQuery): - """Matches numeric fields. A syntax using Ruby-style range ellipses - (``..``) lets users specify one- or two-sided ranges. For example, - ``year:2001..`` finds music released since the turn of the century. - """ - def _convert(self, s): - """Convert a string to a numeric type (float or int). If the - string cannot be converted, return None. - """ - # This is really just a bit of fun premature optimization. - try: - return int(s) - except ValueError: - try: - return float(s) - except ValueError: - return None - - def __init__(self, field, pattern, fast=True): - super(NumericQuery, self).__init__(field, pattern, fast) - - parts = pattern.split('..', 1) - if len(parts) == 1: - # No range. - self.point = self._convert(parts[0]) - self.rangemin = None - self.rangemax = None - else: - # One- or two-sided range. - self.point = None - self.rangemin = self._convert(parts[0]) - self.rangemax = self._convert(parts[1]) - - def match(self, item): - if self.field not in item: - return False - value = item[self.field] - if isinstance(value, basestring): - value = self._convert(value) - - if self.point is not None: - return value == self.point - else: - if self.rangemin is not None and value < self.rangemin: - return False - if self.rangemax is not None and value > self.rangemax: - return False - return True - - def col_clause(self): - if self.point is not None: - return self.field + '=?', (self.point,) - else: - if self.rangemin is not None and self.rangemax is not None: - return (u'{0} >= ? AND {0} <= ?'.format(self.field), - (self.rangemin, self.rangemax)) - elif self.rangemin is not None: - return u'{0} >= ?'.format(self.field), (self.rangemin,) - elif self.rangemax is not None: - return u'{0} <= ?'.format(self.field), (self.rangemax,) - else: - return '1', () - - -class CollectionQuery(Query): - """An abstract query class that aggregates other queries. Can be - indexed like a list to access the sub-queries. - """ - def __init__(self, subqueries=()): - self.subqueries = subqueries - - # Act like a sequence. - - def __len__(self): - return len(self.subqueries) - - def __getitem__(self, key): - return self.subqueries[key] - - def __iter__(self): - return iter(self.subqueries) - - def __contains__(self, item): - return item in self.subqueries - - def clause_with_joiner(self, joiner): - """Returns a clause created by joining together the clauses of - all subqueries with the string joiner (padded by spaces). - """ - clause_parts = [] - subvals = [] - for subq in self.subqueries: - subq_clause, subq_subvals = subq.clause() - if not subq_clause: - # Fall back to slow query. - return None, () - clause_parts.append('(' + subq_clause + ')') - subvals += subq_subvals - clause = (' ' + joiner + ' ').join(clause_parts) - return clause, subvals - - -class AnyFieldQuery(CollectionQuery): - """A query that matches if a given FieldQuery subclass matches in - any field. The individual field query class is provided to the - constructor. - """ - def __init__(self, pattern, fields, cls): - self.pattern = pattern - self.fields = fields - self.query_class = cls - - subqueries = [] - for field in self.fields: - subqueries.append(cls(field, pattern, True)) - super(AnyFieldQuery, self).__init__(subqueries) - - def clause(self): - return self.clause_with_joiner('or') - - def match(self, item): - for subq in self.subqueries: - if subq.match(item): - return True - return False - - -class MutableCollectionQuery(CollectionQuery): - """A collection query whose subqueries may be modified after the - query is initialized. - """ - def __setitem__(self, key, value): - self.subqueries[key] = value - - def __delitem__(self, key): - del self.subqueries[key] - - -class AndQuery(MutableCollectionQuery): - """A conjunction of a list of other queries.""" - def clause(self): - return self.clause_with_joiner('and') - - def match(self, item): - return all([q.match(item) for q in self.subqueries]) - - -class OrQuery(MutableCollectionQuery): - """A conjunction of a list of other queries.""" - def clause(self): - return self.clause_with_joiner('or') - - def match(self, item): - return any([q.match(item) for q in self.subqueries]) - - -class TrueQuery(Query): - """A query that always matches.""" - def clause(self): - return '1', () - - def match(self, item): - return True - - -class FalseQuery(Query): - """A query that never matches.""" - def clause(self): - return '0', () - - def match(self, item): - return False - - -# Time/date queries. - -def _to_epoch_time(date): - """Convert a `datetime` object to an integer number of seconds since - the (local) Unix epoch. - """ - epoch = datetime.fromtimestamp(0) - delta = date - epoch - try: - return int(delta.total_seconds()) - except AttributeError: - # datetime.timedelta.total_seconds() is not available on Python 2.6 - return delta.seconds + delta.days * 24 * 3600 - - -def _parse_periods(pattern): - """Parse a string containing two dates separated by two dots (..). - Return a pair of `Period` objects. - """ - parts = pattern.split('..', 1) - if len(parts) == 1: - instant = Period.parse(parts[0]) - return (instant, instant) - else: - start = Period.parse(parts[0]) - end = Period.parse(parts[1]) - return (start, end) - - -class Period(object): - """A period of time given by a date, time and precision. - - Example: 2014-01-01 10:50:30 with precision 'month' represents all - instants of time during January 2014. - """ - - precisions = ('year', 'month', 'day') - date_formats = ('%Y', '%Y-%m', '%Y-%m-%d') - - def __init__(self, date, precision): - """Create a period with the given date (a `datetime` object) and - precision (a string, one of "year", "month", or "day"). - """ - if precision not in Period.precisions: - raise ValueError('Invalid precision ' + str(precision)) - self.date = date - self.precision = precision - - @classmethod - def parse(cls, string): - """Parse a date and return a `Period` object or `None` if the - string is empty. - """ - if not string: - return None - ordinal = string.count('-') - if ordinal >= len(cls.date_formats): - # Too many components. - return None - date_format = cls.date_formats[ordinal] - try: - date = datetime.strptime(string, date_format) - except ValueError: - # Parsing failed. - return None - precision = cls.precisions[ordinal] - return cls(date, precision) - - def open_right_endpoint(self): - """Based on the precision, convert the period to a precise - `datetime` for use as a right endpoint in a right-open interval. - """ - precision = self.precision - date = self.date - if 'year' == self.precision: - return date.replace(year=date.year + 1, month=1) - elif 'month' == precision: - if (date.month < 12): - return date.replace(month=date.month + 1) - else: - return date.replace(year=date.year + 1, month=1) - elif 'day' == precision: - return date + timedelta(days=1) - else: - raise ValueError('unhandled precision ' + str(precision)) - - -class DateInterval(object): - """A closed-open interval of dates. - - A left endpoint of None means since the beginning of time. - A right endpoint of None means towards infinity. - """ - - def __init__(self, start, end): - if start is not None and end is not None and not start < end: - raise ValueError("start date {0} is not before end date {1}" - .format(start, end)) - self.start = start - self.end = end - - @classmethod - def from_periods(cls, start, end): - """Create an interval with two Periods as the endpoints. - """ - end_date = end.open_right_endpoint() if end is not None else None - start_date = start.date if start is not None else None - return cls(start_date, end_date) - - def contains(self, date): - if self.start is not None and date < self.start: - return False - if self.end is not None and date >= self.end: - return False - return True - - def __str__(self): - return'[{0}, {1})'.format(self.start, self.end) - - -class DateQuery(FieldQuery): - """Matches date fields stored as seconds since Unix epoch time. - - Dates can be specified as ``year-month-day`` strings where only year - is mandatory. - - The value of a date field can be matched against a date interval by - using an ellipsis interval syntax similar to that of NumericQuery. - """ - def __init__(self, field, pattern, fast=True): - super(DateQuery, self).__init__(field, pattern, fast) - start, end = _parse_periods(pattern) - self.interval = DateInterval.from_periods(start, end) - - def match(self, item): - timestamp = float(item[self.field]) - date = datetime.utcfromtimestamp(timestamp) - return self.interval.contains(date) - - _clause_tmpl = "{0} {1} ?" - - def col_clause(self): - clause_parts = [] - subvals = [] - - if self.interval.start: - clause_parts.append(self._clause_tmpl.format(self.field, ">=")) - subvals.append(_to_epoch_time(self.interval.start)) - - if self.interval.end: - clause_parts.append(self._clause_tmpl.format(self.field, "<")) - subvals.append(_to_epoch_time(self.interval.end)) - - if clause_parts: - # One- or two-sided interval. - clause = ' AND '.join(clause_parts) - else: - # Match any date. - clause = '1' - return clause, subvals - - -# Sorting. - -class Sort(object): - """An abstract class representing a sort operation for a query into - the item database. - """ - - def order_clause(self): - """Generates a SQL fragment to be used in a ORDER BY clause, or - None if no fragment is used (i.e., this is a slow sort). - """ - return None - - def sort(self, items): - """Sort the list of objects and return a list. - """ - return sorted(items) - - def is_slow(self): - """Indicate whether this query is *slow*, meaning that it cannot - be executed in SQL and must be executed in Python. - """ - return False - - -class MultipleSort(Sort): - """Sort that encapsulates multiple sub-sorts. - """ - - def __init__(self, sorts=None): - self.sorts = sorts or [] - - def add_sort(self, sort): - self.sorts.append(sort) - - def _sql_sorts(self): - """Return the list of sub-sorts for which we can be (at least - partially) fast. - - A contiguous suffix of fast (SQL-capable) sub-sorts are - executable in SQL. The remaining, even if they are fast - independently, must be executed slowly. - """ - sql_sorts = [] - for sort in reversed(self.sorts): - if not sort.order_clause() is None: - sql_sorts.append(sort) - else: - break - sql_sorts.reverse() - return sql_sorts - - def order_clause(self): - order_strings = [] - for sort in self._sql_sorts(): - order = sort.order_clause() - order_strings.append(order) - - return ", ".join(order_strings) - - def is_slow(self): - for sort in self.sorts: - if sort.is_slow(): - return True - return False - - def sort(self, items): - slow_sorts = [] - switch_slow = False - for sort in reversed(self.sorts): - if switch_slow: - slow_sorts.append(sort) - elif sort.order_clause() is None: - switch_slow = True - slow_sorts.append(sort) - else: - pass - - for sort in slow_sorts: - items = sort.sort(items) - return items - - def __repr__(self): - return u'MultipleSort({0})'.format(repr(self.sorts)) - - -class FieldSort(Sort): - """An abstract sort criterion that orders by a specific field (of - any kind). - """ - def __init__(self, field, ascending=True): - self.field = field - self.ascending = ascending - - def sort(self, objs): - # TODO: Conversion and null-detection here. In Python 3, - # comparisons with None fail. We should also support flexible - # attributes with different types without falling over. - return sorted(objs, key=attrgetter(self.field), - reverse=not self.ascending) - - def __repr__(self): - return u'<{0}: {1}{2}>'.format( - type(self).__name__, - self.field, - '+' if self.ascending else '-', - ) - - -class FixedFieldSort(FieldSort): - """Sort object to sort on a fixed field. - """ - def order_clause(self): - order = "ASC" if self.ascending else "DESC" - return "{0} {1}".format(self.field, order) - - -class SlowFieldSort(FieldSort): - """A sort criterion by some model field other than a fixed field: - i.e., a computed or flexible field. - """ - def is_slow(self): - return True - - -class NullSort(Sort): - """No sorting. Leave results unsorted.""" - def sort(items): - return items diff --git a/lib/beets/dbcore/queryparse.py b/lib/beets/dbcore/queryparse.py deleted file mode 100644 index 90963696..00000000 --- a/lib/beets/dbcore/queryparse.py +++ /dev/null @@ -1,180 +0,0 @@ -# This file is part of beets. -# Copyright 2014, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Parsing of strings into DBCore queries. -""" -import re -import itertools -from . import query - - -PARSE_QUERY_PART_REGEX = re.compile( - # Non-capturing optional segment for the keyword. - r'(?:' - r'(\S+?)' # The field key. - r'(? (None, 'stapler', SubstringQuery) - 'color:red' -> ('color', 'red', SubstringQuery) - ':^Quiet' -> (None, '^Quiet', RegexpQuery) - 'color::b..e' -> ('color', 'b..e', RegexpQuery) - - Prefixes may be "escaped" with a backslash to disable the keying - behavior. - """ - part = part.strip() - match = PARSE_QUERY_PART_REGEX.match(part) - - assert match # Regex should always match. - key = match.group(1) - term = match.group(2).replace('\:', ':') - - # Match the search term against the list of prefixes. - for pre, query_class in prefixes.items(): - if term.startswith(pre): - return key, term[len(pre):], query_class - - # No matching prefix: use type-based or fallback/default query. - query_class = query_classes.get(key, default_class) - return key, term, query_class - - -def construct_query_part(model_cls, prefixes, query_part): - """Create a query from a single query component, `query_part`, for - querying instances of `model_cls`. Return a `Query` instance. - """ - # Shortcut for empty query parts. - if not query_part: - return query.TrueQuery() - - # Get the query classes for each possible field. - query_classes = {} - for k, t in itertools.chain(model_cls._fields.items(), - model_cls._types.items()): - query_classes[k] = t.query - - # Parse the string. - key, pattern, query_class = \ - parse_query_part(query_part, query_classes, prefixes) - - # No key specified. - if key is None: - if issubclass(query_class, query.FieldQuery): - # The query type matches a specific field, but none was - # specified. So we use a version of the query that matches - # any field. - return query.AnyFieldQuery(pattern, model_cls._search_fields, - query_class) - else: - # Other query type. - return query_class(pattern) - - key = key.lower() - return query_class(key.lower(), pattern, key in model_cls._fields) - - -def query_from_strings(query_cls, model_cls, prefixes, query_parts): - """Creates a collection query of type `query_cls` from a list of - strings in the format used by parse_query_part. `model_cls` - determines how queries are constructed from strings. - """ - subqueries = [] - for part in query_parts: - subqueries.append(construct_query_part(model_cls, prefixes, part)) - if not subqueries: # No terms in query. - subqueries = [query.TrueQuery()] - return query_cls(subqueries) - - -def construct_sort_part(model_cls, part): - """Create a `Sort` from a single string criterion. - - `model_cls` is the `Model` being queried. `part` is a single string - ending in ``+`` or ``-`` indicating the sort. - """ - assert part, "part must be a field name and + or -" - field = part[:-1] - assert field, "field is missing" - direction = part[-1] - assert direction in ('+', '-'), "part must end with + or -" - is_ascending = direction == '+' - - if field in model_cls._sorts: - sort = model_cls._sorts[field](model_cls, is_ascending) - elif field in model_cls._fields: - sort = query.FixedFieldSort(field, is_ascending) - else: - # Flexible or computed. - sort = query.SlowFieldSort(field, is_ascending) - return sort - - -def sort_from_strings(model_cls, sort_parts): - """Create a `Sort` from a list of sort criteria (strings). - """ - if not sort_parts: - return query.NullSort() - else: - sort = query.MultipleSort() - for part in sort_parts: - sort.add_sort(construct_sort_part(model_cls, part)) - return sort - - -def parse_sorted_query(model_cls, parts, prefixes={}, - query_cls=query.AndQuery): - """Given a list of strings, create the `Query` and `Sort` that they - represent. - """ - # Separate query token and sort token. - query_parts = [] - sort_parts = [] - for part in parts: - if part.endswith((u'+', u'-')) and u':' not in part: - sort_parts.append(part) - else: - query_parts.append(part) - - # Parse each. - q = query_from_strings( - query_cls, model_cls, prefixes, query_parts - ) - s = sort_from_strings(model_cls, sort_parts) - return q, s diff --git a/lib/beets/dbcore/types.py b/lib/beets/dbcore/types.py deleted file mode 100644 index 82346e70..00000000 --- a/lib/beets/dbcore/types.py +++ /dev/null @@ -1,208 +0,0 @@ -# This file is part of beets. -# Copyright 2014, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Representation of type information for DBCore model fields. -""" -from . import query -from beets.util import str2bool - - -# Abstract base. - -class Type(object): - """An object encapsulating the type of a model field. Includes - information about how to store, query, format, and parse a given - field. - """ - - sql = u'TEXT' - """The SQLite column type for the value. - """ - - query = query.SubstringQuery - """The `Query` subclass to be used when querying the field. - """ - - model_type = unicode - """The Python type that is used to represent the value in the model. - - The model is guaranteed to return a value of this type if the field - is accessed. To this end, the constructor is used by the `normalize` - and `from_sql` methods and the `default` property. - """ - - @property - def null(self): - """The value to be exposed when the underlying value is None. - """ - return self.model_type() - - def format(self, value): - """Given a value of this type, produce a Unicode string - representing the value. This is used in template evaluation. - """ - if value is None: - value = self.null - # `self.null` might be `None` - if value is None: - value = u'' - if isinstance(value, bytes): - value = value.decode('utf8', 'ignore') - - return unicode(value) - - def parse(self, string): - """Parse a (possibly human-written) string and return the - indicated value of this type. - """ - try: - return self.model_type(string) - except ValueError: - return self.null - - def normalize(self, value): - """Given a value that will be assigned into a field of this - type, normalize the value to have the appropriate type. This - base implementation only reinterprets `None`. - """ - if value is None: - return self.null - else: - # TODO This should eventually be replaced by - # `self.model_type(value)` - return value - - def from_sql(self, sql_value): - """Receives the value stored in the SQL backend and return the - value to be stored in the model. - - For fixed fields the type of `value` is determined by the column - type affinity given in the `sql` property and the SQL to Python - mapping of the database adapter. For more information see: - http://www.sqlite.org/datatype3.html - https://docs.python.org/2/library/sqlite3.html#sqlite-and-python-types - - Flexible fields have the type afinity `TEXT`. This means the - `sql_value` is either a `buffer` or a `unicode` object` and the - method must handle these in addition. - """ - if isinstance(sql_value, buffer): - sql_value = bytes(sql_value).decode('utf8', 'ignore') - if isinstance(sql_value, unicode): - return self.parse(sql_value) - else: - return self.normalize(sql_value) - - def to_sql(self, model_value): - """Convert a value as stored in the model object to a value used - by the database adapter. - """ - return model_value - - -# Reusable types. - -class Default(Type): - null = None - - -class Integer(Type): - """A basic integer type. - """ - sql = u'INTEGER' - query = query.NumericQuery - model_type = int - - -class PaddedInt(Integer): - """An integer field that is formatted with a given number of digits, - padded with zeroes. - """ - def __init__(self, digits): - self.digits = digits - - def format(self, value): - return u'{0:0{1}d}'.format(value or 0, self.digits) - - -class ScaledInt(Integer): - """An integer whose formatting operation scales the number by a - constant and adds a suffix. Good for units with large magnitudes. - """ - def __init__(self, unit, suffix=u''): - self.unit = unit - self.suffix = suffix - - def format(self, value): - return u'{0}{1}'.format((value or 0) // self.unit, self.suffix) - - -class Id(Integer): - """An integer used as the row id or a foreign key in a SQLite table. - This type is nullable: None values are not translated to zero. - """ - null = None - - def __init__(self, primary=True): - if primary: - self.sql = u'INTEGER PRIMARY KEY' - - -class Float(Type): - """A basic floating-point type. - """ - sql = u'REAL' - query = query.NumericQuery - model_type = float - - def format(self, value): - return u'{0:.1f}'.format(value or 0.0) - - -class NullFloat(Float): - """Same as `Float`, but does not normalize `None` to `0.0`. - """ - null = None - - -class String(Type): - """A Unicode string type. - """ - sql = u'TEXT' - query = query.SubstringQuery - - -class Boolean(Type): - """A boolean type. - """ - sql = u'INTEGER' - query = query.BooleanQuery - model_type = bool - - def format(self, value): - return unicode(bool(value)) - - def parse(self, string): - return str2bool(string) - - -# Shared instances of common types. -DEFAULT = Default() -INTEGER = Integer() -PRIMARY_ID = Id(True) -FOREIGN_ID = Id(False) -FLOAT = Float() -NULL_FLOAT = NullFloat() -STRING = String() -BOOLEAN = Boolean() diff --git a/lib/beets/importer.py b/lib/beets/importer.py deleted file mode 100644 index 4a7bd997..00000000 --- a/lib/beets/importer.py +++ /dev/null @@ -1,1428 +0,0 @@ -# This file is part of beets. -# Copyright 2013, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Provides the basic, interface-agnostic workflow for importing and -autotagging music files. -""" -from __future__ import print_function - -import os -import re -import logging -import pickle -import itertools -from collections import defaultdict -from tempfile import mkdtemp -from bisect import insort, bisect_left -from contextlib import contextmanager -import shutil - -from beets import autotag -from beets import library -from beets import dbcore -from beets import plugins -from beets import util -from beets import config -from beets.util import pipeline, sorted_walk, ancestry -from beets.util import syspath, normpath, displayable_path -from enum import Enum -from beets import mediafile - -action = Enum('action', - ['SKIP', 'ASIS', 'TRACKS', 'MANUAL', 'APPLY', 'MANUAL_ID', - 'ALBUMS']) - -QUEUE_SIZE = 128 -SINGLE_ARTIST_THRESH = 0.25 -VARIOUS_ARTISTS = u'Various Artists' -PROGRESS_KEY = 'tagprogress' -HISTORY_KEY = 'taghistory' - -# Global logger. -log = logging.getLogger('beets') - - -class ImportAbort(Exception): - """Raised when the user aborts the tagging operation. - """ - pass - - -# Utilities. - -def _open_state(): - """Reads the state file, returning a dictionary.""" - try: - with open(config['statefile'].as_filename()) as f: - return pickle.load(f) - except Exception as exc: - # The `pickle` module can emit all sorts of exceptions during - # unpickling, including ImportError. We use a catch-all - # exception to avoid enumerating them all (the docs don't even have a - # full list!). - log.debug(u'state file could not be read: {0}'.format(exc)) - return {} - - -def _save_state(state): - """Writes the state dictionary out to disk.""" - try: - with open(config['statefile'].as_filename(), 'w') as f: - pickle.dump(state, f) - except IOError as exc: - log.error(u'state file could not be written: {0}'.format(exc)) - - -# Utilities for reading and writing the beets progress file, which -# allows long tagging tasks to be resumed when they pause (or crash). - -def progress_read(): - state = _open_state() - return state.setdefault(PROGRESS_KEY, {}) - - -@contextmanager -def progress_write(): - state = _open_state() - progress = state.setdefault(PROGRESS_KEY, {}) - yield progress - _save_state(state) - - -def progress_add(toppath, *paths): - """Record that the files under all of the `paths` have been imported - under `toppath`. - """ - with progress_write() as state: - imported = state.setdefault(toppath, []) - for path in paths: - # Normally `progress_add` will be called with the path - # argument increasing. This is because of the ordering in - # `albums_in_dir`. We take advantage of that to make the - # code faster - if imported and imported[len(imported) - 1] <= path: - imported.append(path) - else: - insort(imported, path) - - -def progress_element(toppath, path): - """Return whether `path` has been imported in `toppath`. - """ - state = progress_read() - if toppath not in state: - return False - imported = state[toppath] - i = bisect_left(imported, path) - return i != len(imported) and imported[i] == path - - -def has_progress(toppath): - """Return `True` if there exist paths that have already been - imported under `toppath`. - """ - state = progress_read() - return toppath in state - - -def progress_reset(toppath): - with progress_write() as state: - if toppath in state: - del state[toppath] - - -# Similarly, utilities for manipulating the "incremental" import log. -# This keeps track of all directories that were ever imported, which -# allows the importer to only import new stuff. - -def history_add(paths): - """Indicate that the import of the album in `paths` is completed and - should not be repeated in incremental imports. - """ - state = _open_state() - if HISTORY_KEY not in state: - state[HISTORY_KEY] = set() - - state[HISTORY_KEY].add(tuple(paths)) - - _save_state(state) - - -def history_get(): - """Get the set of completed path tuples in incremental imports. - """ - state = _open_state() - if HISTORY_KEY not in state: - return set() - return state[HISTORY_KEY] - - -# Abstract session class. - -class ImportSession(object): - """Controls an import action. Subclasses should implement methods to - communicate with the user or otherwise make decisions. - """ - def __init__(self, lib, logfile, paths, query): - """Create a session. `lib` is a Library object. `logfile` is a - file-like object open for writing or None if no logging is to be - performed. Either `paths` or `query` is non-null and indicates - the source of files to be imported. - """ - self.lib = lib - self.logfile = logfile - self.paths = paths - self.query = query - self.seen_idents = set() - self._is_resuming = dict() - - # Normalize the paths. - if self.paths: - self.paths = map(normpath, self.paths) - - def set_config(self, config): - """Set `config` property from global import config and make - implied changes. - """ - # FIXME: Maybe this function should not exist and should instead - # provide "decision wrappers" like "should_resume()", etc. - iconfig = dict(config) - self.config = iconfig - - # Incremental and progress are mutually exclusive. - if iconfig['incremental']: - iconfig['resume'] = False - - # When based on a query instead of directories, never - # save progress or try to resume. - if self.query is not None: - iconfig['resume'] = False - iconfig['incremental'] = False - - # Copy, move, and link are mutually exclusive. - if iconfig['move']: - iconfig['copy'] = False - iconfig['link'] = False - elif iconfig['link']: - iconfig['copy'] = False - iconfig['move'] = False - - # Only delete when copying. - if not iconfig['copy']: - iconfig['delete'] = False - - self.want_resume = config['resume'].as_choice([True, False, 'ask']) - - def tag_log(self, status, paths): - """Log a message about a given album to logfile. The status should - reflect the reason the album couldn't be tagged. - """ - if self.logfile: - print(u'{0} {1}'.format(status, displayable_path(paths)), - file=self.logfile) - self.logfile.flush() - - def log_choice(self, task, duplicate=False): - """Logs the task's current choice if it should be logged. If - ``duplicate``, then this is a secondary choice after a duplicate was - detected and a decision was made. - """ - paths = task.paths - if duplicate: - # Duplicate: log all three choices (skip, keep both, and trump). - if task.should_remove_duplicates: - self.tag_log('duplicate-replace', paths) - elif task.choice_flag in (action.ASIS, action.APPLY): - self.tag_log('duplicate-keep', paths) - elif task.choice_flag is (action.SKIP): - self.tag_log('duplicate-skip', paths) - else: - # Non-duplicate: log "skip" and "asis" choices. - if task.choice_flag is action.ASIS: - self.tag_log('asis', paths) - elif task.choice_flag is action.SKIP: - self.tag_log('skip', paths) - - def should_resume(self, path): - raise NotImplementedError - - def choose_match(self, task): - raise NotImplementedError - - def resolve_duplicate(self, task, found_duplicates): - raise NotImplementedError - - def choose_item(self, task): - raise NotImplementedError - - def run(self): - """Run the import task. - """ - self.set_config(config['import']) - - # Set up the pipeline. - if self.query is None: - stages = [read_tasks(self)] - else: - stages = [query_tasks(self)] - - if self.config['pretend']: - # Only log the imported files and end the pipeline - stages += [log_files(self)] - else: - if self.config['group_albums'] and \ - not self.config['singletons']: - # Split directory tasks into one task for each album - stages += [group_albums(self)] - if self.config['autotag']: - # FIXME We should also resolve duplicates when not - # autotagging. This is currently handled in `user_query` - stages += [lookup_candidates(self), user_query(self)] - else: - stages += [import_asis(self)] - stages += [apply_choices(self)] - - for stage_func in plugins.import_stages(): - stages.append(plugin_stage(self, stage_func)) - stages += [manipulate_files(self)] - pl = pipeline.Pipeline(stages) - - # Run the pipeline. - plugins.send('import_begin', session=self) - try: - if config['threaded']: - pl.run_parallel(QUEUE_SIZE) - else: - pl.run_sequential() - except ImportAbort: - # User aborted operation. Silently stop. - pass - - # Incremental and resumed imports - - def already_imported(self, toppath, paths): - """Returns true if the files belonging to this task have already - been imported in a previous session. - """ - if self.is_resuming(toppath) \ - and all(map(lambda p: progress_element(toppath, p), paths)): - return True - if self.config['incremental'] \ - and tuple(paths) in self.history_dirs: - return True - - return False - - @property - def history_dirs(self): - if not hasattr(self, '_history_dirs'): - self._history_dirs = history_get() - return self._history_dirs - - def is_resuming(self, toppath): - """Return `True` if user wants to resume import of this path. - - You have to call `ask_resume` first to determine the return value. - """ - return self._is_resuming.get(toppath, False) - - def ask_resume(self, toppath): - """If import of `toppath` was aborted in an earlier session, ask - user if she wants to resume the import. - - Determines the return value of `is_resuming(toppath)`. - """ - if self.want_resume and has_progress(toppath): - # Either accept immediately or prompt for input to decide. - if self.want_resume is True or \ - self.should_resume(toppath): - log.warn(u'Resuming interrupted import of {0}'.format( - util.displayable_path(toppath))) - self._is_resuming[toppath] = True - else: - # Clear progress; we're starting from the top. - progress_reset(toppath) - - -# The importer task class. - -class ImportTask(object): - """Represents a single set of items to be imported along with its - intermediate state. May represent an album or a single item. - - The import session and stages call the following methods in the - given order. - - * `lookup_candidates()` Sets the `common_artist`, `common_album`, - `candidates`, and `rec` attributes. `candidates` is a list of - `AlbumMatch` objects. - - * `choose_match()` Uses the session to set the `match` attribute - from the `candidates` list. - - * `find_duplicates()` Returns a list of albums from `lib` with the - same artist and album name as the task. - - * `apply_metadata()` Sets the attributes of the items from the - task's `match` attribute. - - * `add()` Add the imported items and album to the database. - - * `manipulate_files()` Copy, move, and write files depending on the - session configuration. - - * `finalize()` Update the import progress and cleanup the file - system. - """ - def __init__(self, toppath=None, paths=None, items=None): - self.toppath = toppath - self.paths = paths - self.items = items - self.choice_flag = None - - self.cur_album = None - self.cur_artist = None - self.candidates = [] - self.rec = None - # TODO remove this eventually - self.should_remove_duplicates = False - self.is_album = True - - def set_choice(self, choice): - """Given an AlbumMatch or TrackMatch object or an action constant, - indicates that an action has been selected for this task. - """ - # Not part of the task structure: - assert choice not in (action.MANUAL, action.MANUAL_ID) - assert choice != action.APPLY # Only used internally. - if choice in (action.SKIP, action.ASIS, action.TRACKS, action.ALBUMS): - self.choice_flag = choice - self.match = None - else: - self.choice_flag = action.APPLY # Implicit choice. - self.match = choice - - def save_progress(self): - """Updates the progress state to indicate that this album has - finished. - """ - if self.toppath: - progress_add(self.toppath, *self.paths) - - def save_history(self): - """Save the directory in the history for incremental imports. - """ - if self.paths: - history_add(self.paths) - - # Logical decisions. - - @property - def apply(self): - return self.choice_flag == action.APPLY - - @property - def skip(self): - return self.choice_flag == action.SKIP - - # Convenient data. - - def chosen_ident(self): - """Returns identifying metadata about the current choice. For - albums, this is an (artist, album) pair. For items, this is - (artist, title). May only be called when the choice flag is ASIS - (in which case the data comes from the files' current metadata) - or APPLY (data comes from the choice). - """ - if self.choice_flag is action.ASIS: - return (self.cur_artist, self.cur_album) - elif self.choice_flag is action.APPLY: - return (self.match.info.artist, self.match.info.album) - - def imported_items(self): - """Return a list of Items that should be added to the library. - - If the tasks applies an album match the method only returns the - matched items. - """ - if self.choice_flag == action.ASIS: - return list(self.items) - # FIXME this should be a simple attribute. There should be no - # need to retrieve the keys of `match.mapping`. This requires - # that we remove unmatched items from the list. - elif self.choice_flag == action.APPLY: - return self.match.mapping.keys() - else: - assert False - - def apply_metadata(self): - """Copy metadata from match info to the items. - """ - # TODO call should be more descriptive like - # apply_metadata(self.match, self.items) - autotag.apply_metadata(self.match.info, self.match.mapping) - - def duplicate_items(self, lib): - duplicate_items = [] - for album in self.find_duplicates(lib): - duplicate_items += album.items() - return duplicate_items - - def remove_duplicates(self, lib): - duplicate_items = self.duplicate_items(lib) - log.debug(u'removing {0} old duplicated items' - .format(len(duplicate_items))) - for item in duplicate_items: - item.remove() - if lib.directory in util.ancestry(item.path): - log.debug(u'deleting duplicate {0}' - .format(util.displayable_path(item.path))) - util.remove(item.path) - util.prune_dirs(os.path.dirname(item.path), - lib.directory) - - def finalize(self, session): - """Save progress, clean up files, and emit plugin event. - """ - # FIXME the session argument is unfortunate. It should be - # present as an attribute of the task. - - # Update progress. - if session.want_resume: - self.save_progress() - if session.config['incremental']: - self.save_history() - - self.cleanup(copy=session.config['copy'], - delete=session.config['delete'], - move=session.config['move']) - - if not self.skip: - self._emit_imported(session.lib) - - def cleanup(self, copy=False, delete=False, move=False): - """Remove and prune imported paths. - """ - # FIXME Maybe the keywords should be task properties. - - # Do not delete any files or prune directories when skipping. - if self.skip: - return - - items = self.imported_items() - - # When copying and deleting originals, delete old files. - if copy and delete: - new_paths = [os.path.realpath(item.path) for item in items] - for old_path in self.old_paths: - # Only delete files that were actually copied. - if old_path not in new_paths: - util.remove(syspath(old_path), False) - self.prune(old_path) - - # When moving, prune empty directories containing the original files. - elif move: - for old_path in self.old_paths: - self.prune(old_path) - - def _emit_imported(self, lib): - # FIXME This shouldn't be here. Skipping should be handled in - # the stages. - if self.skip: - return - plugins.send('album_imported', lib=lib, album=self.album) - - def lookup_candidates(self): - """Retrieve and store candidates for this album. - """ - artist, album, candidates, recommendation = \ - autotag.tag_album(self.items) - self.cur_artist = artist - self.cur_album = album - self.candidates = candidates - self.rec = recommendation - - def find_duplicates(self, lib): - """Return a list of albums from `lib` with the same artist and - album name as the task. - """ - artist, album = self.chosen_ident() - - if artist is None: - # As-is import with no artist. Skip check. - return [] - - duplicates = [] - task_paths = set(i.path for i in self.items if i) - duplicate_query = dbcore.AndQuery(( - dbcore.MatchQuery('albumartist', artist), - dbcore.MatchQuery('album', album), - )) - - for album in lib.albums(duplicate_query): - # Check whether the album is identical in contents, in which - # case it is not a duplicate (will be replaced). - album_paths = set(i.path for i in album.items()) - if album_paths != task_paths: - duplicates.append(album) - return duplicates - - def align_album_level_fields(self): - """Make the some album fields equal across `self.items` - """ - changes = {} - - if self.choice_flag == action.ASIS: - # Taking metadata "as-is". Guess whether this album is VA. - plur_albumartist, freq = util.plurality( - [i.albumartist or i.artist for i in self.items] - ) - if freq == len(self.items) or \ - (freq > 1 and - float(freq) / len(self.items) >= SINGLE_ARTIST_THRESH): - # Single-artist album. - changes['albumartist'] = plur_albumartist - changes['comp'] = False - else: - # VA. - changes['albumartist'] = VARIOUS_ARTISTS - changes['comp'] = True - - elif self.choice_flag == action.APPLY: - # Applying autotagged metadata. Just get AA from the first - # item. - if not self.items[0].albumartist: - changes['albumartist'] = self.items[0].artist - if not self.items[0].mb_albumartistid: - changes['mb_albumartistid'] = self.items[0].mb_artistid - - # Apply new metadata. - for item in self.items: - item.update(changes) - - def manipulate_files(self, move=False, copy=False, write=False, - link=False, session=None): - items = self.imported_items() - # Save the original paths of all items for deletion and pruning - # in the next step (finalization). - self.old_paths = [item.path for item in items] - for item in items: - if move or copy or link: - # In copy and link modes, treat re-imports specially: - # move in-library files. (Out-of-library files are - # copied/moved as usual). - old_path = item.path - if (copy or link) and self.replaced_items[item] and \ - session.lib.directory in util.ancestry(old_path): - item.move() - # We moved the item, so remove the - # now-nonexistent file from old_paths. - self.old_paths.remove(old_path) - else: - # A normal import. Just copy files and keep track of - # old paths. - item.move(copy, link) - - if write and self.apply: - item.try_write() - - with session.lib.transaction(): - for item in self.imported_items(): - item.store() - - plugins.send('import_task_files', session=session, task=self) - - def add(self, lib): - """Add the items as an album to the library and remove replaced items. - """ - self.align_album_level_fields() - with lib.transaction(): - self.record_replaced(lib) - self.remove_replaced(lib) - self.album = lib.add_album(self.imported_items()) - self.reimport_metadata(lib) - - def record_replaced(self, lib): - """Records the replaced items and albums in the `replaced_items` - and `replaced_albums` dictionaries. - """ - self.replaced_items = defaultdict(list) - self.replaced_albums = defaultdict(list) - replaced_album_ids = set() - for item in self.imported_items(): - dup_items = list(lib.items( - dbcore.query.BytesQuery('path', item.path) - )) - self.replaced_items[item] = dup_items - for dup_item in dup_items: - if (not dup_item.album_id or - dup_item.album_id in replaced_album_ids): - continue - replaced_album = dup_item.get_album() - if replaced_album: - replaced_album_ids.add(dup_item.album_id) - self.replaced_albums[replaced_album.path] = replaced_album - - def reimport_metadata(self, lib): - """For reimports, preserves metadata for reimported items and - albums. - """ - if self.is_album: - replaced_album = self.replaced_albums.get(self.album.path) - if replaced_album: - self.album.added = replaced_album.added - self.album.update(replaced_album._values_flex) - self.album.store() - log.debug( - u'Reimported album: added {0}, flexible ' - u'attributes {1} from album {2} for {3}'.format( - self.album.added, - replaced_album._values_flex.keys(), - replaced_album.id, - displayable_path(self.album.path), - ) - ) - - for item in self.imported_items(): - dup_items = self.replaced_items[item] - for dup_item in dup_items: - if dup_item.added and dup_item.added != item.added: - item.added = dup_item.added - log.debug( - u'Reimported item added {0} ' - u'from item {1} for {2}'.format( - item.added, - dup_item.id, - displayable_path(item.path), - ) - ) - item.update(dup_item._values_flex) - log.debug( - u'Reimported item flexible attributes {0} ' - u'from item {1} for {2}'.format( - dup_item._values_flex.keys(), - dup_item.id, - displayable_path(item.path), - ) - ) - item.store() - - def remove_replaced(self, lib): - """Removes all the items from the library that have the same - path as an item from this task. - """ - for item in self.imported_items(): - for dup_item in self.replaced_items[item]: - log.debug(u'Replacing item {0}: {1}' - .format(dup_item.id, - displayable_path(item.path))) - dup_item.remove() - log.debug(u'{0} of {1} items replaced' - .format(sum(bool(l) for l in self.replaced_items.values()), - len(self.imported_items()))) - - def choose_match(self, session): - """Ask the session which match should apply and apply it. - """ - choice = session.choose_match(self) - self.set_choice(choice) - session.log_choice(self) - - def reload(self): - """Reload albums and items from the database. - """ - for item in self.imported_items(): - item.load() - self.album.load() - - # Utilities. - - def prune(self, filename): - """Prune any empty directories above the given file. If this - task has no `toppath` or the file path provided is not within - the `toppath`, then this function has no effect. Similarly, if - the file still exists, no pruning is performed, so it's safe to - call when the file in question may not have been removed. - """ - if self.toppath and not os.path.exists(filename): - util.prune_dirs(os.path.dirname(filename), - self.toppath, - clutter=config['clutter'].as_str_seq()) - - -class SingletonImportTask(ImportTask): - """ImportTask for a single track that is not associated to an album. - """ - - def __init__(self, toppath, item): - super(SingletonImportTask, self).__init__(toppath, [item.path]) - self.item = item - self.is_album = False - self.paths = [item.path] - - def chosen_ident(self): - assert self.choice_flag in (action.ASIS, action.APPLY) - if self.choice_flag is action.ASIS: - return (self.item.artist, self.item.title) - elif self.choice_flag is action.APPLY: - return (self.match.info.artist, self.match.info.title) - - def imported_items(self): - return [self.item] - - def apply_metadata(self): - autotag.apply_item_metadata(self.item, self.match.info) - - def _emit_imported(self, lib): - # FIXME This shouldn't be here. Skipped tasks should be removed from - # the pipeline. - if self.skip: - return - for item in self.imported_items(): - plugins.send('item_imported', lib=lib, item=item) - - def lookup_candidates(self): - candidates, recommendation = autotag.tag_item(self.item) - self.candidates = candidates - self.rec = recommendation - - def find_duplicates(self, lib): - """Return a list of items from `lib` that have the same artist - and title as the task. - """ - artist, title = self.chosen_ident() - - found_items = [] - query = dbcore.AndQuery(( - dbcore.MatchQuery('artist', artist), - dbcore.MatchQuery('title', title), - )) - for other_item in lib.items(query): - # Existing items not considered duplicates. - if other_item.path != self.item.path: - found_items.append(other_item) - return found_items - - duplicate_items = find_duplicates - - def add(self, lib): - with lib.transaction(): - self.record_replaced(lib) - self.remove_replaced(lib) - lib.add(self.item) - self.reimport_metadata(lib) - - def infer_album_fields(self): - raise NotImplementedError - - def choose_match(self, session): - """Ask the session which match should apply and apply it. - """ - choice = session.choose_item(self) - self.set_choice(choice) - session.log_choice(self) - - def reload(self): - self.item.load() - - -# FIXME The inheritance relationships are inverted. This is why there -# are so many methods which pass. We should introduce a new -# BaseImportTask class. -class SentinelImportTask(ImportTask): - """This class marks the progress of an import and does not import - any items itself. - - If only `toppath` is set the task indicats the end of a top-level - directory import. If the `paths` argument is givent, too, the task - indicates the progress in the `toppath` import. - """ - - def __init__(self, toppath=None, paths=None): - self.toppath = toppath - self.paths = paths - # TODO Remove the remaining attributes eventually - self.items = None - self.should_remove_duplicates = False - self.is_album = True - self.choice_flag = None - - def save_history(self): - pass - - def save_progress(self): - if self.paths is None: - # "Done" sentinel. - progress_reset(self.toppath) - else: - # "Directory progress" sentinel for singletons - progress_add(self.toppath, *self.paths) - - def skip(self): - return True - - def set_choice(self, choice): - raise NotImplementedError - - def cleanup(self, **kwargs): - pass - - def _emit_imported(self, session): - pass - - -class ArchiveImportTask(SentinelImportTask): - """Additional methods for handling archives. - - Use when `toppath` points to a `zip`, `tar`, or `rar` archive. - """ - - def __init__(self, toppath): - super(ArchiveImportTask, self).__init__(toppath) - self.extracted = False - - @classmethod - def is_archive(cls, path): - """Returns true if the given path points to an archive that can - be handled. - """ - if not os.path.isfile(path): - return False - - for path_test, _ in cls.handlers(): - if path_test(path): - return True - return False - - @classmethod - def handlers(cls): - """Returns a list of archive handlers. - - Each handler is a `(path_test, ArchiveClass)` tuple. `path_test` - is a function that returns `True` if the given path can be - handled by `ArchiveClass`. `ArchiveClass` is a class that - implements the same interface as `tarfile.TarFile`. - """ - if not hasattr(cls, '_handlers'): - cls._handlers = [] - from zipfile import is_zipfile, ZipFile - cls._handlers.append((is_zipfile, ZipFile)) - from tarfile import is_tarfile, TarFile - cls._handlers.append((is_tarfile, TarFile)) - try: - from rarfile import is_rarfile, RarFile - except ImportError: - pass - else: - cls._handlers.append((is_rarfile, RarFile)) - - return cls._handlers - - def cleanup(self, **kwargs): - """Removes the temporary directory the archive was extracted to. - """ - if self.extracted: - shutil.rmtree(self.toppath) - - def extract(self): - """Extracts the archive to a temporary directory and sets - `toppath` to that directory. - """ - for path_test, handler_class in self.handlers(): - if path_test(self.toppath): - break - - try: - extract_to = mkdtemp() - archive = handler_class(self.toppath, mode='r') - archive.extractall(extract_to) - finally: - archive.close() - self.extracted = True - self.toppath = extract_to - - -class ImportTaskFactory(object): - """Create album and singleton import tasks for all media files in a - directory or path. - - Depending on the session's 'flat' and 'singleton' configuration, it - groups all media files contained in `toppath` into singleton or - album import tasks. - """ - def __init__(self, toppath, session): - self.toppath = toppath - self.session = session - self.skipped = 0 - - def tasks(self): - """Yield all import tasks for `self.toppath`. - - The behavior is configured by the session's 'flat', and - 'singleton' flags. - """ - for dirs, paths in self.paths(): - if self.session.config['singletons']: - for path in paths: - task = self.singleton(path) - if task: - yield task - yield self.sentinel(dirs) - - else: - task = self.album(paths, dirs) - if task: - yield task - - def paths(self): - """Walk `self.toppath` and yield pairs of directory lists and - path lists. - """ - if not os.path.isdir(syspath(self.toppath)): - yield ([self.toppath], [self.toppath]) - elif self.session.config['flat']: - paths = [] - for dirs, paths_in_dir in albums_in_dir(self.toppath): - paths += paths_in_dir - yield ([self.toppath], paths) - else: - for dirs, paths in albums_in_dir(self.toppath): - yield (dirs, paths) - - def singleton(self, path): - if self.session.already_imported(self.toppath, [path]): - log.debug(u'Skipping previously-imported path: {0}' - .format(displayable_path(path))) - self.skipped += 1 - return None - - item = self.read_item(path) - if item: - return SingletonImportTask(self.toppath, item) - else: - return None - - def album(self, paths, dirs=None): - """Return `ImportTask` with all media files from paths. - - `dirs` is a list of parent directories used to record already - imported albums. - """ - if not paths: - return None - - if dirs is None: - dirs = list(set(os.path.dirname(p) for p in paths)) - - if self.session.already_imported(self.toppath, dirs): - log.debug(u'Skipping previously-imported path: {0}' - .format(displayable_path(dirs))) - self.skipped += 1 - return None - - items = map(self.read_item, paths) - items = [item for item in items if item] - - if items: - return ImportTask(self.toppath, dirs, items) - else: - return None - - def sentinel(self, paths=None): - return SentinelImportTask(self.toppath, paths) - - def read_item(self, path): - """Return an item created from the path. - - If an item could not be read it returns None and logs an error. - """ - # TODO remove this method. Should be handled in ImportTask creation. - try: - return library.Item.from_path(path) - except library.ReadError as exc: - if isinstance(exc.reason, mediafile.FileTypeError): - # Silently ignore non-music files. - pass - elif isinstance(exc.reason, mediafile.UnreadableFileError): - log.warn(u'unreadable file: {0}'.format( - displayable_path(path)) - ) - else: - log.error(u'error reading {0}: {1}'.format( - displayable_path(path), - exc, - )) - - -# Full-album pipeline stages. - -def read_tasks(session): - """A generator yielding all the albums (as ImportTask objects) found - in the user-specified list of paths. In the case of a singleton - import, yields single-item tasks instead. - """ - skipped = 0 - for toppath in session.paths: - # Determine if we want to resume import of the toppath - session.ask_resume(toppath) - user_toppath = toppath - - # Extract archives. - archive_task = None - if ArchiveImportTask.is_archive(syspath(toppath)): - if not (session.config['move'] or session.config['copy']): - log.warn(u"Archive importing requires either " - "'copy' or 'move' to be enabled.") - continue - - log.debug(u'extracting archive {0}' - .format(displayable_path(toppath))) - archive_task = ArchiveImportTask(toppath) - try: - archive_task.extract() - except Exception as exc: - log.error(u'extraction failed: {0}'.format(exc)) - continue - - # Continue reading albums from the extracted directory. - toppath = archive_task.toppath - - task_factory = ImportTaskFactory(toppath, session) - imported = False - for t in task_factory.tasks(): - imported |= not t.skip - yield t - - # Indicate the directory is finished. - # FIXME hack to delete extracted archives - if archive_task is None: - yield task_factory.sentinel() - else: - yield archive_task - - if not imported: - log.warn(u'No files imported from {0}' - .format(displayable_path(user_toppath))) - - # Show skipped directories. - if skipped: - log.info(u'Skipped {0} directories.'.format(skipped)) - - -def query_tasks(session): - """A generator that works as a drop-in-replacement for read_tasks. - Instead of finding files from the filesystem, a query is used to - match items from the library. - """ - if session.config['singletons']: - # Search for items. - for item in session.lib.items(session.query): - yield SingletonImportTask(None, item) - - else: - # Search for albums. - for album in session.lib.albums(session.query): - log.debug(u'yielding album {0}: {1} - {2}' - .format(album.id, album.albumartist, album.album)) - items = list(album.items()) - - # Clear IDs from re-tagged items so they appear "fresh" when - # we add them back to the library. - for item in items: - item.id = None - item.album_id = None - - yield ImportTask(None, [album.item_dir()], items) - - -@pipeline.mutator_stage -def lookup_candidates(session, task): - """A coroutine for performing the initial MusicBrainz lookup for an - album. It accepts lists of Items and yields - (items, cur_artist, cur_album, candidates, rec) tuples. If no match - is found, all of the yielded parameters (except items) are None. - """ - if task.skip: - # FIXME This gets duplicated a lot. We need a better - # abstraction. - return - - plugins.send('import_task_start', session=session, task=task) - log.debug(u'Looking up: {0}'.format(displayable_path(task.paths))) - task.lookup_candidates() - - -@pipeline.stage -def user_query(session, task): - """A coroutine for interfacing with the user about the tagging - process. - - The coroutine accepts an ImportTask objects. It uses the - session's `choose_match` method to determine the `action` for - this task. Depending on the action additional stages are exectuted - and the processed task is yielded. - - It emits the ``import_task_choice`` event for plugins. Plugins have - acces to the choice via the ``taks.choice_flag`` property and may - choose to change it. - """ - if task.skip: - return task - - # Ask the user for a choice. - task.choose_match(session) - plugins.send('import_task_choice', session=session, task=task) - - # As-tracks: transition to singleton workflow. - if task.choice_flag is action.TRACKS: - # Set up a little pipeline for dealing with the singletons. - def emitter(task): - for item in task.items: - yield SingletonImportTask(task.toppath, item) - yield SentinelImportTask(task.toppath, task.paths) - - ipl = pipeline.Pipeline([ - emitter(task), - lookup_candidates(session), - user_query(session), - ]) - return pipeline.multiple(ipl.pull()) - - # As albums: group items by albums and create task for each album - if task.choice_flag is action.ALBUMS: - ipl = pipeline.Pipeline([ - iter([task]), - group_albums(session), - lookup_candidates(session), - user_query(session) - ]) - return pipeline.multiple(ipl.pull()) - - resolve_duplicates(session, task) - return task - - -def resolve_duplicates(session, task): - """Check if a task conflicts with items or albums already imported - and ask the session to resolve this. - """ - if task.choice_flag in (action.ASIS, action.APPLY): - ident = task.chosen_ident() - found_duplicates = task.find_duplicates(session.lib) - if ident in session.seen_idents or found_duplicates: - session.resolve_duplicate(task, found_duplicates) - session.log_choice(task, True) - session.seen_idents.add(ident) - - -@pipeline.mutator_stage -def import_asis(session, task): - """Select the `action.ASIS` choice for all tasks. - - This stage replaces the initial_lookup and user_query stages - when the importer is run without autotagging. - """ - if task.skip: - return - - log.info(displayable_path(task.paths)) - task.set_choice(action.ASIS) - - -@pipeline.mutator_stage -def apply_choices(session, task): - """A coroutine for applying changes to albums and singletons during - the autotag process. - """ - if task.skip: - return - - # Change metadata. - if task.apply: - task.apply_metadata() - plugins.send('import_task_apply', session=session, task=task) - - task.add(session.lib) - - -@pipeline.mutator_stage -def plugin_stage(session, func, task): - """A coroutine (pipeline stage) that calls the given function with - each non-skipped import task. These stages occur between applying - metadata changes and moving/copying/writing files. - """ - if task.skip: - return - - func(session, task) - - # Stage may modify DB, so re-load cached item data. - # FIXME Importer plugins should not modify the database but instead - # the albums and items attached to tasks. - task.reload() - - -@pipeline.stage -def manipulate_files(session, task): - """A coroutine (pipeline stage) that performs necessary file - manipulations *after* items have been added to the library and - finalizes each task. - """ - if not task.skip: - if task.should_remove_duplicates: - task.remove_duplicates(session.lib) - - task.manipulate_files( - move=session.config['move'], - copy=session.config['copy'], - write=session.config['write'], - link=session.config['link'], - session=session, - ) - - # Progress, cleanup, and event. - task.finalize(session) - - -@pipeline.stage -def log_files(session, task): - """A coroutine (pipeline stage) to log each file which will be imported - """ - if isinstance(task, SingletonImportTask): - log.info( - 'Singleton: {0}'.format(displayable_path(task.item['path']))) - elif task.items: - log.info('Album {0}'.format(displayable_path(task.paths[0]))) - for item in task.items: - log.info(' {0}'.format(displayable_path(item['path']))) - - -def group_albums(session): - """Group the items of a task by albumartist and album name and create a new - task for each album. Yield the tasks as a multi message. - """ - def group(item): - return (item.albumartist or item.artist, item.album) - - task = None - while True: - task = yield task - if task.skip: - continue - tasks = [] - for _, items in itertools.groupby(task.items, group): - tasks.append(ImportTask(items=list(items))) - tasks.append(SentinelImportTask(task.toppath, task.paths)) - - task = pipeline.multiple(tasks) - - -MULTIDISC_MARKERS = (r'dis[ck]', r'cd') -MULTIDISC_PAT_FMT = r'^(.*%s[\W_]*)\d' - - -def albums_in_dir(path): - """Recursively searches the given directory and returns an iterable - of (paths, items) where paths is a list of directories and items is - a list of Items that is probably an album. Specifically, any folder - containing any media files is an album. - """ - collapse_pat = collapse_paths = collapse_items = None - ignore = config['ignore'].as_str_seq() - - for root, dirs, files in sorted_walk(path, ignore=ignore, logger=log): - items = [os.path.join(root, f) for f in files] - # If we're currently collapsing the constituent directories in a - # multi-disc album, check whether we should continue collapsing - # and add the current directory. If so, just add the directory - # and move on to the next directory. If not, stop collapsing. - if collapse_paths: - if (not collapse_pat and collapse_paths[0] in ancestry(root)) or \ - (collapse_pat and - collapse_pat.match(os.path.basename(root))): - # Still collapsing. - collapse_paths.append(root) - collapse_items += items - continue - else: - # Collapse finished. Yield the collapsed directory and - # proceed to process the current one. - if collapse_items: - yield collapse_paths, collapse_items - collapse_pat = collapse_paths = collapse_items = None - - # Check whether this directory looks like the *first* directory - # in a multi-disc sequence. There are two indicators: the file - # is named like part of a multi-disc sequence (e.g., "Title Disc - # 1") or it contains no items but only directories that are - # named in this way. - start_collapsing = False - for marker in MULTIDISC_MARKERS: - marker_pat = re.compile(MULTIDISC_PAT_FMT % marker, re.I) - match = marker_pat.match(os.path.basename(root)) - - # Is this directory the root of a nested multi-disc album? - if dirs and not items: - # Check whether all subdirectories have the same prefix. - start_collapsing = True - subdir_pat = None - for subdir in dirs: - # The first directory dictates the pattern for - # the remaining directories. - if not subdir_pat: - match = marker_pat.match(subdir) - if match: - subdir_pat = re.compile( - r'^%s\d' % re.escape(match.group(1)), re.I - ) - else: - start_collapsing = False - break - - # Subsequent directories must match the pattern. - elif not subdir_pat.match(subdir): - start_collapsing = False - break - - # If all subdirectories match, don't check other - # markers. - if start_collapsing: - break - - # Is this directory the first in a flattened multi-disc album? - elif match: - start_collapsing = True - # Set the current pattern to match directories with the same - # prefix as this one, followed by a digit. - collapse_pat = re.compile( - r'^%s\d' % re.escape(match.group(1)), re.I - ) - break - - # If either of the above heuristics indicated that this is the - # beginning of a multi-disc album, initialize the collapsed - # directory and item lists and check the next directory. - if start_collapsing: - # Start collapsing; continue to the next iteration. - collapse_paths = [root] - collapse_items = items - continue - - # If it's nonempty, yield it. - if items: - yield [root], items - - # Clear out any unfinished collapse. - if collapse_paths and collapse_items: - yield collapse_paths, collapse_items diff --git a/lib/beets/library.py b/lib/beets/library.py deleted file mode 100644 index 1de1bba5..00000000 --- a/lib/beets/library.py +++ /dev/null @@ -1,1299 +0,0 @@ -# This file is part of beets. -# Copyright 2013, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""The core data store and collection logic for beets. -""" -import os -import sys -import logging -import shlex -import unicodedata -import time -import re -from unidecode import unidecode -from beets.mediafile import MediaFile, MutagenError, UnreadableFileError -from beets import plugins -from beets import util -from beets.util import bytestring_path, syspath, normpath, samefile -from beets.util.functemplate import Template -from beets import dbcore -from beets.dbcore import types -import beets - - -log = logging.getLogger('beets') - - -# Library-specific query types. - -class PathQuery(dbcore.FieldQuery): - """A query that matches all items under a given path.""" - - escape_re = re.compile(r'[\\_%]') - escape_char = '\\' - - def __init__(self, field, pattern, fast=True): - super(PathQuery, self).__init__(field, pattern, fast) - - # Match the path as a single file. - self.file_path = util.bytestring_path(util.normpath(pattern)) - # As a directory (prefix). - self.dir_path = util.bytestring_path(os.path.join(self.file_path, '')) - - def match(self, item): - return (item.path == self.file_path) or \ - item.path.startswith(self.dir_path) - - def clause(self): - escape = lambda m: self.escape_char + m.group(0) - dir_pattern = self.escape_re.sub(escape, self.dir_path) - dir_pattern = buffer(dir_pattern + '%') - file_blob = buffer(self.file_path) - return '({0} = ?) || ({0} LIKE ? ESCAPE ?)'.format(self.field), \ - (file_blob, dir_pattern, self.escape_char) - - -# Library-specific field types. - -class DateType(types.Float): - # TODO representation should be `datetime` object - # TODO distinguish beetween date and time types - query = dbcore.query.DateQuery - - def format(self, value): - return time.strftime(beets.config['time_format'].get(unicode), - time.localtime(value or 0)) - - def parse(self, string): - try: - # Try a formatted date string. - return time.mktime( - time.strptime(string, beets.config['time_format'].get(unicode)) - ) - except ValueError: - # Fall back to a plain timestamp number. - try: - return float(string) - except ValueError: - return self.null - - -class PathType(types.Type): - sql = u'BLOB' - query = PathQuery - model_type = bytes - - def format(self, value): - return util.displayable_path(value) - - def parse(self, string): - return normpath(bytestring_path(string)) - - def normalize(self, value): - if isinstance(value, unicode): - # Paths stored internally as encoded bytes. - return bytestring_path(value) - - elif isinstance(value, buffer): - # SQLite must store bytestings as buffers to avoid decoding. - # We unwrap buffers to bytes. - return bytes(value) - - else: - return value - - def from_sql(self, sql_value): - return self.normalize(sql_value) - - def to_sql(self, value): - if isinstance(value, str): - value = buffer(value) - return value - - -class MusicalKey(types.String): - """String representing the musical key of a song. - - The standard format is C, Cm, C#, C#m, etc. - """ - ENHARMONIC = { - r'db': 'c#', - r'eb': 'd#', - r'gb': 'f#', - r'ab': 'g#', - r'bb': 'a#', - } - - def parse(self, key): - key = key.lower() - for flat, sharp in self.ENHARMONIC.items(): - key = re.sub(flat, sharp, key) - key = re.sub(r'[\W\s]+minor', 'm', key) - return key.capitalize() - - def normalize(self, key): - if key is None: - return None - else: - return self.parse(key) - - -# Library-specific sort types. - -class SmartArtistSort(dbcore.query.Sort): - """Sort by artist (either album artist or track artist), - prioritizing the sort field over the raw field. - """ - def __init__(self, model_cls, ascending=True): - self.album = model_cls is Album - self.ascending = ascending - - def order_clause(self): - order = "ASC" if self.ascending else "DESC" - if self.album: - field = 'albumartist' - else: - field = 'artist' - return ('(CASE {0}_sort WHEN NULL THEN {0} ' - 'WHEN "" THEN {0} ' - 'ELSE {0}_sort END) {1}').format(field, order) - - def sort(self, objs): - if self.album: - key = lambda a: a.albumartist_sort or a.albumartist - else: - key = lambda i: i.artist_sort or i.artist - return sorted(objs, key=key, reverse=not self.ascending) - - -# Special path format key. -PF_KEY_DEFAULT = 'default' - - -# Exceptions. - -class FileOperationError(Exception): - """Indicates an error when interacting with a file on disk. - Possibilities include an unsupported media type, a permissions - error, and an unhandled Mutagen exception. - """ - def __init__(self, path, reason): - """Create an exception describing an operation on the file at - `path` with the underlying (chained) exception `reason`. - """ - super(FileOperationError, self).__init__(path, reason) - self.path = path - self.reason = reason - - def __unicode__(self): - """Get a string representing the error. Describes both the - underlying reason and the file path in question. - """ - return u'{0}: {1}'.format( - util.displayable_path(self.path), - unicode(self.reason) - ) - - def __str__(self): - return unicode(self).encode('utf8') - - -class ReadError(FileOperationError): - """An error while reading a file (i.e. in `Item.read`). - """ - def __unicode__(self): - return u'error reading ' + super(ReadError, self).__unicode__() - - -class WriteError(FileOperationError): - """An error while writing a file (i.e. in `Item.write`). - """ - def __unicode__(self): - return u'error writing ' + super(WriteError, self).__unicode__() - - -# Item and Album model classes. - -class LibModel(dbcore.Model): - """Shared concrete functionality for Items and Albums. - """ - - def _template_funcs(self): - funcs = DefaultTemplateFunctions(self, self._db).functions() - funcs.update(plugins.template_funcs()) - return funcs - - def store(self): - super(LibModel, self).store() - plugins.send('database_change', lib=self._db) - - def remove(self): - super(LibModel, self).remove() - plugins.send('database_change', lib=self._db) - - def add(self, lib=None): - super(LibModel, self).add(lib) - plugins.send('database_change', lib=self._db) - - -class FormattedItemMapping(dbcore.db.FormattedMapping): - """Add lookup for album-level fields. - - Album-level fields take precedence if `for_path` is true. - """ - - def __init__(self, item, for_path=False): - super(FormattedItemMapping, self).__init__(item, for_path) - self.album = item.get_album() - self.album_keys = [] - if self.album: - for key in self.album.keys(True): - if key in Album.item_keys or key not in item._fields.keys(): - self.album_keys.append(key) - self.all_keys = set(self.model_keys).union(self.album_keys) - - def _get(self, key): - """Get the value for a key, either from the album or the item. - Raise a KeyError for invalid keys. - """ - if self.for_path and key in self.album_keys: - return self._get_formatted(self.album, key) - elif key in self.model_keys: - return self._get_formatted(self.model, key) - elif key in self.album_keys: - return self._get_formatted(self.album, key) - else: - raise KeyError(key) - - def __getitem__(self, key): - """Get the value for a key. Certain unset values are remapped. - """ - value = self._get(key) - - # `artist` and `albumartist` fields fall back to one another. - # This is helpful in path formats when the album artist is unset - # on as-is imports. - if key == 'artist' and not value: - return self._get('albumartist') - elif key == 'albumartist' and not value: - return self._get('artist') - else: - return value - - def __iter__(self): - return iter(self.all_keys) - - def __len__(self): - return len(self.all_keys) - - -class Item(LibModel): - _table = 'items' - _flex_table = 'item_attributes' - _fields = { - 'id': types.PRIMARY_ID, - 'path': PathType(), - 'album_id': types.FOREIGN_ID, - - 'title': types.STRING, - 'artist': types.STRING, - 'artist_sort': types.STRING, - 'artist_credit': types.STRING, - 'album': types.STRING, - 'albumartist': types.STRING, - 'albumartist_sort': types.STRING, - 'albumartist_credit': types.STRING, - 'genre': types.STRING, - 'composer': types.STRING, - 'grouping': types.STRING, - 'year': types.PaddedInt(4), - 'month': types.PaddedInt(2), - 'day': types.PaddedInt(2), - 'track': types.PaddedInt(2), - 'tracktotal': types.PaddedInt(2), - 'disc': types.PaddedInt(2), - 'disctotal': types.PaddedInt(2), - 'lyrics': types.STRING, - 'comments': types.STRING, - 'bpm': types.INTEGER, - 'comp': types.BOOLEAN, - 'mb_trackid': types.STRING, - 'mb_albumid': types.STRING, - 'mb_artistid': types.STRING, - 'mb_albumartistid': types.STRING, - 'albumtype': types.STRING, - 'label': types.STRING, - 'acoustid_fingerprint': types.STRING, - 'acoustid_id': types.STRING, - 'mb_releasegroupid': types.STRING, - 'asin': types.STRING, - 'catalognum': types.STRING, - 'script': types.STRING, - 'language': types.STRING, - 'country': types.STRING, - 'albumstatus': types.STRING, - 'media': types.STRING, - 'albumdisambig': types.STRING, - 'disctitle': types.STRING, - 'encoder': types.STRING, - 'rg_track_gain': types.NULL_FLOAT, - 'rg_track_peak': types.NULL_FLOAT, - 'rg_album_gain': types.NULL_FLOAT, - 'rg_album_peak': types.NULL_FLOAT, - 'original_year': types.PaddedInt(4), - 'original_month': types.PaddedInt(2), - 'original_day': types.PaddedInt(2), - 'initial_key': MusicalKey(), - - 'length': types.FLOAT, - 'bitrate': types.ScaledInt(1000, u'kbps'), - 'format': types.STRING, - 'samplerate': types.ScaledInt(1000, u'kHz'), - 'bitdepth': types.INTEGER, - 'channels': types.INTEGER, - 'mtime': DateType(), - 'added': DateType(), - } - - _search_fields = ('artist', 'title', 'comments', - 'album', 'albumartist', 'genre') - - _media_fields = set(MediaFile.readable_fields()) \ - .intersection(_fields.keys()) - """Set of item fields that are backed by `MediaFile` fields. - - Any kind of field (fixed, flexible, and computed) may be a media - field. Only these fields are read from disk in `read` and written in - `write`. - """ - - _formatter = FormattedItemMapping - - _sorts = {'artist': SmartArtistSort} - - @classmethod - def _getters(cls): - getters = plugins.item_field_getters() - getters['singleton'] = lambda i: i.album_id is None - return getters - - @classmethod - def from_path(cls, path): - """Creates a new item from the media file at the specified path. - """ - # Initiate with values that aren't read from files. - i = cls(album_id=None) - i.read(path) - i.mtime = i.current_mtime() # Initial mtime. - return i - - def __setitem__(self, key, value): - """Set the item's value for a standard field or a flexattr. - """ - # Encode unicode paths and read buffers. - if key == 'path': - if isinstance(value, unicode): - value = bytestring_path(value) - elif isinstance(value, buffer): - value = str(value) - - if key in MediaFile.fields(): - self.mtime = 0 # Reset mtime on dirty. - - super(Item, self).__setitem__(key, value) - - def update(self, values): - """Set all key/value pairs in the mapping. If mtime is - specified, it is not reset (as it might otherwise be). - """ - super(Item, self).update(values) - if self.mtime == 0 and 'mtime' in values: - self.mtime = values['mtime'] - - def get_album(self): - """Get the Album object that this item belongs to, if any, or - None if the item is a singleton or is not associated with a - library. - """ - if not self._db: - return None - return self._db.get_album(self) - - # Interaction with file metadata. - - def read(self, read_path=None): - """Read the metadata from the associated file. - - If `read_path` is specified, read metadata from that file - instead. Updates all the properties in `_media_fields` - from the media file. - - Raises a `ReadError` if the file could not be read. - """ - if read_path is None: - read_path = self.path - else: - read_path = normpath(read_path) - try: - mediafile = MediaFile(syspath(read_path)) - except (OSError, IOError, UnreadableFileError) as exc: - raise ReadError(read_path, exc) - - for key in self._media_fields: - value = getattr(mediafile, key) - if isinstance(value, (int, long)): - # Filter values wider than 64 bits (in signed representation). - # SQLite cannot store them. py26: Post transition, we can use: - # value.bit_length() > 63 - if abs(value) >= 2 ** 63: - value = 0 - self[key] = value - - # Database's mtime should now reflect the on-disk value. - if read_path == self.path: - self.mtime = self.current_mtime() - - self.path = read_path - - def write(self, path=None): - """Write the item's metadata to a media file. - - All fields in `_media_fields` are written to disk according to - the values on this object. - - Can raise either a `ReadError` or a `WriteError`. - """ - if path is None: - path = self.path - else: - path = normpath(path) - - tags = dict(self) - plugins.send('write', item=self, path=path, tags=tags) - - try: - mediafile = MediaFile(syspath(path), - id3v23=beets.config['id3v23'].get(bool)) - except (OSError, IOError, UnreadableFileError) as exc: - raise ReadError(self.path, exc) - - mediafile.update(tags) - try: - mediafile.save() - except (OSError, IOError, MutagenError) as exc: - raise WriteError(self.path, exc) - - # The file has a new mtime. - if path == self.path: - self.mtime = self.current_mtime() - plugins.send('after_write', item=self, path=path) - - def try_write(self, path=None): - """Calls `write()` but catches and logs `FileOperationError` - exceptions. - - Returns `False` an exception was caught and `True` otherwise. - """ - try: - self.write(path) - return True - except FileOperationError as exc: - log.error(exc) - return False - - def try_sync(self, write=None): - """Synchronize the item with the database and the media file - tags, updating them with this object's current state. - - By default, the current `path` for the item is used to write - tags. If `write` is `False`, no tags are written. If `write` is - a path, tags are written to that file instead. - - Similar to calling :meth:`write` and :meth:`store`. - """ - if write is True: - write = None - if write is not False: - self.try_write(path=write) - self.store() - - # Files themselves. - - def move_file(self, dest, copy=False, link=False): - """Moves or copies the item's file, updating the path value if - the move succeeds. If a file exists at ``dest``, then it is - slightly modified to be unique. - """ - if not util.samefile(self.path, dest): - dest = util.unique_path(dest) - if copy: - util.copy(self.path, dest) - plugins.send("item_copied", item=self, source=self.path, - destination=dest) - elif link: - util.link(self.path, dest) - plugins.send("item_linked", item=self, source=self.path, - destination=dest) - else: - plugins.send("before_item_moved", item=self, source=self.path, - destination=dest) - util.move(self.path, dest) - plugins.send("item_moved", item=self, source=self.path, - destination=dest) - - # Either copying or moving succeeded, so update the stored path. - self.path = dest - - def current_mtime(self): - """Returns the current mtime of the file, rounded to the nearest - integer. - """ - return int(os.path.getmtime(syspath(self.path))) - - # Model methods. - - def remove(self, delete=False, with_album=True): - """Removes the item. If `delete`, then the associated file is - removed from disk. If `with_album`, then the item's album (if - any) is removed if it the item was the last in the album. - """ - super(Item, self).remove() - - # Remove the album if it is empty. - if with_album: - album = self.get_album() - if album and not album.items(): - album.remove(delete, False) - - # Send a 'item_removed' signal to plugins - plugins.send('item_removed', item=self) - - # Delete the associated file. - if delete: - util.remove(self.path) - util.prune_dirs(os.path.dirname(self.path), self._db.directory) - - self._db._memotable = {} - - def move(self, copy=False, link=False, basedir=None, with_album=True): - """Move the item to its designated location within the library - directory (provided by destination()). Subdirectories are - created as needed. If the operation succeeds, the item's path - field is updated to reflect the new location. - - If `copy` is true, moving the file is copied rather than moved. - Similarly, `link` creates a symlink instead. - - basedir overrides the library base directory for the - destination. - - If the item is in an album, the album is given an opportunity to - move its art. (This can be disabled by passing - with_album=False.) - - The item is stored to the database if it is in the database, so - any dirty fields prior to the move() call will be written as a - side effect. You probably want to call save() to commit the DB - transaction. - """ - self._check_db() - dest = self.destination(basedir=basedir) - - # Create necessary ancestry for the move. - util.mkdirall(dest) - - # Perform the move and store the change. - old_path = self.path - self.move_file(dest, copy, link) - self.store() - - # If this item is in an album, move its art. - if with_album: - album = self.get_album() - if album: - album.move_art(copy) - album.store() - - # Prune vacated directory. - if not copy: - util.prune_dirs(os.path.dirname(old_path), self._db.directory) - - # Templating. - - def destination(self, fragment=False, basedir=None, platform=None, - path_formats=None): - """Returns the path in the library directory designated for the - item (i.e., where the file ought to be). fragment makes this - method return just the path fragment underneath the root library - directory; the path is also returned as Unicode instead of - encoded as a bytestring. basedir can override the library's base - directory for the destination. - """ - self._check_db() - platform = platform or sys.platform - basedir = basedir or self._db.directory - path_formats = path_formats or self._db.path_formats - - # Use a path format based on a query, falling back on the - # default. - for query, path_format in path_formats: - if query == PF_KEY_DEFAULT: - continue - query, _ = parse_query_string(query, type(self)) - if query.match(self): - # The query matches the item! Use the corresponding path - # format. - break - else: - # No query matched; fall back to default. - for query, path_format in path_formats: - if query == PF_KEY_DEFAULT: - break - else: - assert False, "no default path format" - if isinstance(path_format, Template): - subpath_tmpl = path_format - else: - subpath_tmpl = Template(path_format) - - # Evaluate the selected template. - subpath = self.evaluate_template(subpath_tmpl, True) - - # Prepare path for output: normalize Unicode characters. - if platform == 'darwin': - subpath = unicodedata.normalize('NFD', subpath) - else: - subpath = unicodedata.normalize('NFC', subpath) - - if beets.config['asciify_paths']: - subpath = unidecode(subpath) - - # Truncate components and remove forbidden characters. - subpath = util.sanitize_path(subpath, self._db.replacements) - - # Encode for the filesystem. - if not fragment: - subpath = bytestring_path(subpath) - - # Preserve extension. - _, extension = os.path.splitext(self.path) - if fragment: - # Outputting Unicode. - extension = extension.decode('utf8', 'ignore') - subpath += extension.lower() - - # Truncate too-long components. - maxlen = beets.config['max_filename_length'].get(int) - if not maxlen: - # When zero, try to determine from filesystem. - maxlen = util.max_filename_length(self._db.directory) - subpath = util.truncate_path(subpath, maxlen) - - if fragment: - return subpath - else: - return normpath(os.path.join(basedir, subpath)) - - -class Album(LibModel): - """Provides access to information about albums stored in a - library. Reflects the library's "albums" table, including album - art. - """ - _table = 'albums' - _flex_table = 'album_attributes' - _always_dirty = True - _fields = { - 'id': types.PRIMARY_ID, - 'artpath': PathType(), - 'added': DateType(), - - 'albumartist': types.STRING, - 'albumartist_sort': types.STRING, - 'albumartist_credit': types.STRING, - 'album': types.STRING, - 'genre': types.STRING, - 'year': types.PaddedInt(4), - 'month': types.PaddedInt(2), - 'day': types.PaddedInt(2), - 'tracktotal': types.PaddedInt(2), - 'disctotal': types.PaddedInt(2), - 'comp': types.BOOLEAN, - 'mb_albumid': types.STRING, - 'mb_albumartistid': types.STRING, - 'albumtype': types.STRING, - 'label': types.STRING, - 'mb_releasegroupid': types.STRING, - 'asin': types.STRING, - 'catalognum': types.STRING, - 'script': types.STRING, - 'language': types.STRING, - 'country': types.STRING, - 'albumstatus': types.STRING, - 'albumdisambig': types.STRING, - 'rg_album_gain': types.NULL_FLOAT, - 'rg_album_peak': types.NULL_FLOAT, - 'original_year': types.PaddedInt(4), - 'original_month': types.PaddedInt(2), - 'original_day': types.PaddedInt(2), - } - - _search_fields = ('album', 'albumartist', 'genre') - - _sorts = { - 'albumartist': SmartArtistSort, - 'artist': SmartArtistSort, - } - - item_keys = [ - 'added', - 'albumartist', - 'albumartist_sort', - 'albumartist_credit', - 'album', - 'genre', - 'year', - 'month', - 'day', - 'tracktotal', - 'disctotal', - 'comp', - 'mb_albumid', - 'mb_albumartistid', - 'albumtype', - 'label', - 'mb_releasegroupid', - 'asin', - 'catalognum', - 'script', - 'language', - 'country', - 'albumstatus', - 'albumdisambig', - 'rg_album_gain', - 'rg_album_peak', - 'original_year', - 'original_month', - 'original_day', - ] - """List of keys that are set on an album's items. - """ - - @classmethod - def _getters(cls): - # In addition to plugin-provided computed fields, also expose - # the album's directory as `path`. - getters = plugins.album_field_getters() - getters['path'] = Album.item_dir - return getters - - def items(self): - """Returns an iterable over the items associated with this - album. - """ - return self._db.items(dbcore.MatchQuery('album_id', self.id)) - - def remove(self, delete=False, with_items=True): - """Removes this album and all its associated items from the - library. If delete, then the items' files are also deleted - from disk, along with any album art. The directories - containing the album are also removed (recursively) if empty. - Set with_items to False to avoid removing the album's items. - """ - super(Album, self).remove() - - # Delete art file. - if delete: - artpath = self.artpath - if artpath: - util.remove(artpath) - - # Remove (and possibly delete) the constituent items. - if with_items: - for item in self.items(): - item.remove(delete, False) - - def move_art(self, copy=False, link=False): - """Move or copy any existing album art so that it remains in the - same directory as the items. - """ - old_art = self.artpath - if not old_art: - return - - new_art = self.art_destination(old_art) - if new_art == old_art: - return - - new_art = util.unique_path(new_art) - log.debug(u'moving album art {0} to {1}' - .format(util.displayable_path(old_art), - util.displayable_path(new_art))) - if copy: - util.copy(old_art, new_art) - elif link: - util.link(old_art, new_art) - else: - util.move(old_art, new_art) - self.artpath = new_art - - # Prune old path when moving. - if not copy: - util.prune_dirs(os.path.dirname(old_art), - self._db.directory) - - def move(self, copy=False, link=False, basedir=None): - """Moves (or copies) all items to their destination. Any album - art moves along with them. basedir overrides the library base - directory for the destination. The album is stored to the - database, persisting any modifications to its metadata. - """ - basedir = basedir or self._db.directory - - # Ensure new metadata is available to items for destination - # computation. - self.store() - - # Move items. - items = list(self.items()) - for item in items: - item.move(copy, link, basedir=basedir, with_album=False) - - # Move art. - self.move_art(copy, link) - self.store() - - def item_dir(self): - """Returns the directory containing the album's first item, - provided that such an item exists. - """ - item = self.items().get() - if not item: - raise ValueError('empty album') - return os.path.dirname(item.path) - - def art_destination(self, image, item_dir=None): - """Returns a path to the destination for the album art image - for the album. `image` is the path of the image that will be - moved there (used for its extension). - - The path construction uses the existing path of the album's - items, so the album must contain at least one item or - item_dir must be provided. - """ - image = bytestring_path(image) - item_dir = item_dir or self.item_dir() - - filename_tmpl = Template(beets.config['art_filename'].get(unicode)) - subpath = self.evaluate_template(filename_tmpl, True) - if beets.config['asciify_paths']: - subpath = unidecode(subpath) - subpath = util.sanitize_path(subpath, - replacements=self._db.replacements) - subpath = bytestring_path(subpath) - - _, ext = os.path.splitext(image) - dest = os.path.join(item_dir, subpath + ext) - - return bytestring_path(dest) - - def set_art(self, path, copy=True): - """Sets the album's cover art to the image at the given path. - The image is copied (or moved) into place, replacing any - existing art. - """ - path = bytestring_path(path) - oldart = self.artpath - artdest = self.art_destination(path) - - if oldart and samefile(path, oldart): - # Art already set. - return - elif samefile(path, artdest): - # Art already in place. - self.artpath = path - return - - # Normal operation. - if oldart == artdest: - util.remove(oldart) - artdest = util.unique_path(artdest) - if copy: - util.copy(path, artdest) - else: - util.move(path, artdest) - self.artpath = artdest - - def store(self): - """Update the database with the album information. The album's - tracks are also updated. - """ - # Get modified track fields. - track_updates = {} - for key in self.item_keys: - if key in self._dirty: - track_updates[key] = self[key] - - with self._db.transaction(): - super(Album, self).store() - if track_updates: - for item in self.items(): - for key, value in track_updates.items(): - item[key] = value - item.store() - - def try_sync(self, write=True): - """Synchronize the album and its items with the database and - their files by updating them with this object's current state. - - `write` indicates whether to write tags to the item files. - """ - self.store() - for item in self.items(): - item.try_sync(bool(write)) - - -# Query construction helpers. - -def parse_query_parts(parts, model_cls): - """Given a beets query string as a list of components, return the - `Query` and `Sort` they represent. - - Like `dbcore.parse_sorted_query`, with beets query prefixes and - special path query detection. - """ - # Get query types and their prefix characters. - prefixes = {':': dbcore.query.RegexpQuery} - prefixes.update(plugins.queries()) - - # Special-case path-like queries, which are non-field queries - # containing path separators (/). - if 'path' in model_cls._fields: - path_parts = [] - non_path_parts = [] - for s in parts: - if s.find(os.sep, 0, s.find(':')) != -1: - # Separator precedes colon. - path_parts.append(s) - else: - non_path_parts.append(s) - else: - path_parts = () - non_path_parts = parts - - query, sort = dbcore.parse_sorted_query( - model_cls, non_path_parts, prefixes - ) - - # Add path queries to aggregate query. - if path_parts: - query.subqueries += [PathQuery('path', s) for s in path_parts] - return query, sort - - -def parse_query_string(s, model_cls): - """Given a beets query string, return the `Query` and `Sort` they - represent. - - The string is split into components using shell-like syntax. - """ - # A bug in Python < 2.7.3 prevents correct shlex splitting of - # Unicode strings. - # http://bugs.python.org/issue6988 - if isinstance(s, unicode): - s = s.encode('utf8') - parts = [p.decode('utf8') for p in shlex.split(s)] - return parse_query_parts(parts, model_cls) - - -# The Library: interface to the database. - -class Library(dbcore.Database): - """A database of music containing songs and albums. - """ - _models = (Item, Album) - - def __init__(self, path='library.blb', - directory='~/Music', - path_formats=((PF_KEY_DEFAULT, - '$artist/$album/$track $title'),), - replacements=None): - if path != ':memory:': - self.path = bytestring_path(normpath(path)) - super(Library, self).__init__(path) - - self.directory = bytestring_path(normpath(directory)) - self.path_formats = path_formats - self.replacements = replacements - - self._memotable = {} # Used for template substitution performance. - - # Adding objects to the database. - - def add(self, obj): - """Add the :class:`Item` or :class:`Album` object to the library - database. Return the object's new id. - """ - obj.add(self) - self._memotable = {} - return obj.id - - def add_album(self, items): - """Create a new album consisting of a list of items. - - The items are added to the database if they don't yet have an - ID. Return a new :class:`Album` object. The list items must not - be empty. - """ - if not items: - raise ValueError(u'need at least one item') - - # Create the album structure using metadata from the first item. - values = dict((key, items[0][key]) for key in Album.item_keys) - album = Album(self, **values) - - # Add the album structure and set the items' album_id fields. - # Store or add the items. - with self.transaction(): - album.add(self) - for item in items: - item.album_id = album.id - if item.id is None: - item.add(self) - else: - item.store() - - return album - - # Querying. - - def _fetch(self, model_cls, query, sort=None): - """Parse a query and fetch. If a order specification is present - in the query string the `sort` argument is ignored. - """ - # Parse the query, if necessary. - parsed_sort = None - if isinstance(query, basestring): - query, parsed_sort = parse_query_string(query, model_cls) - elif isinstance(query, (list, tuple)): - query, parsed_sort = parse_query_parts(query, model_cls) - - # Any non-null sort specified by the parsed query overrides the - # provided sort. - if parsed_sort and not isinstance(parsed_sort, dbcore.query.NullSort): - sort = parsed_sort - - return super(Library, self)._fetch( - model_cls, query, sort - ) - - def albums(self, query=None, sort=None): - """Get :class:`Album` objects matching the query. - """ - sort = sort or dbcore.sort_from_strings( - Album, beets.config['sort_album'].as_str_seq() - ) - return self._fetch(Album, query, sort) - - def items(self, query=None, sort=None): - """Get :class:`Item` objects matching the query. - """ - sort = sort or dbcore.sort_from_strings( - Item, beets.config['sort_item'].as_str_seq() - ) - return self._fetch(Item, query, sort) - - # Convenience accessors. - - def get_item(self, id): - """Fetch an :class:`Item` by its ID. Returns `None` if no match is - found. - """ - return self._get(Item, id) - - def get_album(self, item_or_id): - """Given an album ID or an item associated with an album, return - an :class:`Album` object for the album. If no such album exists, - returns `None`. - """ - if isinstance(item_or_id, int): - album_id = item_or_id - else: - album_id = item_or_id.album_id - if album_id is None: - return None - return self._get(Album, album_id) - - -# Default path template resources. - -def _int_arg(s): - """Convert a string argument to an integer for use in a template - function. May raise a ValueError. - """ - return int(s.strip()) - - -class DefaultTemplateFunctions(object): - """A container class for the default functions provided to path - templates. These functions are contained in an object to provide - additional context to the functions -- specifically, the Item being - evaluated. - """ - _prefix = 'tmpl_' - - def __init__(self, item=None, lib=None): - """Paramaterize the functions. If `item` or `lib` is None, then - some functions (namely, ``aunique``) will always evaluate to the - empty string. - """ - self.item = item - self.lib = lib - - def functions(self): - """Returns a dictionary containing the functions defined in this - object. The keys are function names (as exposed in templates) - and the values are Python functions. - """ - out = {} - for key in self._func_names: - out[key[len(self._prefix):]] = getattr(self, key) - return out - - @staticmethod - def tmpl_lower(s): - """Convert a string to lower case.""" - return s.lower() - - @staticmethod - def tmpl_upper(s): - """Covert a string to upper case.""" - return s.upper() - - @staticmethod - def tmpl_title(s): - """Convert a string to title case.""" - return s.title() - - @staticmethod - def tmpl_left(s, chars): - """Get the leftmost characters of a string.""" - return s[0:_int_arg(chars)] - - @staticmethod - def tmpl_right(s, chars): - """Get the rightmost characters of a string.""" - return s[-_int_arg(chars):] - - @staticmethod - def tmpl_if(condition, trueval, falseval=u''): - """If ``condition`` is nonempty and nonzero, emit ``trueval``; - otherwise, emit ``falseval`` (if provided). - """ - try: - int_condition = _int_arg(condition) - except ValueError: - if condition.lower() == "false": - return falseval - else: - condition = int_condition - - if condition: - return trueval - else: - return falseval - - @staticmethod - def tmpl_asciify(s): - """Translate non-ASCII characters to their ASCII equivalents. - """ - return unidecode(s) - - @staticmethod - def tmpl_time(s, format): - """Format a time value using `strftime`. - """ - cur_fmt = beets.config['time_format'].get(unicode) - return time.strftime(format, time.strptime(s, cur_fmt)) - - def tmpl_aunique(self, keys=None, disam=None): - """Generate a string that is guaranteed to be unique among all - albums in the library who share the same set of keys. A fields - from "disam" is used in the string if one is sufficient to - disambiguate the albums. Otherwise, a fallback opaque value is - used. Both "keys" and "disam" should be given as - whitespace-separated lists of field names. - """ - # Fast paths: no album, no item or library, or memoized value. - if not self.item or not self.lib: - return u'' - if self.item.album_id is None: - return u'' - memokey = ('aunique', keys, disam, self.item.album_id) - memoval = self.lib._memotable.get(memokey) - if memoval is not None: - return memoval - - keys = keys or 'albumartist album' - disam = disam or 'albumtype year label catalognum albumdisambig' - keys = keys.split() - disam = disam.split() - - album = self.lib.get_album(self.item) - if not album: - # Do nothing for singletons. - self.lib._memotable[memokey] = u'' - return u'' - - # Find matching albums to disambiguate with. - subqueries = [] - for key in keys: - value = getattr(album, key) - subqueries.append(dbcore.MatchQuery(key, value)) - albums = self.lib.albums(dbcore.AndQuery(subqueries)) - - # If there's only one album to matching these details, then do - # nothing. - if len(albums) == 1: - self.lib._memotable[memokey] = u'' - return u'' - - # Find the first disambiguator that distinguishes the albums. - for disambiguator in disam: - # Get the value for each album for the current field. - disam_values = set([getattr(a, disambiguator) for a in albums]) - - # If the set of unique values is equal to the number of - # albums in the disambiguation set, we're done -- this is - # sufficient disambiguation. - if len(disam_values) == len(albums): - break - - else: - # No disambiguator distinguished all fields. - res = u' {0}'.format(album.id) - self.lib._memotable[memokey] = res - return res - - # Flatten disambiguation value into a string. - disam_value = album.formatted(True).get(disambiguator) - res = u' [{0}]'.format(disam_value) - self.lib._memotable[memokey] = res - return res - - -# Get the name of tmpl_* functions in the above class. -DefaultTemplateFunctions._func_names = \ - [s for s in dir(DefaultTemplateFunctions) - if s.startswith(DefaultTemplateFunctions._prefix)] diff --git a/lib/beets/mediafile.py b/lib/beets/mediafile.py deleted file mode 100644 index 49ef1037..00000000 --- a/lib/beets/mediafile.py +++ /dev/null @@ -1,1929 +0,0 @@ -# This file is part of beets. -# Copyright 2014, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Handles low-level interfacing for files' tags. Wraps Mutagen to -automatically detect file types and provide a unified interface for a -useful subset of music files' tags. - -Usage: - - >>> f = MediaFile('Lucy.mp3') - >>> f.title - u'Lucy in the Sky with Diamonds' - >>> f.artist = 'The Beatles' - >>> f.save() - -A field will always return a reasonable value of the correct type, even -if no tag is present. If no value is available, the value will be false -(e.g., zero or the empty string). - -Internally ``MediaFile`` uses ``MediaField`` descriptors to access the -data from the tags. In turn ``MediaField`` uses a number of -``StorageStyle`` strategies to handle format specific logic. -""" -import mutagen -import mutagen.mp3 -import mutagen.oggopus -import mutagen.oggvorbis -import mutagen.mp4 -import mutagen.flac -import mutagen.monkeysaudio -import mutagen.asf -import mutagen.aiff -import datetime -import re -import base64 -import math -import struct -import imghdr -import os -import logging -import traceback -import enum - -from beets.util import displayable_path - - -__all__ = ['UnreadableFileError', 'FileTypeError', 'MediaFile'] - -log = logging.getLogger('beets') - -# Human-readable type names. -TYPES = { - 'mp3': 'MP3', - 'aac': 'AAC', - 'alac': 'ALAC', - 'ogg': 'OGG', - 'opus': 'Opus', - 'flac': 'FLAC', - 'ape': 'APE', - 'wv': 'WavPack', - 'mpc': 'Musepack', - 'asf': 'Windows Media', - 'aiff': 'AIFF', -} - - -# Exceptions. - -class UnreadableFileError(Exception): - """Mutagen is not able to extract information from the file. - """ - def __init__(self, path): - Exception.__init__(self, displayable_path(path)) - - -class FileTypeError(UnreadableFileError): - """Reading this type of file is not supported. - - If passed the `mutagen_type` argument this indicates that the - mutagen type is not supported by `Mediafile`. - """ - def __init__(self, path, mutagen_type=None): - path = displayable_path(path) - if mutagen_type is None: - msg = path - else: - msg = u'{0}: of mutagen type {1}'.format(path, mutagen_type) - Exception.__init__(self, msg) - - -class MutagenError(UnreadableFileError): - """Raised when Mutagen fails unexpectedly---probably due to a bug. - """ - def __init__(self, path, mutagen_exc): - msg = u'{0}: {1}'.format(displayable_path(path), mutagen_exc) - Exception.__init__(self, msg) - - -# Utility. - -def _safe_cast(out_type, val): - """Try to covert val to out_type but never raise an exception. If - the value can't be converted, then a sensible default value is - returned. out_type should be bool, int, or unicode; otherwise, the - value is just passed through. - """ - if val is None: - return None - - if out_type == int: - if isinstance(val, int) or isinstance(val, float): - # Just a number. - return int(val) - else: - # Process any other type as a string. - if not isinstance(val, basestring): - val = unicode(val) - # Get a number from the front of the string. - val = re.match(r'[0-9]*', val.strip()).group(0) - if not val: - return 0 - else: - return int(val) - - elif out_type == bool: - try: - # Should work for strings, bools, ints: - return bool(int(val)) - except ValueError: - return False - - elif out_type == unicode: - if isinstance(val, str): - return val.decode('utf8', 'ignore') - elif isinstance(val, unicode): - return val - else: - return unicode(val) - - elif out_type == float: - if isinstance(val, int) or isinstance(val, float): - return float(val) - else: - if not isinstance(val, basestring): - val = unicode(val) - match = re.match(r'[\+-]?[0-9\.]+', val.strip()) - if match: - val = match.group(0) - if val: - return float(val) - return 0.0 - - else: - return val - - -# Image coding for ASF/WMA. - -def _unpack_asf_image(data): - """Unpack image data from a WM/Picture tag. Return a tuple - containing the MIME type, the raw image data, a type indicator, and - the image's description. - - This function is treated as "untrusted" and could throw all manner - of exceptions (out-of-bounds, etc.). We should clean this up - sometime so that the failure modes are well-defined. - """ - type, size = struct.unpack_from(" 0: - gain = math.log10(maxgain / 1000.0) * -10 - else: - # Invalid gain value found. - gain = 0.0 - - # SoundCheck stores peak values as the actual value of the sample, - # and again separately for the left and right channels. We need to - # convert this to a percentage of full scale, which is 32768 for a - # 16 bit sample. Once again, we play it safe by using the larger of - # the two values. - peak = max(soundcheck[6:8]) / 32768.0 - - return round(gain, 2), round(peak, 6) - - -def _sc_encode(gain, peak): - """Encode ReplayGain gain/peak values as a Sound Check string. - """ - # SoundCheck stores the peak value as the actual value of the - # sample, rather than the percentage of full scale that RG uses, so - # we do a simple conversion assuming 16 bit samples. - peak *= 32768.0 - - # SoundCheck stores absolute RMS values in some unknown units rather - # than the dB values RG uses. We can calculate these absolute values - # from the gain ratio using a reference value of 1000 units. We also - # enforce the maximum value here, which is equivalent to about - # -18.2dB. - g1 = min(round((10 ** (gain / -10)) * 1000), 65534) - # Same as above, except our reference level is 2500 units. - g2 = min(round((10 ** (gain / -10)) * 2500), 65534) - - # The purpose of these values are unknown, but they also seem to be - # unused so we just use zero. - uk = 0 - values = (g1, g1, g2, g2, uk, uk, peak, peak, uk, uk) - return (u' %08X' * 10) % values - - -# Cover art and other images. - -def _image_mime_type(data): - """Return the MIME type of the image data (a bytestring). - """ - kind = imghdr.what(None, h=data) - if kind in ['gif', 'jpeg', 'png', 'tiff', 'bmp']: - return 'image/{0}'.format(kind) - elif kind == 'pgm': - return 'image/x-portable-graymap' - elif kind == 'pbm': - return 'image/x-portable-bitmap' - elif kind == 'ppm': - return 'image/x-portable-pixmap' - elif kind == 'xbm': - return 'image/x-xbitmap' - else: - return 'image/x-{0}'.format(kind) - - -class ImageType(enum.Enum): - """Indicates the kind of an `Image` stored in a file's tag. - """ - other = 0 - icon = 1 - other_icon = 2 - front = 3 - back = 4 - leaflet = 5 - media = 6 - lead_artist = 7 - artist = 8 - conductor = 9 - group = 10 - composer = 11 - lyricist = 12 - recording_location = 13 - recording_session = 14 - performance = 15 - screen_capture = 16 - fish = 17 - illustration = 18 - artist_logo = 19 - publisher_logo = 20 - - -class Image(object): - """Strucuture representing image data and metadata that can be - stored and retrieved from tags. - - The structure has four properties. - * ``data`` The binary data of the image - * ``desc`` An optional descritpion of the image - * ``type`` An instance of `ImageType` indicating the kind of image - * ``mime_type`` Read-only property that contains the mime type of - the binary data - """ - def __init__(self, data, desc=None, type=None): - self.data = data - self.desc = desc - if isinstance(type, int): - type = list(ImageType)[type] - self.type = type - - @property - def mime_type(self): - if self.data: - return _image_mime_type(self.data) - - @property - def type_index(self): - if self.type is None: - # This method is used when a tag format requires the type - # index to be set, so we return "other" as the default value. - return 0 - return self.type.value - - -# StorageStyle classes describe strategies for accessing values in -# Mutagen file objects. - -class StorageStyle(object): - """A strategy for storing a value for a certain tag format (or set - of tag formats). This basic StorageStyle describes simple 1:1 - mapping from raw values to keys in a Mutagen file object; subclasses - describe more sophisticated translations or format-specific access - strategies. - - MediaFile uses a StorageStyle via three methods: ``get()``, - ``set()``, and ``delete()``. It passes a Mutagen file object to - each. - - Internally, the StorageStyle implements ``get()`` and ``set()`` - using two steps that may be overridden by subtypes. To get a value, - the StorageStyle first calls ``fetch()`` to retrieve the value - corresponding to a key and then ``deserialize()`` to convert the raw - Mutagen value to a consumable Python value. Similarly, to set a - field, we call ``serialize()`` to encode the value and then - ``store()`` to assign the result into the Mutagen object. - - Each StorageStyle type has a class-level `formats` attribute that is - a list of strings indicating the formats that the style applies to. - MediaFile only uses StorageStyles that apply to the correct type for - a given audio file. - """ - - formats = ['FLAC', 'OggOpus', 'OggTheora', 'OggSpeex', 'OggVorbis', - 'OggFlac', 'APEv2File', 'WavPack', 'Musepack', 'MonkeysAudio'] - """List of mutagen classes the StorageStyle can handle. - """ - - def __init__(self, key, as_type=unicode, suffix=None, float_places=2): - """Create a basic storage strategy. Parameters: - - - `key`: The key on the Mutagen file object used to access the - field's data. - - `as_type`: The Python type that the value is stored as - internally (`unicode`, `int`, `bool`, or `bytes`). - - `suffix`: When `as_type` is a string type, append this before - storing the value. - - `float_places`: When the value is a floating-point number and - encoded as a string, the number of digits to store after the - decimal point. - """ - self.key = key - self.as_type = as_type - self.suffix = suffix - self.float_places = float_places - - # Convert suffix to correct string type. - if self.suffix and self.as_type is unicode: - self.suffix = self.as_type(self.suffix) - - # Getter. - - def get(self, mutagen_file): - """Get the value for the field using this style. - """ - return self.deserialize(self.fetch(mutagen_file)) - - def fetch(self, mutagen_file): - """Retrieve the raw value of for this tag from the Mutagen file - object. - """ - try: - return mutagen_file[self.key][0] - except (KeyError, IndexError): - return None - - def deserialize(self, mutagen_value): - """Given a raw value stored on a Mutagen object, decode and - return the represented value. - """ - if self.suffix and isinstance(mutagen_value, unicode) \ - and mutagen_value.endswith(self.suffix): - return mutagen_value[:-len(self.suffix)] - else: - return mutagen_value - - # Setter. - - def set(self, mutagen_file, value): - """Assign the value for the field using this style. - """ - self.store(mutagen_file, self.serialize(value)) - - def store(self, mutagen_file, value): - """Store a serialized value in the Mutagen file object. - """ - mutagen_file[self.key] = [value] - - def serialize(self, value): - """Convert the external Python value to a type that is suitable for - storing in a Mutagen file object. - """ - if isinstance(value, float) and self.as_type is unicode: - value = u'{0:.{1}f}'.format(value, self.float_places) - value = self.as_type(value) - elif self.as_type is unicode: - if isinstance(value, bool): - # Store bools as 1/0 instead of True/False. - value = unicode(int(bool(value))) - elif isinstance(value, str): - value = value.decode('utf8', 'ignore') - else: - value = unicode(value) - else: - value = self.as_type(value) - - if self.suffix: - value += self.suffix - - return value - - def delete(self, mutagen_file): - """Remove the tag from the file. - """ - if self.key in mutagen_file: - del mutagen_file[self.key] - - -class ListStorageStyle(StorageStyle): - """Abstract storage style that provides access to lists. - - The ListMediaField descriptor uses a ListStorageStyle via two - methods: ``get_list()`` and ``set_list()``. It passes a Mutagen file - object to each. - - Subclasses may overwrite ``fetch`` and ``store``. ``fetch`` must - return a (possibly empty) list and ``store`` receives a serialized - list of values as the second argument. - - The `serialize` and `deserialize` methods (from the base - `StorageStyle`) are still called with individual values. This class - handles packing and unpacking the values into lists. - """ - def get(self, mutagen_file): - """Get the first value in the field's value list. - """ - try: - return self.get_list(mutagen_file)[0] - except IndexError: - return None - - def get_list(self, mutagen_file): - """Get a list of all values for the field using this style. - """ - return [self.deserialize(item) for item in self.fetch(mutagen_file)] - - def fetch(self, mutagen_file): - """Get the list of raw (serialized) values. - """ - try: - return mutagen_file[self.key] - except KeyError: - return [] - - def set(self, mutagen_file, value): - """Set an individual value as the only value for the field using - this style. - """ - self.set_list(mutagen_file, [value]) - - def set_list(self, mutagen_file, values): - """Set all values for the field using this style. `values` - should be an iterable. - """ - self.store(mutagen_file, [self.serialize(value) for value in values]) - - def store(self, mutagen_file, values): - """Set the list of all raw (serialized) values for this field. - """ - mutagen_file[self.key] = values - - -class SoundCheckStorageStyleMixin(object): - """A mixin for storage styles that read and write iTunes SoundCheck - analysis values. The object must have an `index` field that - indicates which half of the gain/peak pair---0 or 1---the field - represents. - """ - def get(self, mutagen_file): - data = self.fetch(mutagen_file) - if data is not None: - return _sc_decode(data)[self.index] - - def set(self, mutagen_file, value): - data = self.fetch(mutagen_file) - if data is None: - gain_peak = [0, 0] - else: - gain_peak = list(_sc_decode(data)) - gain_peak[self.index] = value or 0 - data = self.serialize(_sc_encode(*gain_peak)) - self.store(mutagen_file, data) - - -class ASFStorageStyle(ListStorageStyle): - """A general storage style for Windows Media/ASF files. - """ - formats = ['ASF'] - - def deserialize(self, data): - if isinstance(data, mutagen.asf.ASFBaseAttribute): - data = data.value - return data - - -class MP4StorageStyle(StorageStyle): - """A general storage style for MPEG-4 tags. - """ - formats = ['MP4'] - - def serialize(self, value): - value = super(MP4StorageStyle, self).serialize(value) - if self.key.startswith('----:') and isinstance(value, unicode): - value = value.encode('utf8') - return value - - -class MP4TupleStorageStyle(MP4StorageStyle): - """A style for storing values as part of a pair of numbers in an - MPEG-4 file. - """ - def __init__(self, key, index=0, **kwargs): - super(MP4TupleStorageStyle, self).__init__(key, **kwargs) - self.index = index - - def deserialize(self, mutagen_value): - items = mutagen_value or [] - packing_length = 2 - return list(items) + [0] * (packing_length - len(items)) - - def get(self, mutagen_file): - value = super(MP4TupleStorageStyle, self).get(mutagen_file)[self.index] - if value == 0: - # The values are always present and saved as integers. So we - # assume that "0" indicates it is not set. - return None - else: - return value - - def set(self, mutagen_file, value): - if value is None: - value = 0 - items = self.deserialize(self.fetch(mutagen_file)) - items[self.index] = int(value) - self.store(mutagen_file, items) - - def delete(self, mutagen_file): - if self.index == 0: - super(MP4TupleStorageStyle, self).delete(mutagen_file) - else: - self.set(mutagen_file, None) - - -class MP4ListStorageStyle(ListStorageStyle, MP4StorageStyle): - pass - - -class MP4SoundCheckStorageStyle(SoundCheckStorageStyleMixin, MP4StorageStyle): - def __init__(self, key, index=0, **kwargs): - super(MP4SoundCheckStorageStyle, self).__init__(key, **kwargs) - self.index = index - - -class MP4BoolStorageStyle(MP4StorageStyle): - """A style for booleans in MPEG-4 files. (MPEG-4 has an atom type - specifically for representing booleans.) - """ - def get(self, mutagen_file): - try: - return mutagen_file[self.key] - except KeyError: - return None - - def get_list(self, mutagen_file): - raise NotImplementedError('MP4 bool storage does not support lists') - - def set(self, mutagen_file, value): - mutagen_file[self.key] = value - - def set_list(self, mutagen_file, values): - raise NotImplementedError('MP4 bool storage does not support lists') - - -class MP4ImageStorageStyle(MP4ListStorageStyle): - """Store images as MPEG-4 image atoms. Values are `Image` objects. - """ - def __init__(self, **kwargs): - super(MP4ImageStorageStyle, self).__init__(key='covr', **kwargs) - - def deserialize(self, data): - return Image(data) - - def serialize(self, image): - if image.mime_type == 'image/png': - kind = mutagen.mp4.MP4Cover.FORMAT_PNG - elif image.mime_type == 'image/jpeg': - kind = mutagen.mp4.MP4Cover.FORMAT_JPEG - else: - raise ValueError('MP4 files only supports PNG and JPEG images') - return mutagen.mp4.MP4Cover(image.data, kind) - - -class MP3StorageStyle(StorageStyle): - """Store data in ID3 frames. - """ - formats = ['MP3', 'AIFF'] - - def __init__(self, key, id3_lang=None, **kwargs): - """Create a new ID3 storage style. `id3_lang` is the value for - the language field of newly created frames. - """ - self.id3_lang = id3_lang - super(MP3StorageStyle, self).__init__(key, **kwargs) - - def fetch(self, mutagen_file): - try: - return mutagen_file[self.key].text[0] - except (KeyError, IndexError): - return None - - def store(self, mutagen_file, value): - frame = mutagen.id3.Frames[self.key](encoding=3, text=[value]) - mutagen_file.tags.setall(self.key, [frame]) - - -class MP3ListStorageStyle(ListStorageStyle, MP3StorageStyle): - """Store lists of data in multiple ID3 frames. - """ - def fetch(self, mutagen_file): - try: - return mutagen_file[self.key].text - except KeyError: - return [] - - def store(self, mutagen_file, values): - frame = mutagen.id3.Frames[self.key](encoding=3, text=values) - mutagen_file.tags.setall(self.key, [frame]) - - -class MP3UFIDStorageStyle(MP3StorageStyle): - """Store data in a UFID ID3 frame with a particular owner. - """ - def __init__(self, owner, **kwargs): - self.owner = owner - super(MP3UFIDStorageStyle, self).__init__('UFID:' + owner, **kwargs) - - def fetch(self, mutagen_file): - try: - return mutagen_file[self.key].data - except KeyError: - return None - - def store(self, mutagen_file, value): - frames = mutagen_file.tags.getall(self.key) - for frame in frames: - # Replace existing frame data. - if frame.owner == self.owner: - frame.data = value - else: - # New frame. - frame = mutagen.id3.UFID(owner=self.owner, data=value) - mutagen_file.tags.setall(self.key, [frame]) - - -class MP3DescStorageStyle(MP3StorageStyle): - """Store data in a TXXX (or similar) ID3 frame. The frame is - selected based its ``desc`` field. - """ - def __init__(self, desc=u'', key='TXXX', **kwargs): - self.description = desc - super(MP3DescStorageStyle, self).__init__(key=key, **kwargs) - - def store(self, mutagen_file, value): - frames = mutagen_file.tags.getall(self.key) - if self.key != 'USLT': - value = [value] - - # try modifying in place - found = False - for frame in frames: - if frame.desc.lower() == self.description.lower(): - frame.text = value - found = True - - # need to make a new frame? - if not found: - frame = mutagen.id3.Frames[self.key]( - desc=str(self.description), - text=value, - encoding=3 - ) - if self.id3_lang: - frame.lang = self.id3_lang - mutagen_file.tags.add(frame) - - def fetch(self, mutagen_file): - for frame in mutagen_file.tags.getall(self.key): - if frame.desc.lower() == self.description.lower(): - if self.key == 'USLT': - return frame.text - try: - return frame.text[0] - except IndexError: - return None - - def delete(self, mutagen_file): - found_frame = None - for frame in mutagen_file.tags.getall(self.key): - if frame.desc.lower() == self.description.lower(): - found_frame = frame - break - if found_frame is not None: - del mutagen_file[frame.HashKey] - - -class MP3SlashPackStorageStyle(MP3StorageStyle): - """Store value as part of pair that is serialized as a slash- - separated string. - """ - def __init__(self, key, pack_pos=0, **kwargs): - super(MP3SlashPackStorageStyle, self).__init__(key, **kwargs) - self.pack_pos = pack_pos - - def _fetch_unpacked(self, mutagen_file): - data = self.fetch(mutagen_file) - if data: - items = unicode(data).split('/') - else: - items = [] - packing_length = 2 - return list(items) + [None] * (packing_length - len(items)) - - def get(self, mutagen_file): - return self._fetch_unpacked(mutagen_file)[self.pack_pos] - - def set(self, mutagen_file, value): - items = self._fetch_unpacked(mutagen_file) - items[self.pack_pos] = value - if items[0] is None: - items[0] = '' - if items[1] is None: - items.pop() # Do not store last value - self.store(mutagen_file, '/'.join(map(unicode, items))) - - def delete(self, mutagen_file): - if self.pack_pos == 0: - super(MP3SlashPackStorageStyle, self).delete(mutagen_file) - else: - self.set(mutagen_file, None) - - -class MP3ImageStorageStyle(ListStorageStyle, MP3StorageStyle): - """Converts between APIC frames and ``Image`` instances. - - The `get_list` method inherited from ``ListStorageStyle`` returns a - list of ``Image``s. Similarly, the `set_list` method accepts a - list of ``Image``s as its ``values`` arguemnt. - """ - def __init__(self): - super(MP3ImageStorageStyle, self).__init__(key='APIC') - self.as_type = str - - def deserialize(self, apic_frame): - """Convert APIC frame into Image.""" - return Image(data=apic_frame.data, desc=apic_frame.desc, - type=apic_frame.type) - - def fetch(self, mutagen_file): - return mutagen_file.tags.getall(self.key) - - def store(self, mutagen_file, frames): - mutagen_file.tags.setall(self.key, frames) - - def delete(self, mutagen_file): - mutagen_file.tags.delall(self.key) - - def serialize(self, image): - """Return an APIC frame populated with data from ``image``. - """ - assert isinstance(image, Image) - frame = mutagen.id3.Frames[self.key]() - frame.data = image.data - frame.mime = image.mime_type - frame.desc = (image.desc or u'').encode('utf8') - frame.encoding = 3 # UTF-8 encoding of desc - frame.type = image.type_index - return frame - - -class MP3SoundCheckStorageStyle(SoundCheckStorageStyleMixin, - MP3DescStorageStyle): - def __init__(self, index=0, **kwargs): - super(MP3SoundCheckStorageStyle, self).__init__(**kwargs) - self.index = index - - -class ASFImageStorageStyle(ListStorageStyle): - """Store images packed into Windows Media/ASF byte array attributes. - Values are `Image` objects. - """ - formats = ['ASF'] - - def __init__(self): - super(ASFImageStorageStyle, self).__init__(key='WM/Picture') - - def deserialize(self, asf_picture): - mime, data, type, desc = _unpack_asf_image(asf_picture.value) - return Image(data, desc=desc, type=type) - - def serialize(self, image): - pic = mutagen.asf.ASFByteArrayAttribute() - pic.value = _pack_asf_image(image.mime_type, image.data, - type=image.type_index, - description=image.desc or u'') - return pic - - -class VorbisImageStorageStyle(ListStorageStyle): - """Store images in Vorbis comments. Both legacy COVERART fields and - modern METADATA_BLOCK_PICTURE tags are supported. Data is - base64-encoded. Values are `Image` objects. - """ - formats = ['OggOpus', 'OggTheora', 'OggSpeex', 'OggVorbis', - 'OggFlac'] - - def __init__(self): - super(VorbisImageStorageStyle, self).__init__( - key='metadata_block_picture' - ) - self.as_type = str - - def fetch(self, mutagen_file): - images = [] - if 'metadata_block_picture' not in mutagen_file: - # Try legacy COVERART tags. - if 'coverart' in mutagen_file: - for data in mutagen_file['coverart']: - images.append(Image(base64.b64decode(data))) - return images - for data in mutagen_file["metadata_block_picture"]: - try: - pic = mutagen.flac.Picture(base64.b64decode(data)) - except (TypeError, AttributeError): - continue - images.append(Image(data=pic.data, desc=pic.desc, - type=pic.type)) - return images - - def store(self, mutagen_file, image_data): - # Strip all art, including legacy COVERART. - if 'coverart' in mutagen_file: - del mutagen_file['coverart'] - if 'coverartmime' in mutagen_file: - del mutagen_file['coverartmime'] - super(VorbisImageStorageStyle, self).store(mutagen_file, image_data) - - def serialize(self, image): - """Turn a Image into a base64 encoded FLAC picture block. - """ - pic = mutagen.flac.Picture() - pic.data = image.data - pic.type = image.type_index - pic.mime = image.mime_type - pic.desc = image.desc or u'' - return base64.b64encode(pic.write()) - - -class FlacImageStorageStyle(ListStorageStyle): - """Converts between ``mutagen.flac.Picture`` and ``Image`` instances. - """ - formats = ['FLAC'] - - def __init__(self): - super(FlacImageStorageStyle, self).__init__(key='') - - def fetch(self, mutagen_file): - return mutagen_file.pictures - - def deserialize(self, flac_picture): - return Image(data=flac_picture.data, desc=flac_picture.desc, - type=flac_picture.type) - - def store(self, mutagen_file, pictures): - """``pictures`` is a list of mutagen.flac.Picture instances. - """ - mutagen_file.clear_pictures() - for pic in pictures: - mutagen_file.add_picture(pic) - - def serialize(self, image): - """Turn a Image into a mutagen.flac.Picture. - """ - pic = mutagen.flac.Picture() - pic.data = image.data - pic.type = image.type_index - pic.mime = image.mime_type - pic.desc = image.desc or u'' - return pic - - def delete(self, mutagen_file): - """Remove all images from the file. - """ - mutagen_file.clear_pictures() - - -class APEv2ImageStorageStyle(ListStorageStyle): - """Store images in APEv2 tags. Values are `Image` objects. - """ - formats = ['APEv2File', 'WavPack', 'Musepack', 'MonkeysAudio', 'OptimFROG'] - - TAG_NAMES = { - ImageType.other: 'Cover Art (other)', - ImageType.icon: 'Cover Art (icon)', - ImageType.other_icon: 'Cover Art (other icon)', - ImageType.front: 'Cover Art (front)', - ImageType.back: 'Cover Art (back)', - ImageType.leaflet: 'Cover Art (leaflet)', - ImageType.media: 'Cover Art (media)', - ImageType.lead_artist: 'Cover Art (lead)', - ImageType.artist: 'Cover Art (artist)', - ImageType.conductor: 'Cover Art (conductor)', - ImageType.group: 'Cover Art (band)', - ImageType.composer: 'Cover Art (composer)', - ImageType.lyricist: 'Cover Art (lyricist)', - ImageType.recording_location: 'Cover Art (studio)', - ImageType.recording_session: 'Cover Art (recording)', - ImageType.performance: 'Cover Art (performance)', - ImageType.screen_capture: 'Cover Art (movie scene)', - ImageType.fish: 'Cover Art (colored fish)', - ImageType.illustration: 'Cover Art (illustration)', - ImageType.artist_logo: 'Cover Art (band logo)', - ImageType.publisher_logo: 'Cover Art (publisher logo)', - } - - def __init__(self): - super(APEv2ImageStorageStyle, self).__init__(key='') - - def fetch(self, mutagen_file): - images = [] - for cover_type, cover_tag in self.TAG_NAMES.items(): - try: - frame = mutagen_file[cover_tag] - text_delimiter_index = frame.value.find('\x00') - comment = frame.value[0:text_delimiter_index] \ - if text_delimiter_index > 0 else None - image_data = frame.value[text_delimiter_index + 1:] - images.append(Image(data=image_data, type=cover_type, - desc=comment)) - except KeyError: - pass - - return images - - def set_list(self, mutagen_file, values): - self.delete(mutagen_file) - - for image in values: - image_type = image.type or ImageType.other - comment = image.desc or '' - image_data = comment + "\x00" + image.data - cover_tag = self.TAG_NAMES[image_type] - mutagen_file[cover_tag] = image_data - - def delete(self, mutagen_file): - """Remove all images from the file. - """ - for cover_tag in self.TAG_NAMES.values(): - try: - del mutagen_file[cover_tag] - except KeyError: - pass - - -# MediaField is a descriptor that represents a single logical field. It -# aggregates several StorageStyles describing how to access the data for -# each file type. - -class MediaField(object): - """A descriptor providing access to a particular (abstract) metadata - field. - """ - def __init__(self, *styles, **kwargs): - """Creates a new MediaField. - - :param styles: `StorageStyle` instances that describe the strategy - for reading and writing the field in particular - formats. There must be at least one style for - each possible file format. - - :param out_type: the type of the value that should be returned when - getting this property. - - """ - self.out_type = kwargs.get('out_type', unicode) - self._styles = styles - - def styles(self, mutagen_file): - """Yields the list of storage styles of this field that can - handle the MediaFile's format. - """ - for style in self._styles: - if mutagen_file.__class__.__name__ in style.formats: - yield style - - def __get__(self, mediafile, owner=None): - out = None - for style in self.styles(mediafile.mgfile): - out = style.get(mediafile.mgfile) - if out: - break - return _safe_cast(self.out_type, out) - - def __set__(self, mediafile, value): - if value is None: - value = self._none_value() - for style in self.styles(mediafile.mgfile): - style.set(mediafile.mgfile, value) - - def __delete__(self, mediafile): - for style in self.styles(mediafile.mgfile): - style.delete(mediafile.mgfile) - - def _none_value(self): - """Get an appropriate "null" value for this field's type. This - is used internally when setting the field to None. - """ - if self.out_type == int: - return 0 - elif self.out_type == float: - return 0.0 - elif self.out_type == bool: - return False - elif self.out_type == unicode: - return u'' - - -class ListMediaField(MediaField): - """Property descriptor that retrieves a list of multiple values from - a tag. - - Uses ``get_list`` and set_list`` methods of its ``StorageStyle`` - strategies to do the actual work. - """ - def __get__(self, mediafile, _): - values = [] - for style in self.styles(mediafile.mgfile): - values.extend(style.get_list(mediafile.mgfile)) - return [_safe_cast(self.out_type, value) for value in values] - - def __set__(self, mediafile, values): - for style in self.styles(mediafile.mgfile): - style.set_list(mediafile.mgfile, values) - - def single_field(self): - """Returns a ``MediaField`` descriptor that gets and sets the - first item. - """ - options = {'out_type': self.out_type} - return MediaField(*self._styles, **options) - - -class DateField(MediaField): - """Descriptor that handles serializing and deserializing dates - - The getter parses value from tags into a ``datetime.date`` instance - and setter serializes such an instance into a string. - - For granular access to year, month, and day, use the ``*_field`` - methods to create corresponding `DateItemField`s. - """ - def __init__(self, *date_styles, **kwargs): - """``date_styles`` is a list of ``StorageStyle``s to store and - retrieve the whole date from. The ``year`` option is an - additional list of fallback styles for the year. The year is - always set on this style, but is only retrieved if the main - storage styles do not return a value. - """ - super(DateField, self).__init__(*date_styles) - year_style = kwargs.get('year', None) - if year_style: - self._year_field = MediaField(*year_style) - - def __get__(self, mediafile, owner=None): - year, month, day = self._get_date_tuple(mediafile) - if not year: - return None - try: - return datetime.date( - year, - month or 1, - day or 1 - ) - except ValueError: # Out of range values. - return None - - def __set__(self, mediafile, date): - if date is None: - self._set_date_tuple(mediafile, None, None, None) - else: - self._set_date_tuple(mediafile, date.year, date.month, date.day) - - def __delete__(self, mediafile): - super(DateField, self).__delete__(mediafile) - if hasattr(self, '_year_field'): - self._year_field.__delete__(mediafile) - - def _get_date_tuple(self, mediafile): - """Get a 3-item sequence representing the date consisting of a - year, month, and day number. Each number is either an integer or - None. - """ - # Get the underlying data and split on hyphens and slashes. - datestring = super(DateField, self).__get__(mediafile, None) - if isinstance(datestring, basestring): - datestring = re.sub(r'[Tt ].*$', '', unicode(datestring)) - items = re.split('[-/]', unicode(datestring)) - else: - items = [] - - # Ensure that we have exactly 3 components, possibly by - # truncating or padding. - items = items[:3] - if len(items) < 3: - items += [None] * (3 - len(items)) - - # Use year field if year is missing. - if not items[0] and hasattr(self, '_year_field'): - items[0] = self._year_field.__get__(mediafile) - - # Convert each component to an integer if possible. - items_ = [] - for item in items: - try: - items_.append(int(item)) - except: - items_.append(None) - return items_ - - def _set_date_tuple(self, mediafile, year, month=None, day=None): - """Set the value of the field given a year, month, and day - number. Each number can be an integer or None to indicate an - unset component. - """ - if year is None: - self.__delete__(mediafile) - return - - date = [u'{0:04d}'.format(int(year))] - if month: - date.append(u'{0:02d}'.format(int(month))) - if month and day: - date.append(u'{0:02d}'.format(int(day))) - date = map(unicode, date) - super(DateField, self).__set__(mediafile, u'-'.join(date)) - - if hasattr(self, '_year_field'): - self._year_field.__set__(mediafile, year) - - def year_field(self): - return DateItemField(self, 0) - - def month_field(self): - return DateItemField(self, 1) - - def day_field(self): - return DateItemField(self, 2) - - -class DateItemField(MediaField): - """Descriptor that gets and sets constituent parts of a `DateField`: - the month, day, or year. - """ - def __init__(self, date_field, item_pos): - self.date_field = date_field - self.item_pos = item_pos - - def __get__(self, mediafile, _): - return self.date_field._get_date_tuple(mediafile)[self.item_pos] - - def __set__(self, mediafile, value): - items = self.date_field._get_date_tuple(mediafile) - items[self.item_pos] = value - self.date_field._set_date_tuple(mediafile, *items) - - def __delete__(self, mediafile): - self.__set__(mediafile, None) - - -class CoverArtField(MediaField): - """A descriptor that provides access to the *raw image data* for the - first image on a file. This is used for backwards compatibility: the - full `ImageListField` provides richer `Image` objects. - """ - def __init__(self): - pass - - def __get__(self, mediafile, _): - try: - return mediafile.images[0].data - except IndexError: - return None - - def __set__(self, mediafile, data): - if data: - mediafile.images = [Image(data=data)] - else: - mediafile.images = [] - - def __delete__(self, mediafile): - delattr(mediafile, 'images') - - -class ImageListField(ListMediaField): - """Descriptor to access the list of images embedded in tags. - - The getter returns a list of `Image` instances obtained from - the tags. The setter accepts a list of `Image` instances to be - written to the tags. - """ - def __init__(self): - # The storage styles used here must implement the - # `ListStorageStyle` interface and get and set lists of - # `Image`s. - super(ImageListField, self).__init__( - MP3ImageStorageStyle(), - MP4ImageStorageStyle(), - ASFImageStorageStyle(), - VorbisImageStorageStyle(), - FlacImageStorageStyle(), - APEv2ImageStorageStyle(), - out_type=Image, - ) - - -# MediaFile is a collection of fields. - -class MediaFile(object): - """Represents a multimedia file on disk and provides access to its - metadata. - """ - def __init__(self, path, id3v23=False): - """Constructs a new `MediaFile` reflecting the file at path. May - throw `UnreadableFileError`. - - By default, MP3 files are saved with ID3v2.4 tags. You can use - the older ID3v2.3 standard by specifying the `id3v23` option. - """ - self.path = path - - unreadable_exc = ( - mutagen.mp3.error, - mutagen.id3.error, - mutagen.flac.error, - mutagen.monkeysaudio.MonkeysAudioHeaderError, - mutagen.mp4.error, - mutagen.oggopus.error, - mutagen.oggvorbis.error, - mutagen.ogg.error, - mutagen.asf.error, - mutagen.apev2.error, - mutagen.aiff.error, - ) - try: - self.mgfile = mutagen.File(path) - except unreadable_exc as exc: - log.debug(u'header parsing failed: {0}'.format(unicode(exc))) - raise UnreadableFileError(path) - except IOError as exc: - if type(exc) == IOError: - # This is a base IOError, not a subclass from Mutagen or - # anywhere else. - raise - else: - log.debug(traceback.format_exc()) - raise MutagenError(path, exc) - except Exception as exc: - # Isolate bugs in Mutagen. - log.debug(traceback.format_exc()) - log.error(u'uncaught Mutagen exception in open: {0}'.format(exc)) - raise MutagenError(path, exc) - - if self.mgfile is None: - # Mutagen couldn't guess the type - raise FileTypeError(path) - elif (type(self.mgfile).__name__ == 'M4A' or - type(self.mgfile).__name__ == 'MP4'): - info = self.mgfile.info - if hasattr(info, 'codec'): - if info.codec and info.codec.startswith('alac'): - self.type = 'alac' - else: - self.type = 'aac' - else: - # This hack differentiates AAC and ALAC on versions of - # Mutagen < 1.26. Once Mutagen > 1.26 is out and - # required by beets, we can remove this. - if hasattr(self.mgfile.info, 'bitrate') and \ - self.mgfile.info.bitrate > 0: - self.type = 'aac' - else: - self.type = 'alac' - elif (type(self.mgfile).__name__ == 'ID3' or - type(self.mgfile).__name__ == 'MP3'): - self.type = 'mp3' - elif type(self.mgfile).__name__ == 'FLAC': - self.type = 'flac' - elif type(self.mgfile).__name__ == 'OggOpus': - self.type = 'opus' - elif type(self.mgfile).__name__ == 'OggVorbis': - self.type = 'ogg' - elif type(self.mgfile).__name__ == 'MonkeysAudio': - self.type = 'ape' - elif type(self.mgfile).__name__ == 'WavPack': - self.type = 'wv' - elif type(self.mgfile).__name__ == 'Musepack': - self.type = 'mpc' - elif type(self.mgfile).__name__ == 'ASF': - self.type = 'asf' - elif type(self.mgfile).__name__ == 'AIFF': - self.type = 'aiff' - else: - raise FileTypeError(path, type(self.mgfile).__name__) - - # Add a set of tags if it's missing. - if self.mgfile.tags is None: - self.mgfile.add_tags() - - # Set the ID3v2.3 flag only for MP3s. - self.id3v23 = id3v23 and self.type == 'mp3' - - def save(self): - """Write the object's tags back to the file. - """ - # Possibly save the tags to ID3v2.3. - kwargs = {} - if self.id3v23: - id3 = self.mgfile - if hasattr(id3, 'tags'): - # In case this is an MP3 object, not an ID3 object. - id3 = id3.tags - id3.update_to_v23() - kwargs['v2_version'] = 3 - - # Isolate bugs in Mutagen. - try: - self.mgfile.save(**kwargs) - except (IOError, OSError): - # Propagate these through: they don't represent Mutagen bugs. - raise - except Exception as exc: - log.debug(traceback.format_exc()) - log.error(u'uncaught Mutagen exception in save: {0}'.format(exc)) - raise MutagenError(self.path, exc) - - def delete(self): - """Remove the current metadata tag from the file. - """ - try: - self.mgfile.delete() - except NotImplementedError: - # For Mutagen types that don't support deletion (notably, - # ASF), just delete each tag individually. - for tag in self.mgfile.keys(): - del self.mgfile[tag] - - # Convenient access to the set of available fields. - - @classmethod - def fields(cls): - """Get the names of all writable properties that reflect - metadata tags (i.e., those that are instances of - :class:`MediaField`). - """ - for property, descriptor in cls.__dict__.items(): - if isinstance(descriptor, MediaField): - yield property - - @classmethod - def readable_fields(cls): - """Get all metadata fields: the writable ones from - :meth:`fields` and also other audio properties. - """ - for property in cls.fields(): - yield property - for property in ('length', 'samplerate', 'bitdepth', 'bitrate', - 'channels', 'format'): - yield property - - @classmethod - def add_field(cls, name, descriptor): - """Add a field to store custom tags. - - :param name: the name of the property the field is accessed - through. It must not already exist on this class. - - :param descriptor: an instance of :class:`MediaField`. - """ - if not isinstance(descriptor, MediaField): - raise ValueError( - u'{0} must be an instance of MediaField'.format(descriptor)) - if name in cls.__dict__: - raise ValueError( - u'property "{0}" already exists on MediaField'.format(name)) - setattr(cls, name, descriptor) - - def update(self, dict): - """Set all field values from a dictionary. - - For any key in `dict` that is also a field to store tags the - method retrieves the corresponding value from `dict` and updates - the `MediaFile`. If a key has the value `None`, the - corresponding property is deleted from the `MediaFile`. - """ - for field in self.fields(): - if field in dict: - if dict[field] is None: - delattr(self, field) - else: - setattr(self, field, dict[field]) - - # Field definitions. - - title = MediaField( - MP3StorageStyle('TIT2'), - MP4StorageStyle("\xa9nam"), - StorageStyle('TITLE'), - ASFStorageStyle('Title'), - ) - artist = MediaField( - MP3StorageStyle('TPE1'), - MP4StorageStyle("\xa9ART"), - StorageStyle('ARTIST'), - ASFStorageStyle('Author'), - ) - album = MediaField( - MP3StorageStyle('TALB'), - MP4StorageStyle("\xa9alb"), - StorageStyle('ALBUM'), - ASFStorageStyle('WM/AlbumTitle'), - ) - genres = ListMediaField( - MP3ListStorageStyle('TCON'), - MP4ListStorageStyle("\xa9gen"), - ListStorageStyle('GENRE'), - ASFStorageStyle('WM/Genre'), - ) - genre = genres.single_field() - - composer = MediaField( - MP3StorageStyle('TCOM'), - MP4StorageStyle("\xa9wrt"), - StorageStyle('COMPOSER'), - ASFStorageStyle('WM/Composer'), - ) - grouping = MediaField( - MP3StorageStyle('TIT1'), - MP4StorageStyle("\xa9grp"), - StorageStyle('GROUPING'), - ASFStorageStyle('WM/ContentGroupDescription'), - ) - track = MediaField( - MP3SlashPackStorageStyle('TRCK', pack_pos=0), - MP4TupleStorageStyle('trkn', index=0), - StorageStyle('TRACK'), - StorageStyle('TRACKNUMBER'), - ASFStorageStyle('WM/TrackNumber'), - out_type=int, - ) - tracktotal = MediaField( - MP3SlashPackStorageStyle('TRCK', pack_pos=1), - MP4TupleStorageStyle('trkn', index=1), - StorageStyle('TRACKTOTAL'), - StorageStyle('TRACKC'), - StorageStyle('TOTALTRACKS'), - ASFStorageStyle('TotalTracks'), - out_type=int, - ) - disc = MediaField( - MP3SlashPackStorageStyle('TPOS', pack_pos=0), - MP4TupleStorageStyle('disk', index=0), - StorageStyle('DISC'), - StorageStyle('DISCNUMBER'), - ASFStorageStyle('WM/PartOfSet'), - out_type=int, - ) - disctotal = MediaField( - MP3SlashPackStorageStyle('TPOS', pack_pos=1), - MP4TupleStorageStyle('disk', index=1), - StorageStyle('DISCTOTAL'), - StorageStyle('DISCC'), - StorageStyle('TOTALDISCS'), - ASFStorageStyle('TotalDiscs'), - out_type=int, - ) - lyrics = MediaField( - MP3DescStorageStyle(key='USLT'), - MP4StorageStyle("\xa9lyr"), - StorageStyle('LYRICS'), - ASFStorageStyle('WM/Lyrics'), - ) - comments = MediaField( - MP3DescStorageStyle(key='COMM'), - MP4StorageStyle("\xa9cmt"), - StorageStyle('DESCRIPTION'), - StorageStyle('COMMENT'), - ASFStorageStyle('WM/Comments'), - ASFStorageStyle('Description') - ) - bpm = MediaField( - MP3StorageStyle('TBPM'), - MP4StorageStyle('tmpo', as_type=int), - StorageStyle('BPM'), - ASFStorageStyle('WM/BeatsPerMinute'), - out_type=int, - ) - comp = MediaField( - MP3StorageStyle('TCMP'), - MP4BoolStorageStyle('cpil'), - StorageStyle('COMPILATION'), - ASFStorageStyle('WM/IsCompilation', as_type=bool), - out_type=bool, - ) - albumartist = MediaField( - MP3StorageStyle('TPE2'), - MP4StorageStyle('aART'), - StorageStyle('ALBUM ARTIST'), - StorageStyle('ALBUMARTIST'), - ASFStorageStyle('WM/AlbumArtist'), - ) - albumtype = MediaField( - MP3DescStorageStyle(u'MusicBrainz Album Type'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Type'), - StorageStyle('MUSICBRAINZ_ALBUMTYPE'), - ASFStorageStyle('MusicBrainz/Album Type'), - ) - label = MediaField( - MP3StorageStyle('TPUB'), - MP4StorageStyle('----:com.apple.iTunes:Label'), - MP4StorageStyle('----:com.apple.iTunes:publisher'), - StorageStyle('LABEL'), - StorageStyle('PUBLISHER'), # Traktor - ASFStorageStyle('WM/Publisher'), - ) - artist_sort = MediaField( - MP3StorageStyle('TSOP'), - MP4StorageStyle("soar"), - StorageStyle('ARTISTSORT'), - ASFStorageStyle('WM/ArtistSortOrder'), - ) - albumartist_sort = MediaField( - MP3DescStorageStyle(u'ALBUMARTISTSORT'), - MP4StorageStyle("soaa"), - StorageStyle('ALBUMARTISTSORT'), - ASFStorageStyle('WM/AlbumArtistSortOrder'), - ) - asin = MediaField( - MP3DescStorageStyle(u'ASIN'), - MP4StorageStyle("----:com.apple.iTunes:ASIN"), - StorageStyle('ASIN'), - ASFStorageStyle('MusicBrainz/ASIN'), - ) - catalognum = MediaField( - MP3DescStorageStyle(u'CATALOGNUMBER'), - MP4StorageStyle("----:com.apple.iTunes:CATALOGNUMBER"), - StorageStyle('CATALOGNUMBER'), - ASFStorageStyle('WM/CatalogNo'), - ) - disctitle = MediaField( - MP3StorageStyle('TSST'), - MP4StorageStyle("----:com.apple.iTunes:DISCSUBTITLE"), - StorageStyle('DISCSUBTITLE'), - ASFStorageStyle('WM/SetSubTitle'), - ) - encoder = MediaField( - MP3StorageStyle('TENC'), - MP4StorageStyle("\xa9too"), - StorageStyle('ENCODEDBY'), - StorageStyle('ENCODER'), - ASFStorageStyle('WM/EncodedBy'), - ) - script = MediaField( - MP3DescStorageStyle(u'Script'), - MP4StorageStyle("----:com.apple.iTunes:SCRIPT"), - StorageStyle('SCRIPT'), - ASFStorageStyle('WM/Script'), - ) - language = MediaField( - MP3StorageStyle('TLAN'), - MP4StorageStyle("----:com.apple.iTunes:LANGUAGE"), - StorageStyle('LANGUAGE'), - ASFStorageStyle('WM/Language'), - ) - country = MediaField( - MP3DescStorageStyle('MusicBrainz Album Release Country'), - MP4StorageStyle("----:com.apple.iTunes:MusicBrainz " - "Album Release Country"), - StorageStyle('RELEASECOUNTRY'), - ASFStorageStyle('MusicBrainz/Album Release Country'), - ) - albumstatus = MediaField( - MP3DescStorageStyle(u'MusicBrainz Album Status'), - MP4StorageStyle("----:com.apple.iTunes:MusicBrainz Album Status"), - StorageStyle('MUSICBRAINZ_ALBUMSTATUS'), - ASFStorageStyle('MusicBrainz/Album Status'), - ) - media = MediaField( - MP3StorageStyle('TMED'), - MP4StorageStyle("----:com.apple.iTunes:MEDIA"), - StorageStyle('MEDIA'), - ASFStorageStyle('WM/Media'), - ) - albumdisambig = MediaField( - # This tag mapping was invented for beets (not used by Picard, etc). - MP3DescStorageStyle(u'MusicBrainz Album Comment'), - MP4StorageStyle("----:com.apple.iTunes:MusicBrainz Album Comment"), - StorageStyle('MUSICBRAINZ_ALBUMCOMMENT'), - ASFStorageStyle('MusicBrainz/Album Comment'), - ) - - # Release date. - date = DateField( - MP3StorageStyle('TDRC'), - MP4StorageStyle("\xa9day"), - StorageStyle('DATE'), - ASFStorageStyle('WM/Year'), - year=(StorageStyle('YEAR'),)) - - year = date.year_field() - month = date.month_field() - day = date.day_field() - - # *Original* release date. - original_date = DateField( - MP3StorageStyle('TDOR'), - MP4StorageStyle('----:com.apple.iTunes:ORIGINAL YEAR'), - StorageStyle('ORIGINALDATE'), - ASFStorageStyle('WM/OriginalReleaseYear')) - - original_year = original_date.year_field() - original_month = original_date.month_field() - original_day = original_date.day_field() - - # Nonstandard metadata. - artist_credit = MediaField( - MP3DescStorageStyle(u'Artist Credit'), - MP4StorageStyle("----:com.apple.iTunes:Artist Credit"), - StorageStyle('ARTIST_CREDIT'), - ASFStorageStyle('beets/Artist Credit'), - ) - albumartist_credit = MediaField( - MP3DescStorageStyle(u'Album Artist Credit'), - MP4StorageStyle("----:com.apple.iTunes:Album Artist Credit"), - StorageStyle('ALBUMARTIST_CREDIT'), - ASFStorageStyle('beets/Album Artist Credit'), - ) - - # Legacy album art field - art = CoverArtField() - - # Image list - images = ImageListField() - - # MusicBrainz IDs. - mb_trackid = MediaField( - MP3UFIDStorageStyle(owner='http://musicbrainz.org'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Track Id'), - StorageStyle('MUSICBRAINZ_TRACKID'), - ASFStorageStyle('MusicBrainz/Track Id'), - ) - mb_albumid = MediaField( - MP3DescStorageStyle(u'MusicBrainz Album Id'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Id'), - StorageStyle('MUSICBRAINZ_ALBUMID'), - ASFStorageStyle('MusicBrainz/Album Id'), - ) - mb_artistid = MediaField( - MP3DescStorageStyle(u'MusicBrainz Artist Id'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Artist Id'), - StorageStyle('MUSICBRAINZ_ARTISTID'), - ASFStorageStyle('MusicBrainz/Artist Id'), - ) - mb_albumartistid = MediaField( - MP3DescStorageStyle(u'MusicBrainz Album Artist Id'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Artist Id'), - StorageStyle('MUSICBRAINZ_ALBUMARTISTID'), - ASFStorageStyle('MusicBrainz/Album Artist Id'), - ) - mb_releasegroupid = MediaField( - MP3DescStorageStyle(u'MusicBrainz Release Group Id'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Release Group Id'), - StorageStyle('MUSICBRAINZ_RELEASEGROUPID'), - ASFStorageStyle('MusicBrainz/Release Group Id'), - ) - - # Acoustid fields. - acoustid_fingerprint = MediaField( - MP3DescStorageStyle(u'Acoustid Fingerprint'), - MP4StorageStyle('----:com.apple.iTunes:Acoustid Fingerprint'), - StorageStyle('ACOUSTID_FINGERPRINT'), - ASFStorageStyle('Acoustid/Fingerprint'), - ) - acoustid_id = MediaField( - MP3DescStorageStyle(u'Acoustid Id'), - MP4StorageStyle('----:com.apple.iTunes:Acoustid Id'), - StorageStyle('ACOUSTID_ID'), - ASFStorageStyle('Acoustid/Id'), - ) - - # ReplayGain fields. - rg_track_gain = MediaField( - MP3DescStorageStyle( - u'REPLAYGAIN_TRACK_GAIN', - float_places=2, suffix=u' dB' - ), - MP3DescStorageStyle( - u'replaygain_track_gain', - float_places=2, suffix=u' dB' - ), - MP3SoundCheckStorageStyle( - key='COMM', - index=0, desc=u'iTunNORM', - id3_lang='eng' - ), - MP4StorageStyle( - '----:com.apple.iTunes:replaygain_track_gain', - float_places=2, suffix=b' dB' - ), - MP4SoundCheckStorageStyle( - '----:com.apple.iTunes:iTunNORM', - index=0 - ), - StorageStyle( - u'REPLAYGAIN_TRACK_GAIN', - float_places=2, suffix=u' dB' - ), - ASFStorageStyle( - u'replaygain_track_gain', - float_places=2, suffix=u' dB' - ), - out_type=float - ) - rg_album_gain = MediaField( - MP3DescStorageStyle( - u'REPLAYGAIN_ALBUM_GAIN', - float_places=2, suffix=u' dB' - ), - MP3DescStorageStyle( - u'replaygain_album_gain', - float_places=2, suffix=u' dB' - ), - MP4SoundCheckStorageStyle( - '----:com.apple.iTunes:iTunNORM', - index=1 - ), - StorageStyle( - u'REPLAYGAIN_ALBUM_GAIN', - float_places=2, suffix=u' dB' - ), - ASFStorageStyle( - u'replaygain_album_gain', - float_places=2, suffix=u' dB' - ), - out_type=float - ) - rg_track_peak = MediaField( - MP3DescStorageStyle( - u'REPLAYGAIN_TRACK_PEAK', - float_places=6 - ), - MP3DescStorageStyle( - u'replaygain_track_peak', - float_places=6 - ), - MP3SoundCheckStorageStyle( - key=u'COMM', - index=1, desc=u'iTunNORM', - id3_lang='eng' - ), - MP4StorageStyle( - '----:com.apple.iTunes:replaygain_track_peak', - float_places=6 - ), - MP4SoundCheckStorageStyle( - '----:com.apple.iTunes:iTunNORM', - index=1 - ), - StorageStyle(u'REPLAYGAIN_TRACK_PEAK', float_places=6), - ASFStorageStyle(u'replaygain_track_peak', float_places=6), - out_type=float, - ) - rg_album_peak = MediaField( - MP3DescStorageStyle( - u'REPLAYGAIN_ALBUM_PEAK', - float_places=6 - ), - MP3DescStorageStyle( - u'replaygain_album_peak', - float_places=6 - ), - MP4StorageStyle( - '----:com.apple.iTunes:replaygain_album_peak', - float_places=6 - ), - StorageStyle(u'REPLAYGAIN_ALBUM_PEAK', float_places=6), - ASFStorageStyle(u'replaygain_album_peak', float_places=6), - out_type=float, - ) - - initial_key = MediaField( - MP3StorageStyle('TKEY'), - MP4StorageStyle('----:com.apple.iTunes:initialkey'), - StorageStyle('INITIALKEY'), - ASFStorageStyle('INITIALKEY'), - ) - - @property - def length(self): - """The duration of the audio in seconds (a float).""" - return self.mgfile.info.length - - @property - def samplerate(self): - """The audio's sample rate (an int).""" - if hasattr(self.mgfile.info, 'sample_rate'): - return self.mgfile.info.sample_rate - elif self.type == 'opus': - # Opus is always 48kHz internally. - return 48000 - return 0 - - @property - def bitdepth(self): - """The number of bits per sample in the audio encoding (an int). - Only available for certain file formats (zero where - unavailable). - """ - if hasattr(self.mgfile.info, 'bits_per_sample'): - return self.mgfile.info.bits_per_sample - return 0 - - @property - def channels(self): - """The number of channels in the audio (an int).""" - if isinstance(self.mgfile.info, mutagen.mp3.MPEGInfo): - return { - mutagen.mp3.STEREO: 2, - mutagen.mp3.JOINTSTEREO: 2, - mutagen.mp3.DUALCHANNEL: 2, - mutagen.mp3.MONO: 1, - }[self.mgfile.info.mode] - if hasattr(self.mgfile.info, 'channels'): - return self.mgfile.info.channels - return 0 - - @property - def bitrate(self): - """The number of bits per seconds used in the audio coding (an - int). If this is provided explicitly by the compressed file - format, this is a precise reflection of the encoding. Otherwise, - it is estimated from the on-disk file size. In this case, some - imprecision is possible because the file header is incorporated - in the file size. - """ - if hasattr(self.mgfile.info, 'bitrate') and self.mgfile.info.bitrate: - # Many formats provide it explicitly. - return self.mgfile.info.bitrate - else: - # Otherwise, we calculate bitrate from the file size. (This - # is the case for all of the lossless formats.) - if not self.length: - # Avoid division by zero if length is not available. - return 0 - size = os.path.getsize(self.path) - return int(size * 8 / self.length) - - @property - def format(self): - """A string describing the file format/codec.""" - return TYPES[self.type] diff --git a/lib/beets/plugins.py b/lib/beets/plugins.py deleted file mode 100755 index 8611b92a..00000000 --- a/lib/beets/plugins.py +++ /dev/null @@ -1,435 +0,0 @@ -# This file is part of beets. -# Copyright 2013, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Support for beets plugins.""" - -import logging -import traceback -import inspect -import re -from collections import defaultdict - - -import beets -from beets import mediafile - -PLUGIN_NAMESPACE = 'beetsplug' - -# Plugins using the Last.fm API can share the same API key. -LASTFM_KEY = '2dc3914abf35f0d9c92d97d8f8e42b43' - -# Global logger. -log = logging.getLogger('beets') - - -class PluginConflictException(Exception): - """Indicates that the services provided by one plugin conflict with - those of another. - - For example two plugins may define different types for flexible fields. - """ - - -# Managing the plugins themselves. - -class BeetsPlugin(object): - """The base class for all beets plugins. Plugins provide - functionality by defining a subclass of BeetsPlugin and overriding - the abstract methods defined here. - """ - def __init__(self, name=None): - """Perform one-time plugin setup. - """ - self.import_stages = [] - self.name = name or self.__module__.split('.')[-1] - self.config = beets.config[self.name] - if not self.template_funcs: - self.template_funcs = {} - if not self.template_fields: - self.template_fields = {} - if not self.album_template_fields: - self.album_template_fields = {} - - def commands(self): - """Should return a list of beets.ui.Subcommand objects for - commands that should be added to beets' CLI. - """ - return () - - def queries(self): - """Should return a dict mapping prefixes to Query subclasses. - """ - return {} - - def track_distance(self, item, info): - """Should return a Distance object to be added to the - distance for every track comparison. - """ - return beets.autotag.hooks.Distance() - - def album_distance(self, items, album_info, mapping): - """Should return a Distance object to be added to the - distance for every album-level comparison. - """ - return beets.autotag.hooks.Distance() - - def candidates(self, items, artist, album, va_likely): - """Should return a sequence of AlbumInfo objects that match the - album whose items are provided. - """ - return () - - def item_candidates(self, item, artist, title): - """Should return a sequence of TrackInfo objects that match the - item provided. - """ - return () - - def album_for_id(self, album_id): - """Return an AlbumInfo object or None if no matching release was - found. - """ - return None - - def track_for_id(self, track_id): - """Return a TrackInfo object or None if no matching release was - found. - """ - return None - - def add_media_field(self, name, descriptor): - """Add a field that is synchronized between media files and items. - - When a media field is added ``item.write()`` will set the name - property of the item's MediaFile to ``item[name]`` and save the - changes. Similarly ``item.read()`` will set ``item[name]`` to - the value of the name property of the media file. - - ``descriptor`` must be an instance of ``mediafile.MediaField``. - """ - # Defer impor to prevent circular dependency - from beets import library - mediafile.MediaFile.add_field(name, descriptor) - library.Item._media_fields.add(name) - - listeners = None - - @classmethod - def register_listener(cls, event, func): - """Add a function as a listener for the specified event. (An - imperative alternative to the @listen decorator.) - """ - if cls.listeners is None: - cls.listeners = defaultdict(list) - cls.listeners[event].append(func) - - @classmethod - def listen(cls, event): - """Decorator that adds a function as an event handler for the - specified event (as a string). The parameters passed to function - will vary depending on what event occurred. - - The function should respond to named parameters. - function(**kwargs) will trap all arguments in a dictionary. - Example: - - >>> @MyPlugin.listen("imported") - >>> def importListener(**kwargs): - ... pass - """ - def helper(func): - if cls.listeners is None: - cls.listeners = defaultdict(list) - cls.listeners[event].append(func) - return func - return helper - - template_funcs = None - template_fields = None - album_template_fields = None - - @classmethod - def template_func(cls, name): - """Decorator that registers a path template function. The - function will be invoked as ``%name{}`` from path format - strings. - """ - def helper(func): - if cls.template_funcs is None: - cls.template_funcs = {} - cls.template_funcs[name] = func - return func - return helper - - @classmethod - def template_field(cls, name): - """Decorator that registers a path template field computation. - The value will be referenced as ``$name`` from path format - strings. The function must accept a single parameter, the Item - being formatted. - """ - def helper(func): - if cls.template_fields is None: - cls.template_fields = {} - cls.template_fields[name] = func - return func - return helper - - -_classes = set() - - -def load_plugins(names=()): - """Imports the modules for a sequence of plugin names. Each name - must be the name of a Python module under the "beetsplug" namespace - package in sys.path; the module indicated should contain the - BeetsPlugin subclasses desired. - """ - for name in names: - modname = '%s.%s' % (PLUGIN_NAMESPACE, name) - try: - try: - namespace = __import__(modname, None, None) - except ImportError as exc: - # Again, this is hacky: - if exc.args[0].endswith(' ' + name): - log.warn(u'** plugin {0} not found'.format(name)) - else: - raise - else: - for obj in getattr(namespace, name).__dict__.values(): - if isinstance(obj, type) and issubclass(obj, BeetsPlugin) \ - and obj != BeetsPlugin and obj not in _classes: - _classes.add(obj) - - except: - log.warn(u'** error loading plugin {0}'.format(name)) - log.warn(traceback.format_exc()) - - -_instances = {} - - -def find_plugins(): - """Returns a list of BeetsPlugin subclass instances from all - currently loaded beets plugins. Loads the default plugin set - first. - """ - load_plugins() - plugins = [] - for cls in _classes: - # Only instantiate each plugin class once. - if cls not in _instances: - _instances[cls] = cls() - plugins.append(_instances[cls]) - return plugins - - -# Communication with plugins. - -def commands(): - """Returns a list of Subcommand objects from all loaded plugins. - """ - out = [] - for plugin in find_plugins(): - out += plugin.commands() - return out - - -def queries(): - """Returns a dict mapping prefix strings to Query subclasses all loaded - plugins. - """ - out = {} - for plugin in find_plugins(): - out.update(plugin.queries()) - return out - - -def types(model_cls): - # Gives us `item_types` and `album_types` - attr_name = '{0}_types'.format(model_cls.__name__.lower()) - types = {} - for plugin in find_plugins(): - plugin_types = getattr(plugin, attr_name, {}) - for field in plugin_types: - if field in types and plugin_types[field] != types[field]: - raise PluginConflictException( - u'Plugin {0} defines flexible field {1} ' - 'which has already been defined with ' - 'another type.'.format(plugin.name, field) - ) - types.update(plugin_types) - return types - - -def track_distance(item, info): - """Gets the track distance calculated by all loaded plugins. - Returns a Distance object. - """ - from beets.autotag.hooks import Distance - dist = Distance() - for plugin in find_plugins(): - dist.update(plugin.track_distance(item, info)) - return dist - - -def album_distance(items, album_info, mapping): - """Returns the album distance calculated by plugins.""" - from beets.autotag.hooks import Distance - dist = Distance() - for plugin in find_plugins(): - dist.update(plugin.album_distance(items, album_info, mapping)) - return dist - - -def candidates(items, artist, album, va_likely): - """Gets MusicBrainz candidates for an album from each plugin. - """ - out = [] - for plugin in find_plugins(): - out.extend(plugin.candidates(items, artist, album, va_likely)) - return out - - -def item_candidates(item, artist, title): - """Gets MusicBrainz candidates for an item from the plugins. - """ - out = [] - for plugin in find_plugins(): - out.extend(plugin.item_candidates(item, artist, title)) - return out - - -def album_for_id(album_id): - """Get AlbumInfo objects for a given ID string. - """ - out = [] - for plugin in find_plugins(): - res = plugin.album_for_id(album_id) - if res: - out.append(res) - return out - - -def track_for_id(track_id): - """Get TrackInfo objects for a given ID string. - """ - out = [] - for plugin in find_plugins(): - res = plugin.track_for_id(track_id) - if res: - out.append(res) - return out - - -def template_funcs(): - """Get all the template functions declared by plugins as a - dictionary. - """ - funcs = {} - for plugin in find_plugins(): - if plugin.template_funcs: - funcs.update(plugin.template_funcs) - return funcs - - -def import_stages(): - """Get a list of import stage functions defined by plugins.""" - stages = [] - for plugin in find_plugins(): - if hasattr(plugin, 'import_stages'): - stages += plugin.import_stages - return stages - - -# New-style (lazy) plugin-provided fields. - -def item_field_getters(): - """Get a dictionary mapping field names to unary functions that - compute the field's value. - """ - funcs = {} - for plugin in find_plugins(): - if plugin.template_fields: - funcs.update(plugin.template_fields) - return funcs - - -def album_field_getters(): - """As above, for album fields. - """ - funcs = {} - for plugin in find_plugins(): - if plugin.album_template_fields: - funcs.update(plugin.album_template_fields) - return funcs - - -# Event dispatch. - -def event_handlers(): - """Find all event handlers from plugins as a dictionary mapping - event names to sequences of callables. - """ - all_handlers = defaultdict(list) - for plugin in find_plugins(): - if plugin.listeners: - for event, handlers in plugin.listeners.items(): - all_handlers[event] += handlers - return all_handlers - - -def send(event, **arguments): - """Sends an event to all assigned event listeners. Event is the - name of the event to send, all other named arguments go to the - event handler(s). - - Returns a list of return values from the handlers. - """ - log.debug(u'Sending event: {0}'.format(event)) - for handler in event_handlers()[event]: - # Don't break legacy plugins if we want to pass more arguments - argspec = inspect.getargspec(handler).args - args = dict((k, v) for k, v in arguments.items() if k in argspec) - handler(**args) - - -def feat_tokens(for_artist=True): - """Return a regular expression that matches phrases like "featuring" - that separate a main artist or a song title from secondary artists. - The `for_artist` option determines whether the regex should be - suitable for matching artist fields (the default) or title fields. - """ - feat_words = ['ft', 'featuring', 'feat', 'feat.', 'ft.'] - if for_artist: - feat_words += ['with', 'vs', 'and', 'con', '&'] - return '(?<=\s)(?:{0})(?=\s)'.format( - '|'.join(re.escape(x) for x in feat_words) - ) - - -def sanitize_choices(choices, choices_all): - """Clean up a stringlist configuration attribute: keep only choices - elements present in choices_all, remove duplicate elements, expand '*' - wildcard while keeping original stringlist order. - """ - seen = set() - others = [x for x in choices_all if x not in choices] - res = [] - for s in choices: - if s in list(choices_all) + ['*']: - if not (s in seen or seen.add(s)): - res.extend(list(others) if s == '*' else [s]) - return res diff --git a/lib/beets/ui/__init__.py b/lib/beets/ui/__init__.py deleted file mode 100644 index 8978ff54..00000000 --- a/lib/beets/ui/__init__.py +++ /dev/null @@ -1,970 +0,0 @@ -# This file is part of beets. -# Copyright 2014, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""This module contains all of the core logic for beets' command-line -interface. To invoke the CLI, just call beets.ui.main(). The actual -CLI commands are implemented in the ui.commands module. -""" -from __future__ import print_function - -import locale -import optparse -import textwrap -import sys -from difflib import SequenceMatcher -import logging -import sqlite3 -import errno -import re -import struct -import traceback -import os.path - -from beets import library -from beets import plugins -from beets import util -from beets.util.functemplate import Template -from beets import config -from beets.util import confit -from beets.autotag import mb - -# On Windows platforms, use colorama to support "ANSI" terminal colors. -if sys.platform == 'win32': - try: - import colorama - except ImportError: - pass - else: - colorama.init() - - -log = logging.getLogger('beets') -if not log.handlers: - log.addHandler(logging.StreamHandler()) -log.propagate = False # Don't propagate to root handler. - - -PF_KEY_QUERIES = { - 'comp': 'comp:true', - 'singleton': 'singleton:true', -} - - -class UserError(Exception): - """UI exception. Commands should throw this in order to display - nonrecoverable errors to the user. - """ - - -# Utilities. - -def _encoding(): - """Tries to guess the encoding used by the terminal.""" - # Configured override? - encoding = config['terminal_encoding'].get() - if encoding: - return encoding - - # Determine from locale settings. - try: - return locale.getdefaultlocale()[1] or 'utf8' - except ValueError: - # Invalid locale environment variable setting. To avoid - # failing entirely for no good reason, assume UTF-8. - return 'utf8' - - -def decargs(arglist): - """Given a list of command-line argument bytestrings, attempts to - decode them to Unicode strings. - """ - return [s.decode(_encoding()) for s in arglist] - - -def print_(*strings): - """Like print, but rather than raising an error when a character - is not in the terminal's encoding's character set, just silently - replaces it. - """ - if strings: - if isinstance(strings[0], unicode): - txt = u' '.join(strings) - else: - txt = ' '.join(strings) - else: - txt = u'' - if isinstance(txt, unicode): - txt = txt.encode(_encoding(), 'replace') - print(txt) - - -def input_(prompt=None): - """Like `raw_input`, but decodes the result to a Unicode string. - Raises a UserError if stdin is not available. The prompt is sent to - stdout rather than stderr. A printed between the prompt and the - input cursor. - """ - # raw_input incorrectly sends prompts to stderr, not stdout, so we - # use print() explicitly to display prompts. - # http://bugs.python.org/issue1927 - if prompt: - if isinstance(prompt, unicode): - prompt = prompt.encode(_encoding(), 'replace') - print(prompt, end=' ') - - try: - resp = raw_input() - except EOFError: - raise UserError('stdin stream ended while input required') - - return resp.decode(sys.stdin.encoding or 'utf8', 'ignore') - - -def input_options(options, require=False, prompt=None, fallback_prompt=None, - numrange=None, default=None, max_width=72): - """Prompts a user for input. The sequence of `options` defines the - choices the user has. A single-letter shortcut is inferred for each - option; the user's choice is returned as that single, lower-case - letter. The options should be provided as lower-case strings unless - a particular shortcut is desired; in that case, only that letter - should be capitalized. - - By default, the first option is the default. `default` can be provided to - override this. If `require` is provided, then there is no default. The - prompt and fallback prompt are also inferred but can be overridden. - - If numrange is provided, it is a pair of `(high, low)` (both ints) - indicating that, in addition to `options`, the user may enter an - integer in that inclusive range. - - `max_width` specifies the maximum number of columns in the - automatically generated prompt string. - """ - # Assign single letters to each option. Also capitalize the options - # to indicate the letter. - letters = {} - display_letters = [] - capitalized = [] - first = True - for option in options: - # Is a letter already capitalized? - for letter in option: - if letter.isalpha() and letter.upper() == letter: - found_letter = letter - break - else: - # Infer a letter. - for letter in option: - if not letter.isalpha(): - continue # Don't use punctuation. - if letter not in letters: - found_letter = letter - break - else: - raise ValueError('no unambiguous lettering found') - - letters[found_letter.lower()] = option - index = option.index(found_letter) - - # Mark the option's shortcut letter for display. - if not require and ( - (default is None and not numrange and first) or - (isinstance(default, basestring) and - found_letter.lower() == default.lower())): - # The first option is the default; mark it. - show_letter = '[%s]' % found_letter.upper() - is_default = True - else: - show_letter = found_letter.upper() - is_default = False - - # Colorize the letter shortcut. - show_letter = colorize('turquoise' if is_default else 'blue', - show_letter) - - # Insert the highlighted letter back into the word. - capitalized.append( - option[:index] + show_letter + option[index + 1:] - ) - display_letters.append(found_letter.upper()) - - first = False - - # The default is just the first option if unspecified. - if require: - default = None - elif default is None: - if numrange: - default = numrange[0] - else: - default = display_letters[0].lower() - - # Make a prompt if one is not provided. - if not prompt: - prompt_parts = [] - prompt_part_lengths = [] - if numrange: - if isinstance(default, int): - default_name = str(default) - default_name = colorize('turquoise', default_name) - tmpl = '# selection (default %s)' - prompt_parts.append(tmpl % default_name) - prompt_part_lengths.append(len(tmpl % str(default))) - else: - prompt_parts.append('# selection') - prompt_part_lengths.append(len(prompt_parts[-1])) - prompt_parts += capitalized - prompt_part_lengths += [len(s) for s in options] - - # Wrap the query text. - prompt = '' - line_length = 0 - for i, (part, length) in enumerate(zip(prompt_parts, - prompt_part_lengths)): - # Add punctuation. - if i == len(prompt_parts) - 1: - part += '?' - else: - part += ',' - length += 1 - - # Choose either the current line or the beginning of the next. - if line_length + length + 1 > max_width: - prompt += '\n' - line_length = 0 - - if line_length != 0: - # Not the beginning of the line; need a space. - part = ' ' + part - length += 1 - - prompt += part - line_length += length - - # Make a fallback prompt too. This is displayed if the user enters - # something that is not recognized. - if not fallback_prompt: - fallback_prompt = 'Enter one of ' - if numrange: - fallback_prompt += '%i-%i, ' % numrange - fallback_prompt += ', '.join(display_letters) + ':' - - resp = input_(prompt) - while True: - resp = resp.strip().lower() - - # Try default option. - if default is not None and not resp: - resp = default - - # Try an integer input if available. - if numrange: - try: - resp = int(resp) - except ValueError: - pass - else: - low, high = numrange - if low <= resp <= high: - return resp - else: - resp = None - - # Try a normal letter input. - if resp: - resp = resp[0] - if resp in letters: - return resp - - # Prompt for new input. - resp = input_(fallback_prompt) - - -def input_yn(prompt, require=False): - """Prompts the user for a "yes" or "no" response. The default is - "yes" unless `require` is `True`, in which case there is no default. - """ - sel = input_options( - ('y', 'n'), require, prompt, 'Enter Y or N:' - ) - return sel == 'y' - - -def human_bytes(size): - """Formats size, a number of bytes, in a human-readable way.""" - suffices = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB', 'HB'] - for suffix in suffices: - if size < 1024: - return "%3.1f %s" % (size, suffix) - size /= 1024.0 - return "big" - - -def human_seconds(interval): - """Formats interval, a number of seconds, as a human-readable time - interval using English words. - """ - units = [ - (1, 'second'), - (60, 'minute'), - (60, 'hour'), - (24, 'day'), - (7, 'week'), - (52, 'year'), - (10, 'decade'), - ] - for i in range(len(units) - 1): - increment, suffix = units[i] - next_increment, _ = units[i + 1] - interval /= float(increment) - if interval < next_increment: - break - else: - # Last unit. - increment, suffix = units[-1] - interval /= float(increment) - - return "%3.1f %ss" % (interval, suffix) - - -def human_seconds_short(interval): - """Formats a number of seconds as a short human-readable M:SS - string. - """ - interval = int(interval) - return u'%i:%02i' % (interval // 60, interval % 60) - - -# ANSI terminal colorization code heavily inspired by pygments: -# http://dev.pocoo.org/hg/pygments-main/file/b2deea5b5030/pygments/console.py -# (pygments is by Tim Hatch, Armin Ronacher, et al.) -COLOR_ESCAPE = "\x1b[" -DARK_COLORS = ["black", "darkred", "darkgreen", "brown", "darkblue", - "purple", "teal", "lightgray"] -LIGHT_COLORS = ["darkgray", "red", "green", "yellow", "blue", - "fuchsia", "turquoise", "white"] -RESET_COLOR = COLOR_ESCAPE + "39;49;00m" - - -def _colorize(color, text): - """Returns a string that prints the given text in the given color - in a terminal that is ANSI color-aware. The color must be something - in DARK_COLORS or LIGHT_COLORS. - """ - if color in DARK_COLORS: - escape = COLOR_ESCAPE + "%im" % (DARK_COLORS.index(color) + 30) - elif color in LIGHT_COLORS: - escape = COLOR_ESCAPE + "%i;01m" % (LIGHT_COLORS.index(color) + 30) - else: - raise ValueError('no such color %s', color) - return escape + text + RESET_COLOR - - -def colorize(color, text): - """Colorize text if colored output is enabled. (Like _colorize but - conditional.) - """ - if config['color']: - return _colorize(color, text) - else: - return text - - -def _colordiff(a, b, highlight='red', minor_highlight='lightgray'): - """Given two values, return the same pair of strings except with - their differences highlighted in the specified color. Strings are - highlighted intelligently to show differences; other values are - stringified and highlighted in their entirety. - """ - if not isinstance(a, basestring) or not isinstance(b, basestring): - # Non-strings: use ordinary equality. - a = unicode(a) - b = unicode(b) - if a == b: - return a, b - else: - return colorize(highlight, a), colorize(highlight, b) - - if isinstance(a, bytes) or isinstance(b, bytes): - # A path field. - a = util.displayable_path(a) - b = util.displayable_path(b) - - a_out = [] - b_out = [] - - matcher = SequenceMatcher(lambda x: False, a, b) - for op, a_start, a_end, b_start, b_end in matcher.get_opcodes(): - if op == 'equal': - # In both strings. - a_out.append(a[a_start:a_end]) - b_out.append(b[b_start:b_end]) - elif op == 'insert': - # Right only. - b_out.append(colorize(highlight, b[b_start:b_end])) - elif op == 'delete': - # Left only. - a_out.append(colorize(highlight, a[a_start:a_end])) - elif op == 'replace': - # Right and left differ. Colorise with second highlight if - # it's just a case change. - if a[a_start:a_end].lower() != b[b_start:b_end].lower(): - color = highlight - else: - color = minor_highlight - a_out.append(colorize(color, a[a_start:a_end])) - b_out.append(colorize(color, b[b_start:b_end])) - else: - assert(False) - - return u''.join(a_out), u''.join(b_out) - - -def colordiff(a, b, highlight='red'): - """Colorize differences between two values if color is enabled. - (Like _colordiff but conditional.) - """ - if config['color']: - return _colordiff(a, b, highlight) - else: - return unicode(a), unicode(b) - - -def get_path_formats(subview=None): - """Get the configuration's path formats as a list of query/template - pairs. - """ - path_formats = [] - subview = subview or config['paths'] - for query, view in subview.items(): - query = PF_KEY_QUERIES.get(query, query) # Expand common queries. - path_formats.append((query, Template(view.get(unicode)))) - return path_formats - - -def get_replacements(): - """Confit validation function that reads regex/string pairs. - """ - replacements = [] - for pattern, repl in config['replace'].get(dict).items(): - repl = repl or '' - try: - replacements.append((re.compile(pattern), repl)) - except re.error: - raise UserError( - u'malformed regular expression in replace: {0}'.format( - pattern - ) - ) - return replacements - - -def _pick_format(album, fmt=None): - """Pick a format string for printing Album or Item objects, - falling back to config options and defaults. - """ - if fmt: - return fmt - if album: - return config['list_format_album'].get(unicode) - else: - return config['list_format_item'].get(unicode) - - -def print_obj(obj, lib, fmt=None): - """Print an Album or Item object. If `fmt` is specified, use that - format string. Otherwise, use the configured template. - """ - album = isinstance(obj, library.Album) - fmt = _pick_format(album, fmt) - if isinstance(fmt, Template): - template = fmt - else: - template = Template(fmt) - print_(obj.evaluate_template(template)) - - -def term_width(): - """Get the width (columns) of the terminal.""" - fallback = config['ui']['terminal_width'].get(int) - - # The fcntl and termios modules are not available on non-Unix - # platforms, so we fall back to a constant. - try: - import fcntl - import termios - except ImportError: - return fallback - - try: - buf = fcntl.ioctl(0, termios.TIOCGWINSZ, ' ' * 4) - except IOError: - return fallback - try: - height, width = struct.unpack('hh', buf) - except struct.error: - return fallback - return width - - -FLOAT_EPSILON = 0.01 - - -def _field_diff(field, old, new): - """Given two Model objects, format their values for `field` and - highlight changes among them. Return a human-readable string. If the - value has not changed, return None instead. - """ - oldval = old.get(field) - newval = new.get(field) - - # If no change, abort. - if isinstance(oldval, float) and isinstance(newval, float) and \ - abs(oldval - newval) < FLOAT_EPSILON: - return None - elif oldval == newval: - return None - - # Get formatted values for output. - oldstr = old.formatted().get(field, u'') - newstr = new.formatted().get(field, u'') - - # For strings, highlight changes. For others, colorize the whole - # thing. - if isinstance(oldval, basestring): - oldstr, newstr = colordiff(oldval, newstr) - else: - oldstr, newstr = colorize('red', oldstr), colorize('red', newstr) - - return u'{0} -> {1}'.format(oldstr, newstr) - - -def show_model_changes(new, old=None, fields=None, always=False): - """Given a Model object, print a list of changes from its pristine - version stored in the database. Return a boolean indicating whether - any changes were found. - - `old` may be the "original" object to avoid using the pristine - version from the database. `fields` may be a list of fields to - restrict the detection to. `always` indicates whether the object is - always identified, regardless of whether any changes are present. - """ - old = old or new._db._get(type(new), new.id) - - # Build up lines showing changed fields. - changes = [] - for field in old: - # Subset of the fields. Never show mtime. - if field == 'mtime' or (fields and field not in fields): - continue - - # Detect and show difference for this field. - line = _field_diff(field, old, new) - if line: - changes.append(u' {0}: {1}'.format(field, line)) - - # New fields. - for field in set(new) - set(old): - if fields and field not in fields: - continue - - changes.append(u' {0}: {1}'.format( - field, - colorize('red', new.formatted()[field]) - )) - - # Print changes. - if changes or always: - print_obj(old, old._db) - if changes: - print_(u'\n'.join(changes)) - - return bool(changes) - - -# Subcommand parsing infrastructure. -# -# This is a fairly generic subcommand parser for optparse. It is -# maintained externally here: -# http://gist.github.com/462717 -# There you will also find a better description of the code and a more -# succinct example program. - -class Subcommand(object): - """A subcommand of a root command-line application that may be - invoked by a SubcommandOptionParser. - """ - def __init__(self, name, parser=None, help='', aliases=(), hide=False): - """Creates a new subcommand. name is the primary way to invoke - the subcommand; aliases are alternate names. parser is an - OptionParser responsible for parsing the subcommand's options. - help is a short description of the command. If no parser is - given, it defaults to a new, empty OptionParser. - """ - self.name = name - self.parser = parser or optparse.OptionParser() - self.aliases = aliases - self.help = help - self.hide = hide - self._root_parser = None - - def print_help(self): - self.parser.print_help() - - def parse_args(self, args): - return self.parser.parse_args(args) - - @property - def root_parser(self): - return self._root_parser - - @root_parser.setter - def root_parser(self, root_parser): - self._root_parser = root_parser - self.parser.prog = '{0} {1}'.format(root_parser.get_prog_name(), - self.name) - - -class SubcommandsOptionParser(optparse.OptionParser): - """A variant of OptionParser that parses subcommands and their - arguments. - """ - - def __init__(self, *args, **kwargs): - """Create a new subcommand-aware option parser. All of the - options to OptionParser.__init__ are supported in addition - to subcommands, a sequence of Subcommand objects. - """ - # A more helpful default usage. - if 'usage' not in kwargs: - kwargs['usage'] = """ - %prog COMMAND [ARGS...] - %prog help COMMAND""" - kwargs['add_help_option'] = False - - # Super constructor. - optparse.OptionParser.__init__(self, *args, **kwargs) - - # Our root parser needs to stop on the first unrecognized argument. - self.disable_interspersed_args() - - self.subcommands = [] - - def add_subcommand(self, *cmds): - """Adds a Subcommand object to the parser's list of commands. - """ - for cmd in cmds: - cmd.root_parser = self - self.subcommands.append(cmd) - - # Add the list of subcommands to the help message. - def format_help(self, formatter=None): - # Get the original help message, to which we will append. - out = optparse.OptionParser.format_help(self, formatter) - if formatter is None: - formatter = self.formatter - - # Subcommands header. - result = ["\n"] - result.append(formatter.format_heading('Commands')) - formatter.indent() - - # Generate the display names (including aliases). - # Also determine the help position. - disp_names = [] - help_position = 0 - subcommands = [c for c in self.subcommands if not c.hide] - subcommands.sort(key=lambda c: c.name) - for subcommand in subcommands: - name = subcommand.name - if subcommand.aliases: - name += ' (%s)' % ', '.join(subcommand.aliases) - disp_names.append(name) - - # Set the help position based on the max width. - proposed_help_position = len(name) + formatter.current_indent + 2 - if proposed_help_position <= formatter.max_help_position: - help_position = max(help_position, proposed_help_position) - - # Add each subcommand to the output. - for subcommand, name in zip(subcommands, disp_names): - # Lifted directly from optparse.py. - name_width = help_position - formatter.current_indent - 2 - if len(name) > name_width: - name = "%*s%s\n" % (formatter.current_indent, "", name) - indent_first = help_position - else: - name = "%*s%-*s " % (formatter.current_indent, "", - name_width, name) - indent_first = 0 - result.append(name) - help_width = formatter.width - help_position - help_lines = textwrap.wrap(subcommand.help, help_width) - result.append("%*s%s\n" % (indent_first, "", help_lines[0])) - result.extend(["%*s%s\n" % (help_position, "", line) - for line in help_lines[1:]]) - formatter.dedent() - - # Concatenate the original help message with the subcommand - # list. - return out + "".join(result) - - def _subcommand_for_name(self, name): - """Return the subcommand in self.subcommands matching the - given name. The name may either be the name of a subcommand or - an alias. If no subcommand matches, returns None. - """ - for subcommand in self.subcommands: - if name == subcommand.name or \ - name in subcommand.aliases: - return subcommand - return None - - def parse_global_options(self, args): - """Parse options up to the subcommand argument. Returns a tuple - of the options object and the remaining arguments. - """ - options, subargs = self.parse_args(args) - - # Force the help command - if options.help: - subargs = ['help'] - elif options.version: - subargs = ['version'] - return options, subargs - - def parse_subcommand(self, args): - """Given the `args` left unused by a `parse_global_options`, - return the invoked subcommand, the subcommand options, and the - subcommand arguments. - """ - # Help is default command - if not args: - args = ['help'] - - cmdname = args.pop(0) - subcommand = self._subcommand_for_name(cmdname) - if not subcommand: - raise UserError("unknown command '{0}'".format(cmdname)) - - suboptions, subargs = subcommand.parse_args(args) - return subcommand, suboptions, subargs - - -optparse.Option.ALWAYS_TYPED_ACTIONS += ('callback',) - - -def vararg_callback(option, opt_str, value, parser): - """Callback for an option with variable arguments. - Manually collect arguments right of a callback-action - option (ie. with action="callback"), and add the resulting - list to the destination var. - - Usage: - parser.add_option("-c", "--callback", dest="vararg_attr", - action="callback", callback=vararg_callback) - - Details: - http://docs.python.org/2/library/optparse.html#callback-example-6-variable - -arguments - """ - value = [value] - - def floatable(str): - try: - float(str) - return True - except ValueError: - return False - - for arg in parser.rargs: - # stop on --foo like options - if arg[:2] == "--" and len(arg) > 2: - break - # stop on -a, but not on -3 or -3.0 - if arg[:1] == "-" and len(arg) > 1 and not floatable(arg): - break - value.append(arg) - - del parser.rargs[:len(value) - 1] - setattr(parser.values, option.dest, value) - - -# The main entry point and bootstrapping. - -def _load_plugins(config): - """Load the plugins specified in the configuration. - """ - paths = config['pluginpath'].get(confit.StrSeq(split=False)) - paths = map(util.normpath, paths) - - import beetsplug - beetsplug.__path__ = paths + beetsplug.__path__ - # For backwards compatibility. - sys.path += paths - - plugins.load_plugins(config['plugins'].as_str_seq()) - plugins.send("pluginload") - return plugins - - -def _setup(options, lib=None): - """Prepare and global state and updates it with command line options. - - Returns a list of subcommands, a list of plugins, and a library instance. - """ - # Configure the MusicBrainz API. - mb.configure() - - config = _configure(options) - - plugins = _load_plugins(config) - - # Get the default subcommands. - from beets.ui.commands import default_commands - - subcommands = list(default_commands) - subcommands.extend(plugins.commands()) - - if lib is None: - lib = _open_library(config) - plugins.send("library_opened", lib=lib) - library.Item._types = plugins.types(library.Item) - library.Album._types = plugins.types(library.Album) - - return subcommands, plugins, lib - - -def _configure(options): - """Amend the global configuration object with command line options. - """ - # Add any additional config files specified with --config. This - # special handling lets specified plugins get loaded before we - # finish parsing the command line. - if getattr(options, 'config', None) is not None: - config_path = options.config - del options.config - config.set_file(config_path) - config.set_args(options) - - # Configure the logger. - if config['verbose'].get(bool): - log.setLevel(logging.DEBUG) - else: - log.setLevel(logging.INFO) - - config_path = config.user_config_path() - if os.path.isfile(config_path): - log.debug(u'user configuration: {0}'.format( - util.displayable_path(config_path))) - else: - log.debug(u'no user configuration found at {0}'.format( - util.displayable_path(config_path))) - - log.debug(u'data directory: {0}' - .format(util.displayable_path(config.config_dir()))) - return config - - -def _open_library(config): - """Create a new library instance from the configuration. - """ - dbpath = config['library'].as_filename() - try: - lib = library.Library( - dbpath, - config['directory'].as_filename(), - get_path_formats(), - get_replacements(), - ) - lib.get_item(0) # Test database connection. - except (sqlite3.OperationalError, sqlite3.DatabaseError): - log.debug(traceback.format_exc()) - raise UserError(u"database file {0} could not be opened".format( - util.displayable_path(dbpath) - )) - log.debug(u'library database: {0}\n' - u'library directory: {1}' - .format(util.displayable_path(lib.path), - util.displayable_path(lib.directory))) - return lib - - -def _raw_main(args, lib=None): - """A helper function for `main` without top-level exception - handling. - """ - parser = SubcommandsOptionParser() - parser.add_option('-l', '--library', dest='library', - help='library database file to use') - parser.add_option('-d', '--directory', dest='directory', - help="destination music directory") - parser.add_option('-v', '--verbose', dest='verbose', action='store_true', - help='print debugging information') - parser.add_option('-c', '--config', dest='config', - help='path to configuration file') - parser.add_option('-h', '--help', dest='help', action='store_true', - help='how this help message and exit') - parser.add_option('--version', dest='version', action='store_true', - help=optparse.SUPPRESS_HELP) - - options, subargs = parser.parse_global_options(args) - - # Special case for the `config --edit` command: bypass _setup so - # that an invalid configuration does not prevent the editor from - # starting. - if subargs[0] == 'config' and ('-e' in subargs or '--edit' in subargs): - from beets.ui.commands import config_edit - return config_edit() - - subcommands, plugins, lib = _setup(options, lib) - parser.add_subcommand(*subcommands) - - subcommand, suboptions, subargs = parser.parse_subcommand(subargs) - subcommand.func(lib, suboptions, subargs) - - plugins.send('cli_exit', lib=lib) - - -def main(args=None): - """Run the main command-line interface for beets. Includes top-level - exception handlers that print friendly error messages. - """ - try: - _raw_main(args) - except UserError as exc: - message = exc.args[0] if exc.args else None - log.error(u'error: {0}'.format(message)) - sys.exit(1) - except util.HumanReadableException as exc: - exc.log(log) - sys.exit(1) - except library.FileOperationError as exc: - # These errors have reasonable human-readable descriptions, but - # we still want to log their tracebacks for debugging. - log.debug(traceback.format_exc()) - log.error(exc) - sys.exit(1) - except confit.ConfigError as exc: - log.error(u'configuration error: {0}'.format(exc)) - sys.exit(1) - except IOError as exc: - if exc.errno == errno.EPIPE: - # "Broken pipe". End silently. - pass - else: - raise - except KeyboardInterrupt: - # Silently ignore ^C except in verbose mode. - log.debug(traceback.format_exc()) diff --git a/lib/beets/ui/commands.py b/lib/beets/ui/commands.py deleted file mode 100644 index 4dfac11c..00000000 --- a/lib/beets/ui/commands.py +++ /dev/null @@ -1,1646 +0,0 @@ -# This file is part of beets. -# Copyright 2014, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""This module provides the default commands for beets' command-line -interface. -""" -from __future__ import print_function - -import logging -import os -import time -import codecs -import platform -import re -import shlex - -import beets -from beets import ui -from beets.ui import print_, input_, decargs -from beets import autotag -from beets.autotag import Recommendation -from beets.autotag import hooks -from beets import plugins -from beets import importer -from beets import util -from beets.util import syspath, normpath, ancestry, displayable_path -from beets.util.functemplate import Template -from beets import library -from beets import config -from beets.util.confit import _package_path - -VARIOUS_ARTISTS = u'Various Artists' - -# Global logger. -log = logging.getLogger('beets') - -# The list of default subcommands. This is populated with Subcommand -# objects that can be fed to a SubcommandsOptionParser. -default_commands = [] - - -# Utilities. - -def _do_query(lib, query, album, also_items=True): - """For commands that operate on matched items, performs a query - and returns a list of matching items and a list of matching - albums. (The latter is only nonempty when album is True.) Raises - a UserError if no items match. also_items controls whether, when - fetching albums, the associated items should be fetched also. - """ - if album: - albums = list(lib.albums(query)) - items = [] - if also_items: - for al in albums: - items += al.items() - - else: - albums = [] - items = list(lib.items(query)) - - if album and not albums: - raise ui.UserError('No matching albums found.') - elif not album and not items: - raise ui.UserError('No matching items found.') - - return items, albums - - -# fields: Shows a list of available fields for queries and format strings. - -def fields_func(lib, opts, args): - def _print_rows(names): - print(" " + "\n ".join(names)) - - def _show_plugin_fields(album): - plugin_fields = [] - for plugin in plugins.find_plugins(): - if album: - fdict = plugin.album_template_fields - else: - fdict = plugin.template_fields - plugin_fields += fdict.keys() - if plugin_fields: - print("Template fields from plugins:") - _print_rows(plugin_fields) - - print("Item fields:") - _print_rows(library.Item._fields.keys()) - _show_plugin_fields(False) - - print("\nAlbum fields:") - _print_rows(library.Album._fields.keys()) - _show_plugin_fields(True) - - -fields_cmd = ui.Subcommand( - 'fields', - help='show fields available for queries and format strings' -) -fields_cmd.func = fields_func -default_commands.append(fields_cmd) - - -# help: Print help text for commands - -class HelpCommand(ui.Subcommand): - - def __init__(self): - super(HelpCommand, self).__init__( - 'help', aliases=('?',), - help='give detailed help on a specific sub-command', - ) - - def func(self, lib, opts, args): - if args: - cmdname = args[0] - helpcommand = self.root_parser._subcommand_for_name(cmdname) - if not helpcommand: - raise ui.UserError("unknown command '{0}'".format(cmdname)) - helpcommand.print_help() - else: - self.root_parser.print_help() - - -default_commands.append(HelpCommand()) - - -# import: Autotagger and importer. - -# Importer utilities and support. - -def disambig_string(info): - """Generate a string for an AlbumInfo or TrackInfo object that - provides context that helps disambiguate similar-looking albums and - tracks. - """ - disambig = [] - if info.data_source and info.data_source != 'MusicBrainz': - disambig.append(info.data_source) - - if isinstance(info, hooks.AlbumInfo): - if info.media: - if info.mediums > 1: - disambig.append(u'{0}x{1}'.format( - info.mediums, info.media - )) - else: - disambig.append(info.media) - if info.year: - disambig.append(unicode(info.year)) - if info.country: - disambig.append(info.country) - if info.label: - disambig.append(info.label) - if info.albumdisambig: - disambig.append(info.albumdisambig) - - if disambig: - return u', '.join(disambig) - - -def dist_string(dist): - """Formats a distance (a float) as a colorized similarity percentage - string. - """ - out = '%.1f%%' % ((1 - dist) * 100) - if dist <= config['match']['strong_rec_thresh'].as_number(): - out = ui.colorize('green', out) - elif dist <= config['match']['medium_rec_thresh'].as_number(): - out = ui.colorize('yellow', out) - else: - out = ui.colorize('red', out) - return out - - -def penalty_string(distance, limit=None): - """Returns a colorized string that indicates all the penalties - applied to a distance object. - """ - penalties = [] - for key in distance.keys(): - key = key.replace('album_', '') - key = key.replace('track_', '') - key = key.replace('_', ' ') - penalties.append(key) - if penalties: - if limit and len(penalties) > limit: - penalties = penalties[:limit] + ['...'] - return ui.colorize('yellow', '(%s)' % ', '.join(penalties)) - - -def show_change(cur_artist, cur_album, match): - """Print out a representation of the changes that will be made if an - album's tags are changed according to `match`, which must be an AlbumMatch - object. - """ - def show_album(artist, album): - if artist: - album_description = u' %s - %s' % (artist, album) - elif album: - album_description = u' %s' % album - else: - album_description = u' (unknown album)' - print_(album_description) - - def format_index(track_info): - """Return a string representing the track index of the given - TrackInfo or Item object. - """ - if isinstance(track_info, hooks.TrackInfo): - index = track_info.index - medium_index = track_info.medium_index - medium = track_info.medium - mediums = match.info.mediums - else: - index = medium_index = track_info.track - medium = track_info.disc - mediums = track_info.disctotal - if config['per_disc_numbering']: - if mediums > 1: - return u'{0}-{1}'.format(medium, medium_index) - else: - return unicode(medium_index) - else: - return unicode(index) - - # Identify the album in question. - if cur_artist != match.info.artist or \ - (cur_album != match.info.album and - match.info.album != VARIOUS_ARTISTS): - artist_l, artist_r = cur_artist or '', match.info.artist - album_l, album_r = cur_album or '', match.info.album - if artist_r == VARIOUS_ARTISTS: - # Hide artists for VA releases. - artist_l, artist_r = u'', u'' - - artist_l, artist_r = ui.colordiff(artist_l, artist_r) - album_l, album_r = ui.colordiff(album_l, album_r) - - print_("Correcting tags from:") - show_album(artist_l, album_l) - print_("To:") - show_album(artist_r, album_r) - else: - print_(u"Tagging:\n {0.artist} - {0.album}".format(match.info)) - - # Data URL. - if match.info.data_url: - print_('URL:\n %s' % match.info.data_url) - - # Info line. - info = [] - # Similarity. - info.append('(Similarity: %s)' % dist_string(match.distance)) - # Penalties. - penalties = penalty_string(match.distance) - if penalties: - info.append(penalties) - # Disambiguation. - disambig = disambig_string(match.info) - if disambig: - info.append(ui.colorize('lightgray', '(%s)' % disambig)) - print_(' '.join(info)) - - # Tracks. - pairs = match.mapping.items() - pairs.sort(key=lambda (_, track_info): track_info.index) - - # Build up LHS and RHS for track difference display. The `lines` list - # contains ``(lhs, rhs, width)`` tuples where `width` is the length (in - # characters) of the uncolorized LHS. - lines = [] - medium = disctitle = None - for item, track_info in pairs: - - # Medium number and title. - if medium != track_info.medium or disctitle != track_info.disctitle: - media = match.info.media or 'Media' - if match.info.mediums > 1 and track_info.disctitle: - lhs = '%s %s: %s' % (media, track_info.medium, - track_info.disctitle) - elif match.info.mediums > 1: - lhs = '%s %s' % (media, track_info.medium) - elif track_info.disctitle: - lhs = '%s: %s' % (media, track_info.disctitle) - else: - lhs = None - if lhs: - lines.append((lhs, '', 0)) - medium, disctitle = track_info.medium, track_info.disctitle - - # Titles. - new_title = track_info.title - if not item.title.strip(): - # If there's no title, we use the filename. - cur_title = displayable_path(os.path.basename(item.path)) - lhs, rhs = cur_title, new_title - else: - cur_title = item.title.strip() - lhs, rhs = ui.colordiff(cur_title, new_title) - lhs_width = len(cur_title) - - # Track number change. - cur_track, new_track = format_index(item), format_index(track_info) - if cur_track != new_track: - if item.track in (track_info.index, track_info.medium_index): - color = 'lightgray' - else: - color = 'red' - templ = ui.colorize(color, u' (#{0})') - lhs += templ.format(cur_track) - rhs += templ.format(new_track) - lhs_width += len(cur_track) + 4 - - # Length change. - if item.length and track_info.length and \ - abs(item.length - track_info.length) > \ - config['ui']['length_diff_thresh'].as_number(): - cur_length = ui.human_seconds_short(item.length) - new_length = ui.human_seconds_short(track_info.length) - templ = ui.colorize('red', u' ({0})') - lhs += templ.format(cur_length) - rhs += templ.format(new_length) - lhs_width += len(cur_length) + 3 - - # Penalties. - penalties = penalty_string(match.distance.tracks[track_info]) - if penalties: - rhs += ' %s' % penalties - - if lhs != rhs: - lines.append((' * %s' % lhs, rhs, lhs_width)) - elif config['import']['detail']: - lines.append((' * %s' % lhs, '', lhs_width)) - - # Print each track in two columns, or across two lines. - col_width = (ui.term_width() - len(''.join([' * ', ' -> ']))) // 2 - if lines: - max_width = max(w for _, _, w in lines) - for lhs, rhs, lhs_width in lines: - if not rhs: - print_(lhs) - elif max_width > col_width: - print_(u'%s ->\n %s' % (lhs, rhs)) - else: - pad = max_width - lhs_width - print_(u'%s%s -> %s' % (lhs, ' ' * pad, rhs)) - - # Missing and unmatched tracks. - if match.extra_tracks: - print_('Missing tracks:') - for track_info in match.extra_tracks: - line = ' ! %s (#%s)' % (track_info.title, format_index(track_info)) - if track_info.length: - line += ' (%s)' % ui.human_seconds_short(track_info.length) - print_(ui.colorize('yellow', line)) - if match.extra_items: - print_('Unmatched tracks:') - for item in match.extra_items: - line = ' ! %s (#%s)' % (item.title, format_index(item)) - if item.length: - line += ' (%s)' % ui.human_seconds_short(item.length) - print_(ui.colorize('yellow', line)) - - -def show_item_change(item, match): - """Print out the change that would occur by tagging `item` with the - metadata from `match`, a TrackMatch object. - """ - cur_artist, new_artist = item.artist, match.info.artist - cur_title, new_title = item.title, match.info.title - - if cur_artist != new_artist or cur_title != new_title: - cur_artist, new_artist = ui.colordiff(cur_artist, new_artist) - cur_title, new_title = ui.colordiff(cur_title, new_title) - - print_("Correcting track tags from:") - print_(" %s - %s" % (cur_artist, cur_title)) - print_("To:") - print_(" %s - %s" % (new_artist, new_title)) - - else: - print_("Tagging track: %s - %s" % (cur_artist, cur_title)) - - # Data URL. - if match.info.data_url: - print_('URL:\n %s' % match.info.data_url) - - # Info line. - info = [] - # Similarity. - info.append('(Similarity: %s)' % dist_string(match.distance)) - # Penalties. - penalties = penalty_string(match.distance) - if penalties: - info.append(penalties) - # Disambiguation. - disambig = disambig_string(match.info) - if disambig: - info.append(ui.colorize('lightgray', '(%s)' % disambig)) - print_(' '.join(info)) - - -def summarize_items(items, singleton): - """Produces a brief summary line describing a set of items. Used for - manually resolving duplicates during import. - - `items` is a list of `Item` objects. `singleton` indicates whether - this is an album or single-item import (if the latter, them `items` - should only have one element). - """ - summary_parts = [] - if not singleton: - summary_parts.append("{0} items".format(len(items))) - - format_counts = {} - for item in items: - format_counts[item.format] = format_counts.get(item.format, 0) + 1 - if len(format_counts) == 1: - # A single format. - summary_parts.append(items[0].format) - else: - # Enumerate all the formats. - for format, count in format_counts.iteritems(): - summary_parts.append('{0} {1}'.format(format, count)) - - average_bitrate = sum([item.bitrate for item in items]) / len(items) - total_duration = sum([item.length for item in items]) - summary_parts.append('{0}kbps'.format(int(average_bitrate / 1000))) - summary_parts.append(ui.human_seconds_short(total_duration)) - - return ', '.join(summary_parts) - - -def _summary_judment(rec): - """Determines whether a decision should be made without even asking - the user. This occurs in quiet mode and when an action is chosen for - NONE recommendations. Return an action or None if the user should be - queried. May also print to the console if a summary judgment is - made. - """ - if config['import']['quiet']: - if rec == Recommendation.strong: - return importer.action.APPLY - else: - action = config['import']['quiet_fallback'].as_choice({ - 'skip': importer.action.SKIP, - 'asis': importer.action.ASIS, - }) - - elif rec == Recommendation.none: - action = config['import']['none_rec_action'].as_choice({ - 'skip': importer.action.SKIP, - 'asis': importer.action.ASIS, - 'ask': None, - }) - - else: - return None - - if action == importer.action.SKIP: - print_('Skipping.') - elif action == importer.action.ASIS: - print_('Importing as-is.') - return action - - -def choose_candidate(candidates, singleton, rec, cur_artist=None, - cur_album=None, item=None, itemcount=None): - """Given a sorted list of candidates, ask the user for a selection - of which candidate to use. Applies to both full albums and - singletons (tracks). Candidates are either AlbumMatch or TrackMatch - objects depending on `singleton`. for albums, `cur_artist`, - `cur_album`, and `itemcount` must be provided. For singletons, - `item` must be provided. - - Returns the result of the choice, which may SKIP, ASIS, TRACKS, or - MANUAL or a candidate (an AlbumMatch/TrackMatch object). - """ - # Sanity check. - if singleton: - assert item is not None - else: - assert cur_artist is not None - assert cur_album is not None - - # Zero candidates. - if not candidates: - if singleton: - print_("No matching recordings found.") - opts = ('Use as-is', 'Skip', 'Enter search', 'enter Id', - 'aBort') - else: - print_("No matching release found for {0} tracks." - .format(itemcount)) - print_('For help, see: ' - 'http://beets.readthedocs.org/en/latest/faq.html#nomatch') - opts = ('Use as-is', 'as Tracks', 'Group albums', 'Skip', - 'Enter search', 'enter Id', 'aBort') - sel = ui.input_options(opts) - if sel == 'u': - return importer.action.ASIS - elif sel == 't': - assert not singleton - return importer.action.TRACKS - elif sel == 'e': - return importer.action.MANUAL - elif sel == 's': - return importer.action.SKIP - elif sel == 'b': - raise importer.ImportAbort() - elif sel == 'i': - return importer.action.MANUAL_ID - elif sel == 'g': - return importer.action.ALBUMS - else: - assert False - - # Is the change good enough? - bypass_candidates = False - if rec != Recommendation.none: - match = candidates[0] - bypass_candidates = True - - while True: - # Display and choose from candidates. - require = rec <= Recommendation.low - - if not bypass_candidates: - # Display list of candidates. - print_(u'Finding tags for {0} "{1} - {2}".'.format( - u'track' if singleton else u'album', - item.artist if singleton else cur_artist, - item.title if singleton else cur_album, - )) - - print_(u'Candidates:') - for i, match in enumerate(candidates): - # Index, metadata, and distance. - line = [ - u'{0}.'.format(i + 1), - u'{0} - {1}'.format( - match.info.artist, - match.info.title if singleton else match.info.album, - ), - u'({0})'.format(dist_string(match.distance)), - ] - - # Penalties. - penalties = penalty_string(match.distance, 3) - if penalties: - line.append(penalties) - - # Disambiguation - disambig = disambig_string(match.info) - if disambig: - line.append(ui.colorize('lightgray', '(%s)' % disambig)) - - print_(' '.join(line)) - - # Ask the user for a choice. - if singleton: - opts = ('Skip', 'Use as-is', 'Enter search', 'enter Id', - 'aBort') - else: - opts = ('Skip', 'Use as-is', 'as Tracks', 'Group albums', - 'Enter search', 'enter Id', 'aBort') - sel = ui.input_options(opts, numrange=(1, len(candidates))) - if sel == 's': - return importer.action.SKIP - elif sel == 'u': - return importer.action.ASIS - elif sel == 'm': - pass - elif sel == 'e': - return importer.action.MANUAL - elif sel == 't': - assert not singleton - return importer.action.TRACKS - elif sel == 'b': - raise importer.ImportAbort() - elif sel == 'i': - return importer.action.MANUAL_ID - elif sel == 'g': - return importer.action.ALBUMS - else: # Numerical selection. - match = candidates[sel - 1] - if sel != 1: - # When choosing anything but the first match, - # disable the default action. - require = True - bypass_candidates = False - - # Show what we're about to do. - if singleton: - show_item_change(item, match) - else: - show_change(cur_artist, cur_album, match) - - # Exact match => tag automatically if we're not in timid mode. - if rec == Recommendation.strong and not config['import']['timid']: - return match - - # Ask for confirmation. - if singleton: - opts = ('Apply', 'More candidates', 'Skip', 'Use as-is', - 'Enter search', 'enter Id', 'aBort') - else: - opts = ('Apply', 'More candidates', 'Skip', 'Use as-is', - 'as Tracks', 'Group albums', 'Enter search', 'enter Id', - 'aBort') - default = config['import']['default_action'].as_choice({ - 'apply': 'a', - 'skip': 's', - 'asis': 'u', - 'none': None, - }) - if default is None: - require = True - sel = ui.input_options(opts, require=require, default=default) - if sel == 'a': - return match - elif sel == 'g': - return importer.action.ALBUMS - elif sel == 's': - return importer.action.SKIP - elif sel == 'u': - return importer.action.ASIS - elif sel == 't': - assert not singleton - return importer.action.TRACKS - elif sel == 'e': - return importer.action.MANUAL - elif sel == 'b': - raise importer.ImportAbort() - elif sel == 'i': - return importer.action.MANUAL_ID - - -def manual_search(singleton): - """Input either an artist and album (for full albums) or artist and - track name (for singletons) for manual search. - """ - artist = input_('Artist:') - name = input_('Track:' if singleton else 'Album:') - return artist.strip(), name.strip() - - -def manual_id(singleton): - """Input an ID, either for an album ("release") or a track ("recording"). - """ - prompt = u'Enter {0} ID:'.format('recording' if singleton else 'release') - return input_(prompt).strip() - - -class TerminalImportSession(importer.ImportSession): - """An import session that runs in a terminal. - """ - def choose_match(self, task): - """Given an initial autotagging of items, go through an interactive - dance with the user to ask for a choice of metadata. Returns an - AlbumMatch object, ASIS, or SKIP. - """ - # Show what we're tagging. - print_() - print_(displayable_path(task.paths, u'\n') + - u' ({0} items)'.format(len(task.items))) - - # Take immediate action if appropriate. - action = _summary_judment(task.rec) - if action == importer.action.APPLY: - match = task.candidates[0] - show_change(task.cur_artist, task.cur_album, match) - return match - elif action is not None: - return action - - # Loop until we have a choice. - candidates, rec = task.candidates, task.rec - while True: - # Ask for a choice from the user. - choice = choose_candidate( - candidates, False, rec, task.cur_artist, task.cur_album, - itemcount=len(task.items) - ) - - # Choose which tags to use. - if choice in (importer.action.SKIP, importer.action.ASIS, - importer.action.TRACKS, importer.action.ALBUMS): - # Pass selection to main control flow. - return choice - elif choice is importer.action.MANUAL: - # Try again with manual search terms. - search_artist, search_album = manual_search(False) - _, _, candidates, rec = autotag.tag_album( - task.items, search_artist, search_album - ) - elif choice is importer.action.MANUAL_ID: - # Try a manually-entered ID. - search_id = manual_id(False) - if search_id: - _, _, candidates, rec = autotag.tag_album( - task.items, search_id=search_id - ) - else: - # We have a candidate! Finish tagging. Here, choice is an - # AlbumMatch object. - assert isinstance(choice, autotag.AlbumMatch) - return choice - - def choose_item(self, task): - """Ask the user for a choice about tagging a single item. Returns - either an action constant or a TrackMatch object. - """ - print_() - print_(task.item.path) - candidates, rec = task.candidates, task.rec - - # Take immediate action if appropriate. - action = _summary_judment(task.rec) - if action == importer.action.APPLY: - match = candidates[0] - show_item_change(task.item, match) - return match - elif action is not None: - return action - - while True: - # Ask for a choice. - choice = choose_candidate(candidates, True, rec, item=task.item) - - if choice in (importer.action.SKIP, importer.action.ASIS): - return choice - elif choice == importer.action.TRACKS: - assert False # TRACKS is only legal for albums. - elif choice == importer.action.MANUAL: - # Continue in the loop with a new set of candidates. - search_artist, search_title = manual_search(True) - candidates, rec = autotag.tag_item(task.item, search_artist, - search_title) - elif choice == importer.action.MANUAL_ID: - # Ask for a track ID. - search_id = manual_id(True) - if search_id: - candidates, rec = autotag.tag_item(task.item, - search_id=search_id) - else: - # Chose a candidate. - assert isinstance(choice, autotag.TrackMatch) - return choice - - def resolve_duplicate(self, task, found_duplicates): - """Decide what to do when a new album or item seems similar to one - that's already in the library. - """ - log.warn(u"This {0} is already in the library!" - .format("album" if task.is_album else "item")) - - if config['import']['quiet']: - # In quiet mode, don't prompt -- just skip. - log.info(u'Skipping.') - sel = 's' - else: - # Print some detail about the existing and new items so the - # user can make an informed decision. - for duplicate in found_duplicates: - print("Old: " + summarize_items( - list(duplicate.items()) if task.is_album else [duplicate], - not task.is_album, - )) - print("New: " + summarize_items( - task.imported_items(), - not task.is_album, - )) - - sel = ui.input_options( - ('Skip new', 'Keep both', 'Remove old') - ) - - if sel == 's': - # Skip new. - task.set_choice(importer.action.SKIP) - elif sel == 'k': - # Keep both. Do nothing; leave the choice intact. - pass - elif sel == 'r': - # Remove old. - task.should_remove_duplicates = True - else: - assert False - - def should_resume(self, path): - return ui.input_yn(u"Import of the directory:\n{0}\n" - "was interrupted. Resume (Y/n)?" - .format(displayable_path(path))) - -# The import command. - - -def import_files(lib, paths, query): - """Import the files in the given list of paths or matching the - query. - """ - # Check the user-specified directories. - for path in paths: - if not os.path.exists(syspath(normpath(path))): - raise ui.UserError(u'no such file or directory: {0}'.format( - displayable_path(path))) - - # Check parameter consistency. - if config['import']['quiet'] and config['import']['timid']: - raise ui.UserError("can't be both quiet and timid") - - # Open the log. - if config['import']['log'].get() is not None: - logpath = config['import']['log'].as_filename() - try: - logfile = codecs.open(syspath(logpath), 'a', 'utf8') - except IOError: - raise ui.UserError(u"could not open log file for writing: %s" % - displayable_path(logpath)) - print(u'import started', time.asctime(), file=logfile) - else: - logfile = None - - # Never ask for input in quiet mode. - if config['import']['resume'].get() == 'ask' and \ - config['import']['quiet']: - config['import']['resume'] = False - - session = TerminalImportSession(lib, logfile, paths, query) - try: - session.run() - finally: - # If we were logging, close the file. - if logfile: - print(u'', file=logfile) - logfile.close() - - # Emit event. - plugins.send('import', lib=lib, paths=paths) - - -def import_func(lib, opts, args): - config['import'].set_args(opts) - - # Special case: --copy flag suppresses import_move (which would - # otherwise take precedence). - if opts.copy: - config['import']['move'] = False - - if opts.library: - query = decargs(args) - paths = [] - else: - query = None - paths = args - if not paths: - raise ui.UserError('no path specified') - - import_files(lib, paths, query) - - -import_cmd = ui.Subcommand( - 'import', help='import new music', aliases=('imp', 'im') -) -import_cmd.parser.add_option( - '-c', '--copy', action='store_true', default=None, - help="copy tracks into library directory (default)" -) -import_cmd.parser.add_option( - '-C', '--nocopy', action='store_false', dest='copy', - help="don't copy tracks (opposite of -c)" -) -import_cmd.parser.add_option( - '-w', '--write', action='store_true', default=None, - help="write new metadata to files' tags (default)" -) -import_cmd.parser.add_option( - '-W', '--nowrite', action='store_false', dest='write', - help="don't write metadata (opposite of -w)" -) -import_cmd.parser.add_option( - '-a', '--autotag', action='store_true', dest='autotag', - help="infer tags for imported files (default)" -) -import_cmd.parser.add_option( - '-A', '--noautotag', action='store_false', dest='autotag', - help="don't infer tags for imported files (opposite of -a)" -) -import_cmd.parser.add_option( - '-p', '--resume', action='store_true', default=None, - help="resume importing if interrupted" -) -import_cmd.parser.add_option( - '-P', '--noresume', action='store_false', dest='resume', - help="do not try to resume importing" -) -import_cmd.parser.add_option( - '-q', '--quiet', action='store_true', dest='quiet', - help="never prompt for input: skip albums instead" -) -import_cmd.parser.add_option( - '-l', '--log', dest='log', - help='file to log untaggable albums for later review' -) -import_cmd.parser.add_option( - '-s', '--singletons', action='store_true', - help='import individual tracks instead of full albums' -) -import_cmd.parser.add_option( - '-t', '--timid', dest='timid', action='store_true', - help='always confirm all actions' -) -import_cmd.parser.add_option( - '-L', '--library', dest='library', action='store_true', - help='retag items matching a query' -) -import_cmd.parser.add_option( - '-i', '--incremental', dest='incremental', action='store_true', - help='skip already-imported directories' -) -import_cmd.parser.add_option( - '-I', '--noincremental', dest='incremental', action='store_false', - help='do not skip already-imported directories' -) -import_cmd.parser.add_option( - '--flat', dest='flat', action='store_true', - help='import an entire tree as a single album' -) -import_cmd.parser.add_option( - '-g', '--group-albums', dest='group_albums', action='store_true', - help='group tracks in a folder into separate albums' -) -import_cmd.parser.add_option( - '--pretend', dest='pretend', action='store_true', - help='just print the files to import' -) -import_cmd.func = import_func -default_commands.append(import_cmd) - - -# list: Query and show library contents. - -def list_items(lib, query, album, fmt): - """Print out items in lib matching query. If album, then search for - albums instead of single items. - """ - tmpl = Template(ui._pick_format(album, fmt)) - if album: - for album in lib.albums(query): - ui.print_obj(album, lib, tmpl) - else: - for item in lib.items(query): - ui.print_obj(item, lib, tmpl) - - -def list_func(lib, opts, args): - if opts.path: - fmt = '$path' - else: - fmt = opts.format - list_items(lib, decargs(args), opts.album, fmt) - - -list_cmd = ui.Subcommand('list', help='query the library', aliases=('ls',)) -list_cmd.parser.add_option( - '-a', '--album', action='store_true', - help='show matching albums instead of tracks' -) -list_cmd.parser.add_option( - '-p', '--path', action='store_true', - help='print paths for matched items or albums' -) -list_cmd.parser.add_option( - '-f', '--format', action='store', - help='print with custom format', default=None -) -list_cmd.func = list_func -default_commands.append(list_cmd) - - -# update: Update library contents according to on-disk tags. - -def update_items(lib, query, album, move, pretend): - """For all the items matched by the query, update the library to - reflect the item's embedded tags. - """ - with lib.transaction(): - items, _ = _do_query(lib, query, album) - - # Walk through the items and pick up their changes. - affected_albums = set() - for item in items: - # Item deleted? - if not os.path.exists(syspath(item.path)): - ui.print_obj(item, lib) - ui.print_(ui.colorize('red', u' deleted')) - if not pretend: - item.remove(True) - affected_albums.add(item.album_id) - continue - - # Did the item change since last checked? - if item.current_mtime() <= item.mtime: - log.debug(u'skipping {0} because mtime is up to date ({1})' - .format(displayable_path(item.path), item.mtime)) - continue - - # Read new data. - try: - item.read() - except library.ReadError as exc: - log.error(u'error reading {0}: {1}'.format( - displayable_path(item.path), exc)) - continue - - # Special-case album artist when it matches track artist. (Hacky - # but necessary for preserving album-level metadata for non- - # autotagged imports.) - if not item.albumartist: - old_item = lib.get_item(item.id) - if old_item.albumartist == old_item.artist == item.artist: - item.albumartist = old_item.albumartist - item._dirty.discard('albumartist') - - # Check for and display changes. - changed = ui.show_model_changes(item, - fields=library.Item._media_fields) - - # Save changes. - if not pretend: - if changed: - # Move the item if it's in the library. - if move and lib.directory in ancestry(item.path): - item.move() - - item.store() - affected_albums.add(item.album_id) - else: - # The file's mtime was different, but there were no - # changes to the metadata. Store the new mtime, - # which is set in the call to read(), so we don't - # check this again in the future. - item.store() - - # Skip album changes while pretending. - if pretend: - return - - # Modify affected albums to reflect changes in their items. - for album_id in affected_albums: - if album_id is None: # Singletons. - continue - album = lib.get_album(album_id) - if not album: # Empty albums have already been removed. - log.debug(u'emptied album {0}'.format(album_id)) - continue - first_item = album.items().get() - - # Update album structure to reflect an item in it. - for key in library.Album.item_keys: - album[key] = first_item[key] - album.store() - - # Move album art (and any inconsistent items). - if move and lib.directory in ancestry(first_item.path): - log.debug(u'moving album {0}'.format(album_id)) - album.move() - - -def update_func(lib, opts, args): - update_items(lib, decargs(args), opts.album, opts.move, opts.pretend) - - -update_cmd = ui.Subcommand( - 'update', help='update the library', aliases=('upd', 'up',) -) -update_cmd.parser.add_option( - '-a', '--album', action='store_true', - help='match albums instead of tracks' -) -update_cmd.parser.add_option( - '-M', '--nomove', action='store_false', default=True, dest='move', - help="don't move files in library" -) -update_cmd.parser.add_option( - '-p', '--pretend', action='store_true', - help="show all changes but do nothing" -) -update_cmd.parser.add_option( - '-f', '--format', action='store', - help='print with custom format', default=None -) -update_cmd.func = update_func -default_commands.append(update_cmd) - - -# remove: Remove items from library, delete files. - -def remove_items(lib, query, album, delete): - """Remove items matching query from lib. If album, then match and - remove whole albums. If delete, also remove files from disk. - """ - # Get the matching items. - items, albums = _do_query(lib, query, album) - - # Prepare confirmation with user. - print_() - if delete: - fmt = u'$path - $title' - prompt = 'Really DELETE %i files (y/n)?' % len(items) - else: - fmt = None - prompt = 'Really remove %i items from the library (y/n)?' % \ - len(items) - - # Show all the items. - for item in items: - ui.print_obj(item, lib, fmt) - - # Confirm with user. - if not ui.input_yn(prompt, True): - return - - # Remove (and possibly delete) items. - with lib.transaction(): - for obj in (albums if album else items): - obj.remove(delete) - - -def remove_func(lib, opts, args): - remove_items(lib, decargs(args), opts.album, opts.delete) - - -remove_cmd = ui.Subcommand( - 'remove', help='remove matching items from the library', aliases=('rm',) -) -remove_cmd.parser.add_option( - "-d", "--delete", action="store_true", - help="also remove files from disk" -) -remove_cmd.parser.add_option( - '-a', '--album', action='store_true', - help='match albums instead of tracks' -) -remove_cmd.func = remove_func -default_commands.append(remove_cmd) - - -# stats: Show library/query statistics. - -def show_stats(lib, query, exact): - """Shows some statistics about the matched items.""" - items = lib.items(query) - - total_size = 0 - total_time = 0.0 - total_items = 0 - artists = set() - albums = set() - album_artists = set() - - for item in items: - if exact: - total_size += os.path.getsize(item.path) - else: - total_size += int(item.length * item.bitrate / 8) - total_time += item.length - total_items += 1 - artists.add(item.artist) - album_artists.add(item.albumartist) - if item.album_id: - albums.add(item.album_id) - - size_str = '' + ui.human_bytes(total_size) - if exact: - size_str += ' ({0} bytes)'.format(total_size) - - print_("""Tracks: {0} -Total time: {1}{2} -{3}: {4} -Artists: {5} -Albums: {6} -Album artists: {7}""".format( - total_items, - ui.human_seconds(total_time), - ' ({0:.2f} seconds)'.format(total_time) if exact else '', - 'Total size' if exact else 'Approximate total size', - size_str, - len(artists), - len(albums), - len(album_artists)), - ) - - -def stats_func(lib, opts, args): - show_stats(lib, decargs(args), opts.exact) - - -stats_cmd = ui.Subcommand( - 'stats', help='show statistics about the library or a query' -) -stats_cmd.parser.add_option( - '-e', '--exact', action='store_true', - help='exact size and time' -) -stats_cmd.func = stats_func -default_commands.append(stats_cmd) - - -# version: Show current beets version. - -def show_version(lib, opts, args): - print_('beets version %s' % beets.__version__) - # Show plugins. - names = [p.name for p in plugins.find_plugins()] - if names: - print_('plugins:', ', '.join(names)) - else: - print_('no plugins loaded') - - -version_cmd = ui.Subcommand( - 'version', help='output version information' -) -version_cmd.func = show_version -default_commands.append(version_cmd) - - -# modify: Declaratively change metadata. - -def modify_items(lib, mods, dels, query, write, move, album, confirm): - """Modifies matching items according to user-specified assignments and - deletions. - - `mods` is a dictionary of field and value pairse indicating - assignments. `dels` is a list of fields to be deleted. - """ - # Parse key=value specifications into a dictionary. - model_cls = library.Album if album else library.Item - - for key, value in mods.items(): - mods[key] = model_cls._parse(key, value) - - # Get the items to modify. - items, albums = _do_query(lib, query, album, False) - objs = albums if album else items - - # Apply changes *temporarily*, preview them, and collect modified - # objects. - print_('Modifying {0} {1}s.' - .format(len(objs), 'album' if album else 'item')) - changed = set() - for obj in objs: - obj.update(mods) - for field in dels: - try: - del obj[field] - except KeyError: - pass - if ui.show_model_changes(obj): - changed.add(obj) - - # Still something to do? - if not changed: - print_('No changes to make.') - return - - # Confirm action. - if confirm: - if write and move: - extra = ', move and write tags' - elif write: - extra = ' and write tags' - elif move: - extra = ' and move' - else: - extra = '' - - if not ui.input_yn('Really modify%s (Y/n)?' % extra): - return - - # Apply changes to database and files - with lib.transaction(): - for obj in changed: - if move: - cur_path = obj.path - if lib.directory in ancestry(cur_path): # In library? - log.debug(u'moving object {0}' - .format(displayable_path(cur_path))) - obj.move() - - obj.try_sync(write) - - -def modify_parse_args(args): - """Split the arguments for the modify subcommand into query parts, - assignments (field=value), and deletions (field!). Returns the result as - a three-tuple in that order. - """ - mods = {} - dels = [] - query = [] - for arg in args: - if arg.endswith('!') and '=' not in arg and ':' not in arg: - dels.append(arg[:-1]) # Strip trailing !. - elif '=' in arg and ':' not in arg.split('=', 1)[0]: - key, val = arg.split('=', 1) - mods[key] = val - else: - query.append(arg) - return query, mods, dels - - -def modify_func(lib, opts, args): - query, mods, dels = modify_parse_args(decargs(args)) - if not mods and not dels: - raise ui.UserError('no modifications specified') - write = opts.write if opts.write is not None else \ - config['import']['write'].get(bool) - modify_items(lib, mods, dels, query, write, opts.move, opts.album, - not opts.yes) - - -modify_cmd = ui.Subcommand( - 'modify', help='change metadata fields', aliases=('mod',) -) -modify_cmd.parser.add_option( - '-M', '--nomove', action='store_false', default=True, dest='move', - help="don't move files in library" -) -modify_cmd.parser.add_option( - '-w', '--write', action='store_true', default=None, - help="write new metadata to files' tags (default)" -) -modify_cmd.parser.add_option( - '-W', '--nowrite', action='store_false', dest='write', - help="don't write metadata (opposite of -w)" -) -modify_cmd.parser.add_option( - '-a', '--album', action='store_true', - help='modify whole albums instead of tracks' -) -modify_cmd.parser.add_option( - '-y', '--yes', action='store_true', - help='skip confirmation' -) -modify_cmd.parser.add_option( - '-f', '--format', action='store', - help='print with custom format', default=None -) -modify_cmd.func = modify_func -default_commands.append(modify_cmd) - - -# move: Move/copy files to the library or a new base directory. - -def move_items(lib, dest, query, copy, album): - """Moves or copies items to a new base directory, given by dest. If - dest is None, then the library's base directory is used, making the - command "consolidate" files. - """ - items, albums = _do_query(lib, query, album, False) - objs = albums if album else items - - action = 'Copying' if copy else 'Moving' - entity = 'album' if album else 'item' - log.info(u'{0} {1} {2}s.'.format(action, len(objs), entity)) - for obj in objs: - log.debug(u'moving: {0}'.format(util.displayable_path(obj.path))) - - obj.move(copy, basedir=dest) - obj.store() - - -def move_func(lib, opts, args): - dest = opts.dest - if dest is not None: - dest = normpath(dest) - if not os.path.isdir(dest): - raise ui.UserError('no such directory: %s' % dest) - - move_items(lib, dest, decargs(args), opts.copy, opts.album) - - -move_cmd = ui.Subcommand( - 'move', help='move or copy items', aliases=('mv',) -) -move_cmd.parser.add_option( - '-d', '--dest', metavar='DIR', dest='dest', - help='destination directory' -) -move_cmd.parser.add_option( - '-c', '--copy', default=False, action='store_true', - help='copy instead of moving' -) -move_cmd.parser.add_option( - '-a', '--album', default=False, action='store_true', - help='match whole albums instead of tracks' -) -move_cmd.func = move_func -default_commands.append(move_cmd) - - -# write: Write tags into files. - -def write_items(lib, query, pretend, force): - """Write tag information from the database to the respective files - in the filesystem. - """ - items, albums = _do_query(lib, query, False, False) - - for item in items: - # Item deleted? - if not os.path.exists(syspath(item.path)): - log.info(u'missing file: {0}'.format( - util.displayable_path(item.path) - )) - continue - - # Get an Item object reflecting the "clean" (on-disk) state. - try: - clean_item = library.Item.from_path(item.path) - except library.ReadError as exc: - log.error(u'error reading {0}: {1}'.format( - displayable_path(item.path), exc - )) - continue - - # Check for and display changes. - changed = ui.show_model_changes(item, clean_item, - library.Item._media_fields, force) - if (changed or force) and not pretend: - item.try_sync() - - -def write_func(lib, opts, args): - write_items(lib, decargs(args), opts.pretend, opts.force) - - -write_cmd = ui.Subcommand('write', help='write tag information to files') -write_cmd.parser.add_option( - '-p', '--pretend', action='store_true', - help="show all changes but do nothing" -) -write_cmd.parser.add_option( - '-f', '--force', action='store_true', - help="write tags even if the existing tags match the database" -) -write_cmd.func = write_func -default_commands.append(write_cmd) - - -# config: Show and edit user configuration. - -def config_func(lib, opts, args): - # Make sure lazy configuration is loaded - config.resolve() - - # Print paths. - if opts.paths: - filenames = [] - for source in config.sources: - if not opts.defaults and source.default: - continue - if source.filename: - filenames.append(source.filename) - - # In case the user config file does not exist, prepend it to the - # list. - user_path = config.user_config_path() - if user_path not in filenames: - filenames.insert(0, user_path) - - for filename in filenames: - print(filename) - - # Open in editor. - elif opts.edit: - config_edit() - - # Dump configuration. - else: - print(config.dump(full=opts.defaults)) - - -def config_edit(): - """Open a program to edit the user configuration. - """ - path = config.user_config_path() - - if 'EDITOR' in os.environ: - editor = os.environ['EDITOR'] - try: - editor = shlex.split(editor) - except ValueError: # Malformed shell tokens. - editor = [editor] - args = editor + [path] - args.insert(1, args[0]) - elif platform.system() == 'Darwin': - args = ['open', 'open', '-n', path] - elif platform.system() == 'Windows': - # On windows we can execute arbitrary files. The os will - # take care of starting an appropriate application - args = [path, path] - else: - # Assume Unix - args = ['xdg-open', 'xdg-open', path] - - try: - os.execlp(*args) - except OSError: - raise ui.UserError("Could not edit configuration. Please " - "set the EDITOR environment variable.") - - -config_cmd = ui.Subcommand('config', - help='show or edit the user configuration') -config_cmd.parser.add_option( - '-p', '--paths', action='store_true', - help='show files that configuration was loaded from' -) -config_cmd.parser.add_option( - '-e', '--edit', action='store_true', - help='edit user configuration with $EDITOR' -) -config_cmd.parser.add_option( - '-d', '--defaults', action='store_true', - help='include the default configuration' -) -config_cmd.func = config_func -default_commands.append(config_cmd) - - -# completion: print completion script - -def print_completion(*args): - for line in completion_script(default_commands + plugins.commands()): - print(line, end='') - if not any(map(os.path.isfile, BASH_COMPLETION_PATHS)): - log.warn(u'Warning: Unable to find the bash-completion package. ' - u'Command line completion might not work.') - -BASH_COMPLETION_PATHS = map(syspath, [ - u'/etc/bash_completion', - u'/usr/share/bash-completion/bash_completion', - u'/usr/share/local/bash-completion/bash_completion', - u'/opt/local/share/bash-completion/bash_completion', # SmartOS - u'/usr/local/etc/bash_completion', # Homebrew -]) - - -def completion_script(commands): - """Yield the full completion shell script as strings. - - ``commands`` is alist of ``ui.Subcommand`` instances to generate - completion data for. - """ - base_script = os.path.join(_package_path('beets.ui'), 'completion_base.sh') - with open(base_script, 'r') as base_script: - yield base_script.read() - - options = {} - aliases = {} - command_names = [] - - # Collect subcommands - for cmd in commands: - name = cmd.name - command_names.append(name) - - for alias in cmd.aliases: - if re.match(r'^\w+$', alias): - aliases[alias] = name - - options[name] = {'flags': [], 'opts': []} - for opts in cmd.parser._get_all_options()[1:]: - if opts.action in ('store_true', 'store_false'): - option_type = 'flags' - else: - option_type = 'opts' - - options[name][option_type].extend( - opts._short_opts + opts._long_opts - ) - - # Add global options - options['_global'] = { - 'flags': ['-v', '--verbose'], - 'opts': '-l --library -c --config -d --directory -h --help'.split(' ') - } - - # Add flags common to all commands - options['_common'] = { - 'flags': ['-h', '--help'] - } - - # Start generating the script - yield "_beet() {\n" - - # Command names - yield " local commands='%s'\n" % ' '.join(command_names) - yield "\n" - - # Command aliases - yield " local aliases='%s'\n" % ' '.join(aliases.keys()) - for alias, cmd in aliases.items(): - yield " local alias__%s=%s\n" % (alias, cmd) - yield '\n' - - # Fields - yield " fields='%s'\n" % ' '.join( - set(library.Item._fields.keys() + library.Album._fields.keys()) - ) - - # Command options - for cmd, opts in options.items(): - for option_type, option_list in opts.items(): - if option_list: - option_list = ' '.join(option_list) - yield " local %s__%s='%s'\n" % (option_type, cmd, option_list) - - yield ' _beet_dispatch\n' - yield '}\n' - - -completion_cmd = ui.Subcommand( - 'completion', - help='print shell script that provides command line completion' -) -completion_cmd.func = print_completion -completion_cmd.hide = True -default_commands.append(completion_cmd) diff --git a/lib/beets/ui/completion_base.sh b/lib/beets/ui/completion_base.sh deleted file mode 100644 index ce3fb6e2..00000000 --- a/lib/beets/ui/completion_base.sh +++ /dev/null @@ -1,162 +0,0 @@ -# This file is part of beets. -# Copyright (c) 2014, Thomas Scholtes. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - - - -# Completion for the `beet` command -# ================================= -# -# Load this script to complete beets subcommands, options, and -# queries. -# -# If a beets command is found on the command line it completes filenames and -# the subcommand's options. Otherwise it will complete global options and -# subcommands. If the previous option on the command line expects an argument, -# it also completes filenames or directories. Options are only -# completed if '-' has already been typed on the command line. -# -# Note that completion of plugin commands only works for those plugins -# that were enabled when running `beet completion`. It does not check -# plugins dynamically -# -# Currently, only Bash 3.2 and newer is supported and the -# `bash-completion` package is requied. -# -# TODO -# ---- -# -# * There are some issues with arguments that are quoted on the command line. -# -# * Complete arguments for the `--format` option by expanding field variables. -# -# beet ls -f "$tit[TAB] -# beet ls -f "$title -# -# * Support long options with `=`, e.g. `--config=file`. Debian's bash -# completion package can handle this. -# - - -# Determines the beets subcommand and dispatches the completion -# accordingly. -_beet_dispatch() { - local cur prev cmd= - - COMPREPLY=() - _get_comp_words_by_ref -n : cur prev - - # Look for the beets subcommand - local arg - for (( i=1; i < COMP_CWORD; i++ )); do - arg="${COMP_WORDS[i]}" - if _list_include_item "${opts___global}" $arg; then - ((i++)) - elif [[ "$arg" != -* ]]; then - cmd="$arg" - break - fi - done - - # Replace command shortcuts - if [[ -n $cmd ]] && _list_include_item "$aliases" "$cmd"; then - eval "cmd=\$alias__$cmd" - fi - - case $cmd in - help) - COMPREPLY+=( $(compgen -W "$commands" -- $cur) ) - ;; - list|remove|move|update|write|stats) - _beet_complete_query - ;; - "") - _beet_complete_global - ;; - *) - _beet_complete - ;; - esac -} - - -# Adds option and file completion to COMPREPLY for the subcommand $cmd -_beet_complete() { - if [[ $cur == -* ]]; then - local opts flags completions - eval "opts=\$opts__$cmd" - eval "flags=\$flags__$cmd" - completions="${flags___common} ${opts} ${flags}" - COMPREPLY+=( $(compgen -W "$completions" -- $cur) ) - else - _filedir - fi -} - - -# Add global options and subcommands to the completion -_beet_complete_global() { - case $prev in - -h|--help) - # Complete commands - COMPREPLY+=( $(compgen -W "$commands" -- $cur) ) - return - ;; - -l|--library|-c|--config) - # Filename completion - _filedir - return - ;; - -d|--directory) - # Directory completion - _filedir -d - return - ;; - esac - - if [[ $cur == -* ]]; then - local completions="$opts___global $flags___global" - COMPREPLY+=( $(compgen -W "$completions" -- $cur) ) - elif [[ -n $cur ]] && _list_include_item "$aliases" "$cur"; then - local cmd - eval "cmd=\$alias__$cur" - COMPREPLY+=( "$cmd" ) - else - COMPREPLY+=( $(compgen -W "$commands" -- $cur) ) - fi -} - -_beet_complete_query() { - local opts - eval "opts=\$opts__$cmd" - - if [[ $cur == -* ]] || _list_include_item "$opts" "$prev"; then - _beet_complete - elif [[ $cur != \'* && $cur != \"* && - $cur != *:* ]]; then - # Do not complete quoted queries or those who already have a field - # set. - compopt -o nospace - COMPREPLY+=( $(compgen -S : -W "$fields" -- $cur) ) - return 0 - fi -} - -# Returns true if the space separated list $1 includes $2 -_list_include_item() { - [[ " $1 " == *[[:space:]]$2[[:space:]]* ]] -} - -# This is where beets dynamically adds the _beet function. This -# function sets the variables $flags, $opts, $commands, and $aliases. -complete -o filenames -F _beet beet diff --git a/lib/beets/util/__init__.py b/lib/beets/util/__init__.py deleted file mode 100644 index 38cecd70..00000000 --- a/lib/beets/util/__init__.py +++ /dev/null @@ -1,686 +0,0 @@ -# This file is part of beets. -# Copyright 2013, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Miscellaneous utility functions.""" -from __future__ import division - -import os -import sys -import re -import shutil -import fnmatch -from collections import defaultdict -import traceback -import subprocess -import platform - - -MAX_FILENAME_LENGTH = 200 -WINDOWS_MAGIC_PREFIX = u'\\\\?\\' - - -class HumanReadableException(Exception): - """An Exception that can include a human-readable error message to - be logged without a traceback. Can preserve a traceback for - debugging purposes as well. - - Has at least two fields: `reason`, the underlying exception or a - string describing the problem; and `verb`, the action being - performed during the error. - - If `tb` is provided, it is a string containing a traceback for the - associated exception. (Note that this is not necessary in Python 3.x - and should be removed when we make the transition.) - """ - error_kind = 'Error' # Human-readable description of error type. - - def __init__(self, reason, verb, tb=None): - self.reason = reason - self.verb = verb - self.tb = tb - super(HumanReadableException, self).__init__(self.get_message()) - - def _gerund(self): - """Generate a (likely) gerund form of the English verb. - """ - if ' ' in self.verb: - return self.verb - gerund = self.verb[:-1] if self.verb.endswith('e') else self.verb - gerund += 'ing' - return gerund - - def _reasonstr(self): - """Get the reason as a string.""" - if isinstance(self.reason, unicode): - return self.reason - elif isinstance(self.reason, basestring): # Byte string. - return self.reason.decode('utf8', 'ignore') - elif hasattr(self.reason, 'strerror'): # i.e., EnvironmentError - return self.reason.strerror - else: - return u'"{0}"'.format(unicode(self.reason)) - - def get_message(self): - """Create the human-readable description of the error, sans - introduction. - """ - raise NotImplementedError - - def log(self, logger): - """Log to the provided `logger` a human-readable message as an - error and a verbose traceback as a debug message. - """ - if self.tb: - logger.debug(self.tb) - logger.error(u'{0}: {1}'.format(self.error_kind, self.args[0])) - - -class FilesystemError(HumanReadableException): - """An error that occurred while performing a filesystem manipulation - via a function in this module. The `paths` field is a sequence of - pathnames involved in the operation. - """ - def __init__(self, reason, verb, paths, tb=None): - self.paths = paths - super(FilesystemError, self).__init__(reason, verb, tb) - - def get_message(self): - # Use a nicer English phrasing for some specific verbs. - if self.verb in ('move', 'copy', 'rename'): - clause = u'while {0} {1} to {2}'.format( - self._gerund(), - displayable_path(self.paths[0]), - displayable_path(self.paths[1]) - ) - elif self.verb in ('delete', 'write', 'create', 'read'): - clause = u'while {0} {1}'.format( - self._gerund(), - displayable_path(self.paths[0]) - ) - else: - clause = u'during {0} of paths {1}'.format( - self.verb, u', '.join(displayable_path(p) for p in self.paths) - ) - - return u'{0} {1}'.format(self._reasonstr(), clause) - - -def normpath(path): - """Provide the canonical form of the path suitable for storing in - the database. - """ - path = syspath(path, prefix=False) - path = os.path.normpath(os.path.abspath(os.path.expanduser(path))) - return bytestring_path(path) - - -def ancestry(path): - """Return a list consisting of path's parent directory, its - grandparent, and so on. For instance: - - >>> ancestry('/a/b/c') - ['/', '/a', '/a/b'] - - The argument should *not* be the result of a call to `syspath`. - """ - out = [] - last_path = None - while path: - path = os.path.dirname(path) - - if path == last_path: - break - last_path = path - - if path: - # don't yield '' - out.insert(0, path) - return out - - -def sorted_walk(path, ignore=(), logger=None): - """Like `os.walk`, but yields things in case-insensitive sorted, - breadth-first order. Directory and file names matching any glob - pattern in `ignore` are skipped. If `logger` is provided, then - warning messages are logged there when a directory cannot be listed. - """ - # Make sure the path isn't a Unicode string. - path = bytestring_path(path) - - # Get all the directories and files at this level. - try: - contents = os.listdir(syspath(path)) - except OSError as exc: - if logger: - logger.warn(u'could not list directory {0}: {1}'.format( - displayable_path(path), exc.strerror - )) - return - dirs = [] - files = [] - for base in contents: - base = bytestring_path(base) - - # Skip ignored filenames. - skip = False - for pat in ignore: - if fnmatch.fnmatch(base, pat): - skip = True - break - if skip: - continue - - # Add to output as either a file or a directory. - cur = os.path.join(path, base) - if os.path.isdir(syspath(cur)): - dirs.append(base) - else: - files.append(base) - - # Sort lists (case-insensitive) and yield the current level. - dirs.sort(key=bytes.lower) - files.sort(key=bytes.lower) - yield (path, dirs, files) - - # Recurse into directories. - for base in dirs: - cur = os.path.join(path, base) - # yield from sorted_walk(...) - for res in sorted_walk(cur, ignore, logger): - yield res - - -def mkdirall(path): - """Make all the enclosing directories of path (like mkdir -p on the - parent). - """ - for ancestor in ancestry(path): - if not os.path.isdir(syspath(ancestor)): - try: - os.mkdir(syspath(ancestor)) - except (OSError, IOError) as exc: - raise FilesystemError(exc, 'create', (ancestor,), - traceback.format_exc()) - - -def fnmatch_all(names, patterns): - """Determine whether all strings in `names` match at least one of - the `patterns`, which should be shell glob expressions. - """ - for name in names: - matches = False - for pattern in patterns: - matches = fnmatch.fnmatch(name, pattern) - if matches: - break - if not matches: - return False - return True - - -def prune_dirs(path, root=None, clutter=('.DS_Store', 'Thumbs.db')): - """If path is an empty directory, then remove it. Recursively remove - path's ancestry up to root (which is never removed) where there are - empty directories. If path is not contained in root, then nothing is - removed. Glob patterns in clutter are ignored when determining - emptiness. If root is not provided, then only path may be removed - (i.e., no recursive removal). - """ - path = normpath(path) - if root is not None: - root = normpath(root) - - ancestors = ancestry(path) - if root is None: - # Only remove the top directory. - ancestors = [] - elif root in ancestors: - # Only remove directories below the root. - ancestors = ancestors[ancestors.index(root) + 1:] - else: - # Remove nothing. - return - - # Traverse upward from path. - ancestors.append(path) - ancestors.reverse() - for directory in ancestors: - directory = syspath(directory) - if not os.path.exists(directory): - # Directory gone already. - continue - if fnmatch_all(os.listdir(directory), clutter): - # Directory contains only clutter (or nothing). - try: - shutil.rmtree(directory) - except OSError: - break - else: - break - - -def components(path): - """Return a list of the path components in path. For instance: - - >>> components('/a/b/c') - ['a', 'b', 'c'] - - The argument should *not* be the result of a call to `syspath`. - """ - comps = [] - ances = ancestry(path) - for anc in ances: - comp = os.path.basename(anc) - if comp: - comps.append(comp) - else: # root - comps.append(anc) - - last = os.path.basename(path) - if last: - comps.append(last) - - return comps - - -def _fsencoding(): - """Get the system's filesystem encoding. On Windows, this is always - UTF-8 (not MBCS). - """ - encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() - if encoding == 'mbcs': - # On Windows, a broken encoding known to Python as "MBCS" is - # used for the filesystem. However, we only use the Unicode API - # for Windows paths, so the encoding is actually immaterial so - # we can avoid dealing with this nastiness. We arbitrarily - # choose UTF-8. - encoding = 'utf8' - return encoding - - -def bytestring_path(path): - """Given a path, which is either a str or a unicode, returns a str - path (ensuring that we never deal with Unicode pathnames). - """ - # Pass through bytestrings. - if isinstance(path, str): - return path - - # On Windows, remove the magic prefix added by `syspath`. This makes - # ``bytestring_path(syspath(X)) == X``, i.e., we can safely - # round-trip through `syspath`. - if os.path.__name__ == 'ntpath' and path.startswith(WINDOWS_MAGIC_PREFIX): - path = path[len(WINDOWS_MAGIC_PREFIX):] - - # Try to encode with default encodings, but fall back to UTF8. - try: - return path.encode(_fsencoding()) - except (UnicodeError, LookupError): - return path.encode('utf8') - - -def displayable_path(path, separator=u'; '): - """Attempts to decode a bytestring path to a unicode object for the - purpose of displaying it to the user. If the `path` argument is a - list or a tuple, the elements are joined with `separator`. - """ - if isinstance(path, (list, tuple)): - return separator.join(displayable_path(p) for p in path) - elif isinstance(path, unicode): - return path - elif not isinstance(path, str): - # A non-string object: just get its unicode representation. - return unicode(path) - - try: - return path.decode(_fsencoding(), 'ignore') - except (UnicodeError, LookupError): - return path.decode('utf8', 'ignore') - - -def syspath(path, prefix=True): - """Convert a path for use by the operating system. In particular, - paths on Windows must receive a magic prefix and must be converted - to Unicode before they are sent to the OS. To disable the magic - prefix on Windows, set `prefix` to False---but only do this if you - *really* know what you're doing. - """ - # Don't do anything if we're not on windows - if os.path.__name__ != 'ntpath': - return path - - if not isinstance(path, unicode): - # Beets currently represents Windows paths internally with UTF-8 - # arbitrarily. But earlier versions used MBCS because it is - # reported as the FS encoding by Windows. Try both. - try: - path = path.decode('utf8') - except UnicodeError: - # The encoding should always be MBCS, Windows' broken - # Unicode representation. - encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() - path = path.decode(encoding, 'replace') - - # Add the magic prefix if it isn't already there. - # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx - if prefix and not path.startswith(WINDOWS_MAGIC_PREFIX): - if path.startswith(u'\\\\'): - # UNC path. Final path should look like \\?\UNC\... - path = u'UNC' + path[1:] - path = WINDOWS_MAGIC_PREFIX + path - - return path - - -def samefile(p1, p2): - """Safer equality for paths.""" - return shutil._samefile(syspath(p1), syspath(p2)) - - -def remove(path, soft=True): - """Remove the file. If `soft`, then no error will be raised if the - file does not exist. - """ - path = syspath(path) - if soft and not os.path.exists(path): - return - try: - os.remove(path) - except (OSError, IOError) as exc: - raise FilesystemError(exc, 'delete', (path,), traceback.format_exc()) - - -def copy(path, dest, replace=False): - """Copy a plain file. Permissions are not copied. If `dest` already - exists, raises a FilesystemError unless `replace` is True. Has no - effect if `path` is the same as `dest`. Paths are translated to - system paths before the syscall. - """ - if samefile(path, dest): - return - path = syspath(path) - dest = syspath(dest) - if not replace and os.path.exists(dest): - raise FilesystemError('file exists', 'copy', (path, dest)) - try: - shutil.copyfile(path, dest) - except (OSError, IOError) as exc: - raise FilesystemError(exc, 'copy', (path, dest), - traceback.format_exc()) - - -def move(path, dest, replace=False): - """Rename a file. `dest` may not be a directory. If `dest` already - exists, raises an OSError unless `replace` is True. Has no effect if - `path` is the same as `dest`. If the paths are on different - filesystems (or the rename otherwise fails), a copy is attempted - instead, in which case metadata will *not* be preserved. Paths are - translated to system paths. - """ - if samefile(path, dest): - return - path = syspath(path) - dest = syspath(dest) - if os.path.exists(dest) and not replace: - raise FilesystemError('file exists', 'rename', (path, dest), - traceback.format_exc()) - - # First, try renaming the file. - try: - os.rename(path, dest) - except OSError: - # Otherwise, copy and delete the original. - try: - shutil.copyfile(path, dest) - os.remove(path) - except (OSError, IOError) as exc: - raise FilesystemError(exc, 'move', (path, dest), - traceback.format_exc()) - - -def link(path, dest, replace=False): - """Create a symbolic link from path to `dest`. Raises an OSError if - `dest` already exists, unless `replace` is True. Does nothing if - `path` == `dest`.""" - if (samefile(path, dest)): - return - - path = syspath(path) - dest = syspath(dest) - if os.path.exists(dest) and not replace: - raise FilesystemError('file exists', 'rename', (path, dest), - traceback.format_exc()) - try: - os.symlink(path, dest) - except OSError: - raise FilesystemError('Operating system does not support symbolic ' - 'links.', 'link', (path, dest), - traceback.format_exc()) - - -def unique_path(path): - """Returns a version of ``path`` that does not exist on the - filesystem. Specifically, if ``path` itself already exists, then - something unique is appended to the path. - """ - if not os.path.exists(syspath(path)): - return path - - base, ext = os.path.splitext(path) - match = re.search(r'\.(\d)+$', base) - if match: - num = int(match.group(1)) - base = base[:match.start()] - else: - num = 0 - while True: - num += 1 - new_path = '%s.%i%s' % (base, num, ext) - if not os.path.exists(new_path): - return new_path - -# Note: The Windows "reserved characters" are, of course, allowed on -# Unix. They are forbidden here because they cause problems on Samba -# shares, which are sufficiently common as to cause frequent problems. -# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx -CHAR_REPLACE = [ - (re.compile(ur'[\\/]'), u'_'), # / and \ -- forbidden everywhere. - (re.compile(ur'^\.'), u'_'), # Leading dot (hidden files on Unix). - (re.compile(ur'[\x00-\x1f]'), u''), # Control characters. - (re.compile(ur'[<>:"\?\*\|]'), u'_'), # Windows "reserved characters". - (re.compile(ur'\.$'), u'_'), # Trailing dots. - (re.compile(ur'\s+$'), u''), # Trailing whitespace. -] - - -def sanitize_path(path, replacements=None): - """Takes a path (as a Unicode string) and makes sure that it is - legal. Returns a new path. Only works with fragments; won't work - reliably on Windows when a path begins with a drive letter. Path - separators (including altsep!) should already be cleaned from the - path components. If replacements is specified, it is used *instead* - of the default set of replacements; it must be a list of (compiled - regex, replacement string) pairs. - """ - replacements = replacements or CHAR_REPLACE - - comps = components(path) - if not comps: - return '' - for i, comp in enumerate(comps): - for regex, repl in replacements: - comp = regex.sub(repl, comp) - comps[i] = comp - return os.path.join(*comps) - - -def truncate_path(path, length=MAX_FILENAME_LENGTH): - """Given a bytestring path or a Unicode path fragment, truncate the - components to a legal length. In the last component, the extension - is preserved. - """ - comps = components(path) - - out = [c[:length] for c in comps] - base, ext = os.path.splitext(comps[-1]) - if ext: - # Last component has an extension. - base = base[:length - len(ext)] - out[-1] = base + ext - - return os.path.join(*out) - - -def str2bool(value): - """Returns a boolean reflecting a human-entered string.""" - if value.lower() in ('yes', '1', 'true', 't', 'y'): - return True - else: - return False - - -def as_string(value): - """Convert a value to a Unicode object for matching with a query. - None becomes the empty string. Bytestrings are silently decoded. - """ - if value is None: - return u'' - elif isinstance(value, buffer): - return str(value).decode('utf8', 'ignore') - elif isinstance(value, str): - return value.decode('utf8', 'ignore') - else: - return unicode(value) - - -def levenshtein(s1, s2): - """A nice DP edit distance implementation from Wikibooks: - http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/ - Levenshtein_distance#Python - """ - if len(s1) < len(s2): - return levenshtein(s2, s1) - if not s1: - return len(s2) - - previous_row = xrange(len(s2) + 1) - for i, c1 in enumerate(s1): - current_row = [i + 1] - for j, c2 in enumerate(s2): - insertions = previous_row[j + 1] + 1 - deletions = current_row[j] + 1 - substitutions = previous_row[j] + (c1 != c2) - current_row.append(min(insertions, deletions, substitutions)) - previous_row = current_row - - return previous_row[-1] - - -def plurality(objs): - """Given a sequence of comparable objects, returns the object that - is most common in the set and the frequency of that object. The - sequence must contain at least one object. - """ - # Calculate frequencies. - freqs = defaultdict(int) - for obj in objs: - freqs[obj] += 1 - - if not freqs: - raise ValueError('sequence must be non-empty') - - # Find object with maximum frequency. - max_freq = 0 - res = None - for obj, freq in freqs.items(): - if freq > max_freq: - max_freq = freq - res = obj - - return res, max_freq - - -def cpu_count(): - """Return the number of hardware thread contexts (cores or SMT - threads) in the system. - """ - # Adapted from the soundconverter project: - # https://github.com/kassoulet/soundconverter - if sys.platform == 'win32': - try: - num = int(os.environ['NUMBER_OF_PROCESSORS']) - except (ValueError, KeyError): - num = 0 - elif sys.platform == 'darwin': - try: - num = int(command_output(['sysctl', '-n', 'hw.ncpu'])) - except ValueError: - num = 0 - else: - try: - num = os.sysconf('SC_NPROCESSORS_ONLN') - except (ValueError, OSError, AttributeError): - num = 0 - if num >= 1: - return num - else: - return 1 - - -def command_output(cmd, shell=False): - """Runs the command and returns its output after it has exited. - - ``cmd`` is a list of arguments starting with the command names. If - ``shell`` is true, ``cmd`` is assumed to be a string and passed to a - shell to execute. - - If the process exits with a non-zero return code - ``subprocess.CalledProcessError`` is raised. May also raise - ``OSError``. - - This replaces `subprocess.check_output`, which isn't available in - Python 2.6 and which can have problems if lots of output is sent to - stderr. - """ - proc = subprocess.Popen( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=platform.system() != 'Windows', - shell=shell - ) - stdout, stderr = proc.communicate() - if proc.returncode: - raise subprocess.CalledProcessError( - returncode=proc.returncode, - cmd=' '.join(cmd), - ) - return stdout - - -def max_filename_length(path, limit=MAX_FILENAME_LENGTH): - """Attempt to determine the maximum filename length for the - filesystem containing `path`. If the value is greater than `limit`, - then `limit` is used instead (to prevent errors when a filesystem - misreports its capacity). If it cannot be determined (e.g., on - Windows), return `limit`. - """ - if hasattr(os, 'statvfs'): - try: - res = os.statvfs(path) - except OSError: - return limit - return min(res[9], limit) - else: - return limit diff --git a/lib/beets/util/artresizer.py b/lib/beets/util/artresizer.py deleted file mode 100644 index f17fdc5b..00000000 --- a/lib/beets/util/artresizer.py +++ /dev/null @@ -1,211 +0,0 @@ -# This file is part of beets. -# Copyright 2014, Fabrice Laporte -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Abstraction layer to resize images using PIL, ImageMagick, or a -public resizing proxy if neither is available. -""" -import urllib -import subprocess -import os -import re -from tempfile import NamedTemporaryFile -import logging -from beets import util - -# Resizing methods -PIL = 1 -IMAGEMAGICK = 2 -WEBPROXY = 3 - -PROXY_URL = 'http://images.weserv.nl/' - -log = logging.getLogger('beets') - - -def resize_url(url, maxwidth): - """Return a proxied image URL that resizes the original image to - maxwidth (preserving aspect ratio). - """ - return '{0}?{1}'.format(PROXY_URL, urllib.urlencode({ - 'url': url.replace('http://', ''), - 'w': str(maxwidth), - })) - - -def temp_file_for(path): - """Return an unused filename with the same extension as the - specified path. - """ - ext = os.path.splitext(path)[1] - with NamedTemporaryFile(suffix=ext, delete=False) as f: - return f.name - - -def pil_resize(maxwidth, path_in, path_out=None): - """Resize using Python Imaging Library (PIL). Return the output path - of resized image. - """ - path_out = path_out or temp_file_for(path_in) - from PIL import Image - log.debug(u'artresizer: PIL resizing {0} to {1}'.format( - util.displayable_path(path_in), util.displayable_path(path_out) - )) - - try: - im = Image.open(util.syspath(path_in)) - size = maxwidth, maxwidth - im.thumbnail(size, Image.ANTIALIAS) - im.save(path_out) - return path_out - except IOError: - log.error(u"PIL cannot create thumbnail for '{0}'".format( - util.displayable_path(path_in) - )) - return path_in - - -def im_resize(maxwidth, path_in, path_out=None): - """Resize using ImageMagick's ``convert`` tool. - Return the output path of resized image. - """ - path_out = path_out or temp_file_for(path_in) - log.debug(u'artresizer: ImageMagick resizing {0} to {1}'.format( - util.displayable_path(path_in), util.displayable_path(path_out) - )) - - # "-resize widthxheight>" shrinks images with dimension(s) larger - # than the corresponding width and/or height dimension(s). The > - # "only shrink" flag is prefixed by ^ escape char for Windows - # compatibility. - try: - util.command_output([ - 'convert', util.syspath(path_in), - '-resize', '{0}x^>'.format(maxwidth), path_out - ]) - except subprocess.CalledProcessError: - log.warn(u'artresizer: IM convert failed for {0}'.format( - util.displayable_path(path_in) - )) - return path_in - return path_out - - -BACKEND_FUNCS = { - PIL: pil_resize, - IMAGEMAGICK: im_resize, -} - - -class Shareable(type): - """A pseudo-singleton metaclass that allows both shared and - non-shared instances. The ``MyClass.shared`` property holds a - lazily-created shared instance of ``MyClass`` while calling - ``MyClass()`` to construct a new object works as usual. - """ - def __init__(cls, name, bases, dict): - super(Shareable, cls).__init__(name, bases, dict) - cls._instance = None - - @property - def shared(cls): - if cls._instance is None: - cls._instance = cls() - return cls._instance - - -class ArtResizer(object): - """A singleton class that performs image resizes. - """ - __metaclass__ = Shareable - - def __init__(self, method=None): - """Create a resizer object for the given method or, if none is - specified, with an inferred method. - """ - self.method = self._check_method(method) - log.debug(u"artresizer: method is {0}".format(self.method)) - self.can_compare = self._can_compare() - - def resize(self, maxwidth, path_in, path_out=None): - """Manipulate an image file according to the method, returning a - new path. For PIL or IMAGEMAGIC methods, resizes the image to a - temporary file. For WEBPROXY, returns `path_in` unmodified. - """ - if self.local: - func = BACKEND_FUNCS[self.method[0]] - return func(maxwidth, path_in, path_out) - else: - return path_in - - def proxy_url(self, maxwidth, url): - """Modifies an image URL according the method, returning a new - URL. For WEBPROXY, a URL on the proxy server is returned. - Otherwise, the URL is returned unmodified. - """ - if self.local: - return url - else: - return resize_url(url, maxwidth) - - @property - def local(self): - """A boolean indicating whether the resizing method is performed - locally (i.e., PIL or ImageMagick). - """ - return self.method[0] in BACKEND_FUNCS - - def _can_compare(self): - """A boolean indicating whether image comparison is available""" - - return self.method[0] == IMAGEMAGICK and self.method[1] > (6, 8, 7) - - @staticmethod - def _check_method(method=None): - """A tuple indicating whether current method is available and its - version. If no method is given, it returns a supported one. - """ - # Guess available method - if not method: - for m in [IMAGEMAGICK, PIL]: - _, version = ArtResizer._check_method(m) - if version: - return (m, version) - return (WEBPROXY, (0)) - - if method == IMAGEMAGICK: - - # Try invoking ImageMagick's "convert". - try: - out = util.command_output(['identify', '--version']) - - if 'imagemagick' in out.lower(): - pattern = r".+ (\d+)\.(\d+)\.(\d+).*" - match = re.search(pattern, out) - if match: - return (IMAGEMAGICK, - (int(match.group(1)), - int(match.group(2)), - int(match.group(3)))) - return (IMAGEMAGICK, (0)) - - except (subprocess.CalledProcessError, OSError): - return (IMAGEMAGICK, None) - - if method == PIL: - # Try importing PIL. - try: - __import__('PIL', fromlist=['Image']) - return (PIL, (0)) - except ImportError: - return (PIL, None) diff --git a/lib/beets/util/bluelet.py b/lib/beets/util/bluelet.py deleted file mode 100644 index a12ec945..00000000 --- a/lib/beets/util/bluelet.py +++ /dev/null @@ -1,647 +0,0 @@ -"""Extremely simple pure-Python implementation of coroutine-style -asynchronous socket I/O. Inspired by, but inferior to, Eventlet. -Bluelet can also be thought of as a less-terrible replacement for -asyncore. - -Bluelet: easy concurrency without all the messy parallelism. -""" -import socket -import select -import sys -import types -import errno -import traceback -import time -import collections - - -# A little bit of "six" (Python 2/3 compatibility): cope with PEP 3109 syntax -# changes. - -PY3 = sys.version_info[0] == 3 -if PY3: - def _reraise(typ, exc, tb): - raise exc.with_traceback(tb) -else: - exec(""" -def _reraise(typ, exc, tb): - raise typ, exc, tb -""") - - -# Basic events used for thread scheduling. - -class Event(object): - """Just a base class identifying Bluelet events. An event is an - object yielded from a Bluelet thread coroutine to suspend operation - and communicate with the scheduler. - """ - pass - - -class WaitableEvent(Event): - """A waitable event is one encapsulating an action that can be - waited for using a select() call. That is, it's an event with an - associated file descriptor. - """ - def waitables(self): - """Return "waitable" objects to pass to select(). Should return - three iterables for input readiness, output readiness, and - exceptional conditions (i.e., the three lists passed to - select()). - """ - return (), (), () - - def fire(self): - """Called when an associated file descriptor becomes ready - (i.e., is returned from a select() call). - """ - pass - - -class ValueEvent(Event): - """An event that does nothing but return a fixed value.""" - def __init__(self, value): - self.value = value - - -class ExceptionEvent(Event): - """Raise an exception at the yield point. Used internally.""" - def __init__(self, exc_info): - self.exc_info = exc_info - - -class SpawnEvent(Event): - """Add a new coroutine thread to the scheduler.""" - def __init__(self, coro): - self.spawned = coro - - -class JoinEvent(Event): - """Suspend the thread until the specified child thread has - completed. - """ - def __init__(self, child): - self.child = child - - -class KillEvent(Event): - """Unschedule a child thread.""" - def __init__(self, child): - self.child = child - - -class DelegationEvent(Event): - """Suspend execution of the current thread, start a new thread and, - once the child thread finished, return control to the parent - thread. - """ - def __init__(self, coro): - self.spawned = coro - - -class ReturnEvent(Event): - """Return a value the current thread's delegator at the point of - delegation. Ends the current (delegate) thread. - """ - def __init__(self, value): - self.value = value - - -class SleepEvent(WaitableEvent): - """Suspend the thread for a given duration. - """ - def __init__(self, duration): - self.wakeup_time = time.time() + duration - - def time_left(self): - return max(self.wakeup_time - time.time(), 0.0) - - -class ReadEvent(WaitableEvent): - """Reads from a file-like object.""" - def __init__(self, fd, bufsize): - self.fd = fd - self.bufsize = bufsize - - def waitables(self): - return (self.fd,), (), () - - def fire(self): - return self.fd.read(self.bufsize) - - -class WriteEvent(WaitableEvent): - """Writes to a file-like object.""" - def __init__(self, fd, data): - self.fd = fd - self.data = data - - def waitable(self): - return (), (self.fd,), () - - def fire(self): - self.fd.write(self.data) - - -# Core logic for executing and scheduling threads. - -def _event_select(events): - """Perform a select() over all the Events provided, returning the - ones ready to be fired. Only WaitableEvents (including SleepEvents) - matter here; all other events are ignored (and thus postponed). - """ - # Gather waitables and wakeup times. - waitable_to_event = {} - rlist, wlist, xlist = [], [], [] - earliest_wakeup = None - for event in events: - if isinstance(event, SleepEvent): - if not earliest_wakeup: - earliest_wakeup = event.wakeup_time - else: - earliest_wakeup = min(earliest_wakeup, event.wakeup_time) - elif isinstance(event, WaitableEvent): - r, w, x = event.waitables() - rlist += r - wlist += w - xlist += x - for waitable in r: - waitable_to_event[('r', waitable)] = event - for waitable in w: - waitable_to_event[('w', waitable)] = event - for waitable in x: - waitable_to_event[('x', waitable)] = event - - # If we have a any sleeping threads, determine how long to sleep. - if earliest_wakeup: - timeout = max(earliest_wakeup - time.time(), 0.0) - else: - timeout = None - - # Perform select() if we have any waitables. - if rlist or wlist or xlist: - rready, wready, xready = select.select(rlist, wlist, xlist, timeout) - else: - rready, wready, xready = (), (), () - if timeout: - time.sleep(timeout) - - # Gather ready events corresponding to the ready waitables. - ready_events = set() - for ready in rready: - ready_events.add(waitable_to_event[('r', ready)]) - for ready in wready: - ready_events.add(waitable_to_event[('w', ready)]) - for ready in xready: - ready_events.add(waitable_to_event[('x', ready)]) - - # Gather any finished sleeps. - for event in events: - if isinstance(event, SleepEvent) and event.time_left() == 0.0: - ready_events.add(event) - - return ready_events - - -class ThreadException(Exception): - def __init__(self, coro, exc_info): - self.coro = coro - self.exc_info = exc_info - - def reraise(self): - _reraise(self.exc_info[0], self.exc_info[1], self.exc_info[2]) - - -SUSPENDED = Event() # Special sentinel placeholder for suspended threads. - - -class Delegated(Event): - """Placeholder indicating that a thread has delegated execution to a - different thread. - """ - def __init__(self, child): - self.child = child - - -def run(root_coro): - """Schedules a coroutine, running it to completion. This - encapsulates the Bluelet scheduler, which the root coroutine can - add to by spawning new coroutines. - """ - # The "threads" dictionary keeps track of all the currently- - # executing and suspended coroutines. It maps coroutines to their - # currently "blocking" event. The event value may be SUSPENDED if - # the coroutine is waiting on some other condition: namely, a - # delegated coroutine or a joined coroutine. In this case, the - # coroutine should *also* appear as a value in one of the below - # dictionaries `delegators` or `joiners`. - threads = {root_coro: ValueEvent(None)} - - # Maps child coroutines to delegating parents. - delegators = {} - - # Maps child coroutines to joining (exit-waiting) parents. - joiners = collections.defaultdict(list) - - def complete_thread(coro, return_value): - """Remove a coroutine from the scheduling pool, awaking - delegators and joiners as necessary and returning the specified - value to any delegating parent. - """ - del threads[coro] - - # Resume delegator. - if coro in delegators: - threads[delegators[coro]] = ValueEvent(return_value) - del delegators[coro] - - # Resume joiners. - if coro in joiners: - for parent in joiners[coro]: - threads[parent] = ValueEvent(None) - del joiners[coro] - - def advance_thread(coro, value, is_exc=False): - """After an event is fired, run a given coroutine associated with - it in the threads dict until it yields again. If the coroutine - exits, then the thread is removed from the pool. If the coroutine - raises an exception, it is reraised in a ThreadException. If - is_exc is True, then the value must be an exc_info tuple and the - exception is thrown into the coroutine. - """ - try: - if is_exc: - next_event = coro.throw(*value) - else: - next_event = coro.send(value) - except StopIteration: - # Thread is done. - complete_thread(coro, None) - except: - # Thread raised some other exception. - del threads[coro] - raise ThreadException(coro, sys.exc_info()) - else: - if isinstance(next_event, types.GeneratorType): - # Automatically invoke sub-coroutines. (Shorthand for - # explicit bluelet.call().) - next_event = DelegationEvent(next_event) - threads[coro] = next_event - - def kill_thread(coro): - """Unschedule this thread and its (recursive) delegates. - """ - # Collect all coroutines in the delegation stack. - coros = [coro] - while isinstance(threads[coro], Delegated): - coro = threads[coro].child - coros.append(coro) - - # Complete each coroutine from the top to the bottom of the - # stack. - for coro in reversed(coros): - complete_thread(coro, None) - - # Continue advancing threads until root thread exits. - exit_te = None - while threads: - try: - # Look for events that can be run immediately. Continue - # running immediate events until nothing is ready. - while True: - have_ready = False - for coro, event in list(threads.items()): - if isinstance(event, SpawnEvent): - threads[event.spawned] = ValueEvent(None) # Spawn. - advance_thread(coro, None) - have_ready = True - elif isinstance(event, ValueEvent): - advance_thread(coro, event.value) - have_ready = True - elif isinstance(event, ExceptionEvent): - advance_thread(coro, event.exc_info, True) - have_ready = True - elif isinstance(event, DelegationEvent): - threads[coro] = Delegated(event.spawned) # Suspend. - threads[event.spawned] = ValueEvent(None) # Spawn. - delegators[event.spawned] = coro - have_ready = True - elif isinstance(event, ReturnEvent): - # Thread is done. - complete_thread(coro, event.value) - have_ready = True - elif isinstance(event, JoinEvent): - threads[coro] = SUSPENDED # Suspend. - joiners[event.child].append(coro) - have_ready = True - elif isinstance(event, KillEvent): - threads[coro] = ValueEvent(None) - kill_thread(event.child) - have_ready = True - - # Only start the select when nothing else is ready. - if not have_ready: - break - - # Wait and fire. - event2coro = dict((v, k) for k, v in threads.items()) - for event in _event_select(threads.values()): - # Run the IO operation, but catch socket errors. - try: - value = event.fire() - except socket.error as exc: - if isinstance(exc.args, tuple) and \ - exc.args[0] == errno.EPIPE: - # Broken pipe. Remote host disconnected. - pass - else: - traceback.print_exc() - # Abort the coroutine. - threads[event2coro[event]] = ReturnEvent(None) - else: - advance_thread(event2coro[event], value) - - except ThreadException as te: - # Exception raised from inside a thread. - event = ExceptionEvent(te.exc_info) - if te.coro in delegators: - # The thread is a delegate. Raise exception in its - # delegator. - threads[delegators[te.coro]] = event - del delegators[te.coro] - else: - # The thread is root-level. Raise in client code. - exit_te = te - break - - except: - # For instance, KeyboardInterrupt during select(). Raise - # into root thread and terminate others. - threads = {root_coro: ExceptionEvent(sys.exc_info())} - - # If any threads still remain, kill them. - for coro in threads: - coro.close() - - # If we're exiting with an exception, raise it in the client. - if exit_te: - exit_te.reraise() - - -# Sockets and their associated events. - -class SocketClosedError(Exception): - pass - - -class Listener(object): - """A socket wrapper object for listening sockets. - """ - def __init__(self, host, port): - """Create a listening socket on the given hostname and port. - """ - self._closed = False - self.host = host - self.port = port - self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - self.sock.bind((host, port)) - self.sock.listen(5) - - def accept(self): - """An event that waits for a connection on the listening socket. - When a connection is made, the event returns a Connection - object. - """ - if self._closed: - raise SocketClosedError() - return AcceptEvent(self) - - def close(self): - """Immediately close the listening socket. (Not an event.) - """ - self._closed = True - self.sock.close() - - -class Connection(object): - """A socket wrapper object for connected sockets. - """ - def __init__(self, sock, addr): - self.sock = sock - self.addr = addr - self._buf = b'' - self._closed = False - - def close(self): - """Close the connection.""" - self._closed = True - self.sock.close() - - def recv(self, size): - """Read at most size bytes of data from the socket.""" - if self._closed: - raise SocketClosedError() - - if self._buf: - # We already have data read previously. - out = self._buf[:size] - self._buf = self._buf[size:] - return ValueEvent(out) - else: - return ReceiveEvent(self, size) - - def send(self, data): - """Sends data on the socket, returning the number of bytes - successfully sent. - """ - if self._closed: - raise SocketClosedError() - return SendEvent(self, data) - - def sendall(self, data): - """Send all of data on the socket.""" - if self._closed: - raise SocketClosedError() - return SendEvent(self, data, True) - - def readline(self, terminator=b"\n", bufsize=1024): - """Reads a line (delimited by terminator) from the socket.""" - if self._closed: - raise SocketClosedError() - - while True: - if terminator in self._buf: - line, self._buf = self._buf.split(terminator, 1) - line += terminator - yield ReturnEvent(line) - break - data = yield ReceiveEvent(self, bufsize) - if data: - self._buf += data - else: - line = self._buf - self._buf = b'' - yield ReturnEvent(line) - break - - -class AcceptEvent(WaitableEvent): - """An event for Listener objects (listening sockets) that suspends - execution until the socket gets a connection. - """ - def __init__(self, listener): - self.listener = listener - - def waitables(self): - return (self.listener.sock,), (), () - - def fire(self): - sock, addr = self.listener.sock.accept() - return Connection(sock, addr) - - -class ReceiveEvent(WaitableEvent): - """An event for Connection objects (connected sockets) for - asynchronously reading data. - """ - def __init__(self, conn, bufsize): - self.conn = conn - self.bufsize = bufsize - - def waitables(self): - return (self.conn.sock,), (), () - - def fire(self): - return self.conn.sock.recv(self.bufsize) - - -class SendEvent(WaitableEvent): - """An event for Connection objects (connected sockets) for - asynchronously writing data. - """ - def __init__(self, conn, data, sendall=False): - self.conn = conn - self.data = data - self.sendall = sendall - - def waitables(self): - return (), (self.conn.sock,), () - - def fire(self): - if self.sendall: - return self.conn.sock.sendall(self.data) - else: - return self.conn.sock.send(self.data) - - -# Public interface for threads; each returns an event object that -# can immediately be "yield"ed. - -def null(): - """Event: yield to the scheduler without doing anything special. - """ - return ValueEvent(None) - - -def spawn(coro): - """Event: add another coroutine to the scheduler. Both the parent - and child coroutines run concurrently. - """ - if not isinstance(coro, types.GeneratorType): - raise ValueError('%s is not a coroutine' % str(coro)) - return SpawnEvent(coro) - - -def call(coro): - """Event: delegate to another coroutine. The current coroutine - is resumed once the sub-coroutine finishes. If the sub-coroutine - returns a value using end(), then this event returns that value. - """ - if not isinstance(coro, types.GeneratorType): - raise ValueError('%s is not a coroutine' % str(coro)) - return DelegationEvent(coro) - - -def end(value=None): - """Event: ends the coroutine and returns a value to its - delegator. - """ - return ReturnEvent(value) - - -def read(fd, bufsize=None): - """Event: read from a file descriptor asynchronously.""" - if bufsize is None: - # Read all. - def reader(): - buf = [] - while True: - data = yield read(fd, 1024) - if not data: - break - buf.append(data) - yield ReturnEvent(''.join(buf)) - return DelegationEvent(reader()) - - else: - return ReadEvent(fd, bufsize) - - -def write(fd, data): - """Event: write to a file descriptor asynchronously.""" - return WriteEvent(fd, data) - - -def connect(host, port): - """Event: connect to a network address and return a Connection - object for communicating on the socket. - """ - addr = (host, port) - sock = socket.create_connection(addr) - return ValueEvent(Connection(sock, addr)) - - -def sleep(duration): - """Event: suspend the thread for ``duration`` seconds. - """ - return SleepEvent(duration) - - -def join(coro): - """Suspend the thread until another, previously `spawn`ed thread - completes. - """ - return JoinEvent(coro) - - -def kill(coro): - """Halt the execution of a different `spawn`ed thread. - """ - return KillEvent(coro) - - -# Convenience function for running socket servers. - -def server(host, port, func): - """A coroutine that runs a network server. Host and port specify the - listening address. func should be a coroutine that takes a single - parameter, a Connection object. The coroutine is invoked for every - incoming connection on the listening socket. - """ - def handler(conn): - try: - yield func(conn) - finally: - conn.close() - - listener = Listener(host, port) - try: - while True: - conn = yield listener.accept() - yield spawn(handler(conn)) - except KeyboardInterrupt: - pass - finally: - listener.close() diff --git a/lib/beets/util/confit.py b/lib/beets/util/confit.py deleted file mode 100644 index de22e0ad..00000000 --- a/lib/beets/util/confit.py +++ /dev/null @@ -1,1281 +0,0 @@ -# This file is part of Confit. -# Copyright 2014, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Worry-free YAML configuration files. -""" -from __future__ import unicode_literals -import platform -import os -import pkgutil -import sys -import yaml -import types -import collections -import re -try: - from collections import OrderedDict -except ImportError: - from ordereddict import OrderedDict - -UNIX_DIR_VAR = 'XDG_CONFIG_HOME' -UNIX_DIR_FALLBACK = '~/.config' -WINDOWS_DIR_VAR = 'APPDATA' -WINDOWS_DIR_FALLBACK = '~\\AppData\\Roaming' -MAC_DIR = '~/Library/Application Support' - -CONFIG_FILENAME = 'config.yaml' -DEFAULT_FILENAME = 'config_default.yaml' -ROOT_NAME = 'root' - -YAML_TAB_PROBLEM = "found character '\\t' that cannot start any token" - - -# Utilities. - -PY3 = sys.version_info[0] == 3 -STRING = str if PY3 else unicode -BASESTRING = str if PY3 else basestring -NUMERIC_TYPES = (int, float) if PY3 else (int, float, long) -TYPE_TYPES = (type,) if PY3 else (type, types.ClassType) - - -def iter_first(sequence): - """Get the first element from an iterable or raise a ValueError if - the iterator generates no values. - """ - it = iter(sequence) - try: - if PY3: - return next(it) - else: - return it.next() - except StopIteration: - raise ValueError() - - -# Exceptions. - -class ConfigError(Exception): - """Base class for exceptions raised when querying a configuration. - """ - - -class NotFoundError(ConfigError): - """A requested value could not be found in the configuration trees. - """ - - -class ConfigValueError(ConfigError): - """The value in the configuration is illegal.""" - - -class ConfigTypeError(ConfigValueError): - """The value in the configuration did not match the expected type. - """ - - -class ConfigTemplateError(ConfigError): - """Base class for exceptions raised because of an invalid template. - """ - - -class ConfigReadError(ConfigError): - """A configuration file could not be read.""" - def __init__(self, filename, reason=None): - self.filename = filename - self.reason = reason - - message = 'file {0} could not be read'.format(filename) - if isinstance(reason, yaml.scanner.ScannerError) and \ - reason.problem == YAML_TAB_PROBLEM: - # Special-case error message for tab indentation in YAML markup. - message += ': found tab character at line {0}, column {1}'.format( - reason.problem_mark.line + 1, - reason.problem_mark.column + 1, - ) - elif reason: - # Generic error message uses exception's message. - message += ': {0}'.format(reason) - - super(ConfigReadError, self).__init__(message) - - -# Views and sources. - -class ConfigSource(dict): - """A dictionary augmented with metadata about the source of the - configuration. - """ - def __init__(self, value, filename=None, default=False): - super(ConfigSource, self).__init__(value) - if filename is not None and not isinstance(filename, BASESTRING): - raise TypeError('filename must be a string or None') - self.filename = filename - self.default = default - - def __repr__(self): - return 'ConfigSource({0}, {1}, {2})'.format( - super(ConfigSource, self).__repr__(), - repr(self.filename), - repr(self.default) - ) - - @classmethod - def of(self, value): - """Given either a dictionary or a `ConfigSource` object, return - a `ConfigSource` object. This lets a function accept either type - of object as an argument. - """ - if isinstance(value, ConfigSource): - return value - elif isinstance(value, dict): - return ConfigSource(value) - else: - raise TypeError('source value must be a dict') - - -class ConfigView(object): - """A configuration "view" is a query into a program's configuration - data. A view represents a hypothetical location in the configuration - tree; to extract the data from the location, a client typically - calls the ``view.get()`` method. The client can access children in - the tree (subviews) by subscripting the parent view (i.e., - ``view[key]``). - """ - - name = None - """The name of the view, depicting the path taken through the - configuration in Python-like syntax (e.g., ``foo['bar'][42]``). - """ - - def resolve(self): - """The core (internal) data retrieval method. Generates (value, - source) pairs for each source that contains a value for this - view. May raise ConfigTypeError if a type error occurs while - traversing a source. - """ - raise NotImplementedError - - def first(self): - """Return a (value, source) pair for the first object found for - this view. This amounts to the first element returned by - `resolve`. If no values are available, a NotFoundError is - raised. - """ - pairs = self.resolve() - try: - return iter_first(pairs) - except ValueError: - raise NotFoundError("{0} not found".format(self.name)) - - def exists(self): - """Determine whether the view has a setting in any source. - """ - try: - self.first() - except NotFoundError: - return False - return True - - def add(self, value): - """Set the *default* value for this configuration view. The - specified value is added as the lowest-priority configuration - data source. - """ - raise NotImplementedError - - def set(self, value): - """*Override* the value for this configuration view. The - specified value is added as the highest-priority configuration - data source. - """ - raise NotImplementedError - - def root(self): - """The RootView object from which this view is descended. - """ - raise NotImplementedError - - def __repr__(self): - return '' % self.name - - def __getitem__(self, key): - """Get a subview of this view.""" - return Subview(self, key) - - def __setitem__(self, key, value): - """Create an overlay source to assign a given key under this - view. - """ - self.set({key: value}) - - def __contains__(self, key): - return self[key].exists() - - def set_args(self, namespace): - """Overlay parsed command-line arguments, generated by a library - like argparse or optparse, onto this view's value. - """ - args = {} - for key, value in namespace.__dict__.items(): - if value is not None: # Avoid unset options. - args[key] = value - self.set(args) - - # Magical conversions. These special methods make it possible to use - # View objects somewhat transparently in certain circumstances. For - # example, rather than using ``view.get(bool)``, it's possible to - # just say ``bool(view)`` or use ``view`` in a conditional. - - def __str__(self): - """Gets the value for this view as a byte string.""" - return str(self.get()) - - def __unicode__(self): - """Gets the value for this view as a unicode string. (Python 2 - only.) - """ - return unicode(self.get()) - - def __nonzero__(self): - """Gets the value for this view as a boolean. (Python 2 only.) - """ - return self.__bool__() - - def __bool__(self): - """Gets the value for this view as a boolean. (Python 3 only.) - """ - return bool(self.get()) - - # Dictionary emulation methods. - - def keys(self): - """Returns a list containing all the keys available as subviews - of the current views. This enumerates all the keys in *all* - dictionaries matching the current view, in contrast to - ``view.get(dict).keys()``, which gets all the keys for the - *first* dict matching the view. If the object for this view in - any source is not a dict, then a ConfigTypeError is raised. The - keys are ordered according to how they appear in each source. - """ - keys = [] - - for dic, _ in self.resolve(): - try: - cur_keys = dic.keys() - except AttributeError: - raise ConfigTypeError( - '{0} must be a dict, not {1}'.format( - self.name, type(dic).__name__ - ) - ) - - for key in cur_keys: - if key not in keys: - keys.append(key) - - return keys - - def items(self): - """Iterates over (key, subview) pairs contained in dictionaries - from *all* sources at this view. If the object for this view in - any source is not a dict, then a ConfigTypeError is raised. - """ - for key in self.keys(): - yield key, self[key] - - def values(self): - """Iterates over all the subviews contained in dictionaries from - *all* sources at this view. If the object for this view in any - source is not a dict, then a ConfigTypeError is raised. - """ - for key in self.keys(): - yield self[key] - - # List/sequence emulation. - - def all_contents(self): - """Iterates over all subviews from collections at this view from - *all* sources. If the object for this view in any source is not - iterable, then a ConfigTypeError is raised. This method is - intended to be used when the view indicates a list; this method - will concatenate the contents of the list from all sources. - """ - for collection, _ in self.resolve(): - try: - it = iter(collection) - except TypeError: - raise ConfigTypeError( - '{0} must be an iterable, not {1}'.format( - self.name, type(collection).__name__ - ) - ) - for value in it: - yield value - - # Validation and conversion. - - def flatten(self): - """Create a hierarchy of OrderedDicts containing the data from - this view, recursively reifying all views to get their - represented values. - """ - od = OrderedDict() - for key, view in self.items(): - try: - od[key] = view.flatten() - except ConfigTypeError: - od[key] = view.get() - return od - - def get(self, template=None): - """Retrieve the value for this view according to the template. - - The `template` against which the values are checked can be - anything convertible to a `Template` using `as_template`. This - means you can pass in a default integer or string value, for - example, or a type to just check that something matches the type - you expect. - - May raise a `ConfigValueError` (or its subclass, - `ConfigTypeError`) or a `NotFoundError` when the configuration - doesn't satisfy the template. - """ - return as_template(template).value(self, template) - - # Old validation methods (deprecated). - - def as_filename(self): - return self.get(Filename()) - - def as_choice(self, choices): - return self.get(Choice(choices)) - - def as_number(self): - return self.get(Number()) - - def as_str_seq(self): - return self.get(StrSeq()) - - -class RootView(ConfigView): - """The base of a view hierarchy. This view keeps track of the - sources that may be accessed by subviews. - """ - def __init__(self, sources): - """Create a configuration hierarchy for a list of sources. At - least one source must be provided. The first source in the list - has the highest priority. - """ - self.sources = list(sources) - self.name = ROOT_NAME - - def add(self, obj): - self.sources.append(ConfigSource.of(obj)) - - def set(self, value): - self.sources.insert(0, ConfigSource.of(value)) - - def resolve(self): - return ((dict(s), s) for s in self.sources) - - def clear(self): - """Remove all sources from this configuration.""" - del self.sources[:] - - def root(self): - return self - - -class Subview(ConfigView): - """A subview accessed via a subscript of a parent view.""" - def __init__(self, parent, key): - """Make a subview of a parent view for a given subscript key. - """ - self.parent = parent - self.key = key - - # Choose a human-readable name for this view. - if isinstance(self.parent, RootView): - self.name = '' - else: - self.name = self.parent.name - if not isinstance(self.key, int): - self.name += '.' - if isinstance(self.key, int): - self.name += '#{0}'.format(self.key) - elif isinstance(self.key, BASESTRING): - self.name += '{0}'.format(self.key) - else: - self.name += '{0}'.format(repr(self.key)) - - def resolve(self): - for collection, source in self.parent.resolve(): - try: - value = collection[self.key] - except IndexError: - # List index out of bounds. - continue - except KeyError: - # Dict key does not exist. - continue - except TypeError: - # Not subscriptable. - raise ConfigTypeError( - "{0} must be a collection, not {1}".format( - self.parent.name, type(collection).__name__ - ) - ) - yield value, source - - def set(self, value): - self.parent.set({self.key: value}) - - def add(self, value): - self.parent.add({self.key: value}) - - def root(self): - return self.parent.root() - - -# Config file paths, including platform-specific paths and in-package -# defaults. - -# Based on get_root_path from Flask by Armin Ronacher. -def _package_path(name): - """Returns the path to the package containing the named module or - None if the path could not be identified (e.g., if - ``name == "__main__"``). - """ - loader = pkgutil.get_loader(name) - if loader is None or name == '__main__': - return None - - if hasattr(loader, 'get_filename'): - filepath = loader.get_filename(name) - else: - # Fall back to importing the specified module. - __import__(name) - filepath = sys.modules[name].__file__ - - return os.path.dirname(os.path.abspath(filepath)) - - -def config_dirs(): - """Return a platform-specific list of candidates for user - configuration directories on the system. - - The candidates are in order of priority, from highest to lowest. The - last element is the "fallback" location to be used when no - higher-priority config file exists. - """ - paths = [] - - if platform.system() == 'Darwin': - paths.append(MAC_DIR) - paths.append(UNIX_DIR_FALLBACK) - if UNIX_DIR_VAR in os.environ: - paths.append(os.environ[UNIX_DIR_VAR]) - - elif platform.system() == 'Windows': - paths.append(WINDOWS_DIR_FALLBACK) - if WINDOWS_DIR_VAR in os.environ: - paths.append(os.environ[WINDOWS_DIR_VAR]) - - else: - # Assume Unix. - paths.append(UNIX_DIR_FALLBACK) - if UNIX_DIR_VAR in os.environ: - paths.append(os.environ[UNIX_DIR_VAR]) - - # Expand and deduplicate paths. - out = [] - for path in paths: - path = os.path.abspath(os.path.expanduser(path)) - if path not in out: - out.append(path) - return out - - -# YAML loading. - -class Loader(yaml.SafeLoader): - """A customized YAML loader. This loader deviates from the official - YAML spec in a few convenient ways: - - - All strings as are Unicode objects. - - All maps are OrderedDicts. - - Strings can begin with % without quotation. - """ - # All strings should be Unicode objects, regardless of contents. - def _construct_unicode(self, node): - return self.construct_scalar(node) - - # Use ordered dictionaries for every YAML map. - # From https://gist.github.com/844388 - def construct_yaml_map(self, node): - data = OrderedDict() - yield data - value = self.construct_mapping(node) - data.update(value) - - def construct_mapping(self, node, deep=False): - if isinstance(node, yaml.MappingNode): - self.flatten_mapping(node) - else: - raise yaml.constructor.ConstructorError( - None, None, - 'expected a mapping node, but found %s' % node.id, - node.start_mark - ) - - mapping = OrderedDict() - for key_node, value_node in node.value: - key = self.construct_object(key_node, deep=deep) - try: - hash(key) - except TypeError as exc: - raise yaml.constructor.ConstructorError( - 'while constructing a mapping', - node.start_mark, 'found unacceptable key (%s)' % exc, - key_node.start_mark - ) - value = self.construct_object(value_node, deep=deep) - mapping[key] = value - return mapping - - # Allow bare strings to begin with %. Directives are still detected. - def check_plain(self): - plain = super(Loader, self).check_plain() - return plain or self.peek() == '%' - - -Loader.add_constructor('tag:yaml.org,2002:str', Loader._construct_unicode) -Loader.add_constructor('tag:yaml.org,2002:map', Loader.construct_yaml_map) -Loader.add_constructor('tag:yaml.org,2002:omap', Loader.construct_yaml_map) - - -def load_yaml(filename): - """Read a YAML document from a file. If the file cannot be read or - parsed, a ConfigReadError is raised. - """ - try: - with open(filename, 'r') as f: - return yaml.load(f, Loader=Loader) - except (IOError, yaml.error.YAMLError) as exc: - raise ConfigReadError(filename, exc) - - -# YAML dumping. - -class Dumper(yaml.SafeDumper): - """A PyYAML Dumper that represents OrderedDicts as ordinary mappings - (in order, of course). - """ - # From http://pyyaml.org/attachment/ticket/161/use_ordered_dict.py - def represent_mapping(self, tag, mapping, flow_style=None): - value = [] - node = yaml.MappingNode(tag, value, flow_style=flow_style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = False - if hasattr(mapping, 'items'): - mapping = list(mapping.items()) - for item_key, item_value in mapping: - node_key = self.represent_data(item_key) - node_value = self.represent_data(item_value) - if not (isinstance(node_key, yaml.ScalarNode) - and not node_key.style): - best_style = False - if not (isinstance(node_value, yaml.ScalarNode) - and not node_value.style): - best_style = False - value.append((node_key, node_value)) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = self.default_flow_style - else: - node.flow_style = best_style - return node - - def represent_list(self, data): - """If a list has less than 4 items, represent it in inline style - (i.e. comma separated, within square brackets). - """ - node = super(Dumper, self).represent_list(data) - length = len(data) - if self.default_flow_style is None and length < 4: - node.flow_style = True - elif self.default_flow_style is None: - node.flow_style = False - return node - - def represent_bool(self, data): - """Represent bool as 'yes' or 'no' instead of 'true' or 'false'. - """ - if data: - value = 'yes' - else: - value = 'no' - return self.represent_scalar('tag:yaml.org,2002:bool', value) - - def represent_none(self, data): - """Represent a None value with nothing instead of 'none'. - """ - return self.represent_scalar('tag:yaml.org,2002:null', '') - - -Dumper.add_representer(OrderedDict, Dumper.represent_dict) -Dumper.add_representer(bool, Dumper.represent_bool) -Dumper.add_representer(type(None), Dumper.represent_none) -Dumper.add_representer(list, Dumper.represent_list) - - -def restore_yaml_comments(data, default_data): - """Scan default_data for comments (we include empty lines in our - definition of comments) and place them before the same keys in data. - Only works with comments that are on one or more own lines, i.e. - not next to a yaml mapping. - """ - comment_map = dict() - default_lines = iter(default_data.splitlines()) - for line in default_lines: - if not line: - comment = "\n" - elif line.startswith("#"): - comment = "{0}\n".format(line) - else: - continue - while True: - line = next(default_lines) - if line and not line.startswith("#"): - break - comment += "{0}\n".format(line) - key = line.split(':')[0].strip() - comment_map[key] = comment - out_lines = iter(data.splitlines()) - out_data = "" - for line in out_lines: - key = line.split(':')[0].strip() - if key in comment_map: - out_data += comment_map[key] - out_data += "{0}\n".format(line) - return out_data - - -# Main interface. - -class Configuration(RootView): - def __init__(self, appname, modname=None, read=True): - """Create a configuration object by reading the - automatically-discovered config files for the application for a - given name. If `modname` is specified, it should be the import - name of a module whose package will be searched for a default - config file. (Otherwise, no defaults are used.) Pass `False` for - `read` to disable automatic reading of all discovered - configuration files. Use this when creating a configuration - object at module load time and then call the `read` method - later. - """ - super(Configuration, self).__init__([]) - self.appname = appname - self.modname = modname - - self._env_var = '{0}DIR'.format(self.appname.upper()) - - if read: - self.read() - - def user_config_path(self): - """Points to the location of the user configuration. - - The file may not exist. - """ - return os.path.join(self.config_dir(), CONFIG_FILENAME) - - def _add_user_source(self): - """Add the configuration options from the YAML file in the - user's configuration directory (given by `config_dir`) if it - exists. - """ - filename = self.user_config_path() - if os.path.isfile(filename): - self.add(ConfigSource(load_yaml(filename) or {}, filename)) - - def _add_default_source(self): - """Add the package's default configuration settings. This looks - for a YAML file located inside the package for the module - `modname` if it was given. - """ - if self.modname: - pkg_path = _package_path(self.modname) - if pkg_path: - filename = os.path.join(pkg_path, DEFAULT_FILENAME) - if os.path.isfile(filename): - self.add(ConfigSource(load_yaml(filename), filename, True)) - - def read(self, user=True, defaults=True): - """Find and read the files for this configuration and set them - as the sources for this configuration. To disable either - discovered user configuration files or the in-package defaults, - set `user` or `defaults` to `False`. - """ - if user: - self._add_user_source() - if defaults: - self._add_default_source() - - def config_dir(self): - """Get the path to the user configuration directory. The - directory is guaranteed to exist as a postcondition (one may be - created if none exist). - - If the application's ``...DIR`` environment variable is set, it - is used as the configuration directory. Otherwise, - platform-specific standard configuration locations are searched - for a ``config.yaml`` file. If no configuration file is found, a - fallback path is used. - """ - # If environment variable is set, use it. - if self._env_var in os.environ: - appdir = os.environ[self._env_var] - appdir = os.path.abspath(os.path.expanduser(appdir)) - if os.path.isfile(appdir): - raise ConfigError('{0} must be a directory'.format( - self._env_var - )) - - else: - # Search platform-specific locations. If no config file is - # found, fall back to the final directory in the list. - for confdir in config_dirs(): - appdir = os.path.join(confdir, self.appname) - if os.path.isfile(os.path.join(appdir, CONFIG_FILENAME)): - break - - # Ensure that the directory exists. - if not os.path.isdir(appdir): - os.makedirs(appdir) - return appdir - - def set_file(self, filename): - """Parses the file as YAML and inserts it into the configuration - sources with highest priority. - """ - filename = os.path.abspath(filename) - self.set(ConfigSource(load_yaml(filename), filename)) - - def dump(self, full=True): - """Dump the Configuration object to a YAML file. - - The order of the keys is determined from the default - configuration file. All keys not in the default configuration - will be appended to the end of the file. - - :param filename: The file to dump the configuration to, or None - if the YAML string should be returned instead - :type filename: unicode - :param full: Dump settings that don't differ from the defaults - as well - """ - if full: - out_dict = self.flatten() - else: - # Exclude defaults when flattening. - sources = [s for s in self.sources if not s.default] - out_dict = RootView(sources).flatten() - - yaml_out = yaml.dump(out_dict, Dumper=Dumper, - default_flow_style=None, indent=4, - width=1000) - - # Restore comments to the YAML text. - default_source = None - for source in self.sources: - if source.default: - default_source = source - break - if default_source: - with open(default_source.filename, 'r') as fp: - default_data = fp.read() - yaml_out = restore_yaml_comments(yaml_out, default_data) - - return yaml_out - - -class LazyConfig(Configuration): - """A Configuration at reads files on demand when it is first - accessed. This is appropriate for using as a global config object at - the module level. - """ - def __init__(self, appname, modname=None): - super(LazyConfig, self).__init__(appname, modname, False) - self._materialized = False # Have we read the files yet? - self._lazy_prefix = [] # Pre-materialization calls to set(). - self._lazy_suffix = [] # Calls to add(). - - def read(self, user=True, defaults=True): - self._materialized = True - super(LazyConfig, self).read(user, defaults) - - def resolve(self): - if not self._materialized: - # Read files and unspool buffers. - self.read() - self.sources += self._lazy_suffix - self.sources[:0] = self._lazy_prefix - return super(LazyConfig, self).resolve() - - def add(self, value): - super(LazyConfig, self).add(value) - if not self._materialized: - # Buffer additions to end. - self._lazy_suffix += self.sources - del self.sources[:] - - def set(self, value): - super(LazyConfig, self).set(value) - if not self._materialized: - # Buffer additions to beginning. - self._lazy_prefix[:0] = self.sources - del self.sources[:] - - def clear(self): - """Remove all sources from this configuration.""" - del self.sources[:] - self._lazy_suffix = [] - self._lazy_prefix = [] - - -# "Validated" configuration views: experimental! - - -REQUIRED = object() -"""A sentinel indicating that there is no default value and an exception -should be raised when the value is missing. -""" - - -class Template(object): - """A value template for configuration fields. - - The template works like a type and instructs Confit about how to - interpret a deserialized YAML value. This includes type conversions, - providing a default value, and validating for errors. For example, a - filepath type might expand tildes and check that the file exists. - """ - def __init__(self, default=REQUIRED): - """Create a template with a given default value. - - If `default` is the sentinel `REQUIRED` (as it is by default), - then an error will be raised when a value is missing. Otherwise, - missing values will instead return `default`. - """ - self.default = default - - def __call__(self, view): - """Invoking a template on a view gets the view's value according - to the template. - """ - return self.value(view, self) - - def value(self, view, template=None): - """Get the value for a `ConfigView`. - - May raise a `NotFoundError` if the value is missing (and the - template requires it) or a `ConfigValueError` for invalid values. - """ - if view.exists(): - value, _ = view.first() - return self.convert(value, view) - elif self.default is REQUIRED: - # Missing required value. This is an error. - raise NotFoundError("{0} not found".format(view.name)) - else: - # Missing value, but not required. - return self.default - - def convert(self, value, view): - """Convert the YAML-deserialized value to a value of the desired - type. - - Subclasses should override this to provide useful conversions. - May raise a `ConfigValueError` when the configuration is wrong. - """ - # Default implementation does no conversion. - return value - - def fail(self, message, view, type_error=False): - """Raise an exception indicating that a value cannot be - accepted. - - `type_error` indicates whether the error is due to a type - mismatch rather than a malformed value. In this case, a more - specific exception is raised. - """ - exc_class = ConfigTypeError if type_error else ConfigValueError - raise exc_class( - '{0}: {1}'.format(view.name, message) - ) - - def __repr__(self): - return '{0}({1})'.format( - type(self).__name__, - '' if self.default is REQUIRED else repr(self.default), - ) - - -class Integer(Template): - """An integer configuration value template. - """ - def convert(self, value, view): - """Check that the value is an integer. Floats are rounded. - """ - if isinstance(value, int): - return value - elif isinstance(value, float): - return int(value) - else: - self.fail('must be a number', view, True) - - -class Number(Template): - """A numeric type: either an integer or a floating-point number. - """ - def convert(self, value, view): - """Check that the value is an int or a float. - """ - if isinstance(value, NUMERIC_TYPES): - return value - else: - self.fail( - 'must be numeric, not {0}'.format(type(value).__name__), - view, - True - ) - - -class MappingTemplate(Template): - """A template that uses a dictionary to specify other types for the - values for a set of keys and produce a validated `AttrDict`. - """ - def __init__(self, mapping): - """Create a template according to a dict (mapping). The - mapping's values should themselves either be Types or - convertible to Types. - """ - subtemplates = {} - for key, typ in mapping.items(): - subtemplates[key] = as_template(typ) - self.subtemplates = subtemplates - - def value(self, view, template=None): - """Get a dict with the same keys as the template and values - validated according to the value types. - """ - out = AttrDict() - for key, typ in self.subtemplates.items(): - out[key] = typ.value(view[key], self) - return out - - def __repr__(self): - return 'MappingTemplate({0})'.format(repr(self.subtemplates)) - - -class String(Template): - """A string configuration value template. - """ - def __init__(self, default=REQUIRED, pattern=None): - """Create a template with the added optional `pattern` argument, - a regular expression string that the value should match. - """ - super(String, self).__init__(default) - self.pattern = pattern - if pattern: - self.regex = re.compile(pattern) - - def convert(self, value, view): - """Check that the value is a string and matches the pattern. - """ - if isinstance(value, BASESTRING): - if self.pattern and not self.regex.match(value): - self.fail( - "must match the pattern {0}".format(self.pattern), - view - ) - return value - else: - self.fail('must be a string', view, True) - - -class Choice(Template): - """A template that permits values from a sequence of choices. - """ - def __init__(self, choices): - """Create a template that validates any of the values from the - iterable `choices`. - - If `choices` is a map, then the corresponding value is emitted. - Otherwise, the value itself is emitted. - """ - self.choices = choices - - def convert(self, value, view): - """Ensure that the value is among the choices (and remap if the - choices are a mapping). - """ - if value not in self.choices: - self.fail( - 'must be one of {0}, not {1}'.format( - repr(list(self.choices)), repr(value) - ), - view - ) - - if isinstance(self.choices, collections.Mapping): - return self.choices[value] - else: - return value - - def __repr__(self): - return 'Choice({0!r})'.format(self.choices) - - -class StrSeq(Template): - """A template for values that are lists of strings. - - Validates both actual YAML string lists and single strings. Strings - can optionally be split on whitespace. - """ - def __init__(self, split=True): - """Create a new template. - - `split` indicates whether, when the underlying value is a single - string, it should be split on whitespace. Otherwise, the - resulting value is a list containing a single string. - """ - super(StrSeq, self).__init__() - self.split = split - - def convert(self, value, view): - if isinstance(value, bytes): - value = value.decode('utf8', 'ignore') - - if isinstance(value, STRING): - if self.split: - return value.split() - else: - return [value] - - try: - value = list(value) - except TypeError: - self.fail('must be a whitespace-separated string or a list', - view, True) - - def convert(x): - if isinstance(x, unicode): - return x - elif isinstance(x, BASESTRING): - return x.decode('utf8', 'ignore') - else: - self.fail('must be a list of strings', view, True) - return map(convert, value) - - -class Filename(Template): - """A template that validates strings as filenames. - - Filenames are returned as absolute, tilde-free paths. - - Relative paths are relative to the template's `cwd` argument - when it is specified, then the configuration directory (see - the `config_dir` method) if they come from a file. Otherwise, - they are relative to the current working directory. This helps - attain the expected behavior when using command-line options. - """ - def __init__(self, default=REQUIRED, cwd=None, relative_to=None, - in_app_dir=False): - """ `relative_to` is the name of a sibling value that is - being validated at the same time. - - `in_app_dir` indicates whether the path should be resolved - inside the application's config directory (even when the setting - does not come from a file). - """ - super(Filename, self).__init__(default) - self.cwd = cwd - self.relative_to = relative_to - self.in_app_dir = in_app_dir - - def __repr__(self): - args = [] - - if self.default is not REQUIRED: - args.append(repr(self.default)) - - if self.cwd is not None: - args.append('cwd=' + repr(self.cwd)) - - if self.relative_to is not None: - args.append('relative_to=' + repr(self.relative_to)) - - if self.in_app_dir: - args.append('in_app_dir=True') - - return 'Filename({0})'.format(', '.join(args)) - - def resolve_relative_to(self, view, template): - if not isinstance(template, (collections.Mapping, MappingTemplate)): - # disallow config.get(Filename(relative_to='foo')) - raise ConfigTemplateError( - 'relative_to may only be used when getting multiple values.' - ) - - elif self.relative_to == view.key: - raise ConfigTemplateError( - '{0} is relative to itself'.format(view.name) - ) - - elif self.relative_to not in view.parent.keys(): - # self.relative_to is not in the config - self.fail( - ( - 'needs sibling value "{0}" to expand relative path' - ).format(self.relative_to), - view - ) - - old_template = {} - old_template.update(template.subtemplates) - - # save time by skipping MappingTemplate's init loop - next_template = MappingTemplate({}) - next_relative = self.relative_to - - # gather all the needed templates and nothing else - while next_relative is not None: - try: - # pop to avoid infinite loop because of recursive - # relative paths - rel_to_template = old_template.pop(next_relative) - except KeyError: - if next_relative in template.subtemplates: - # we encountered this config key previously - raise ConfigTemplateError(( - '{0} and {1} are recursively relative' - ).format(view.name, self.relative_to)) - else: - raise ConfigTemplateError(( - 'missing template for {0}, needed to expand {1}\'s' + - 'relative path' - ).format(self.relative_to, view.name)) - - next_template.subtemplates[next_relative] = rel_to_template - next_relative = rel_to_template.relative_to - - return view.parent.get(next_template)[self.relative_to] - - def value(self, view, template=None): - path, source = view.first() - if not isinstance(path, BASESTRING): - self.fail( - 'must be a filename, not {0}'.format(type(path).__name__), - view, - True - ) - path = os.path.expanduser(STRING(path)) - - if not os.path.isabs(path): - if self.cwd is not None: - # relative to the template's argument - path = os.path.join(self.cwd, path) - - elif self.relative_to is not None: - path = os.path.join( - self.resolve_relative_to(view, template), - path, - ) - - elif source.filename or self.in_app_dir: - # From defaults: relative to the app's directory. - path = os.path.join(view.root().config_dir(), path) - - return os.path.abspath(path) - - -class TypeTemplate(Template): - """A simple template that checks that a value is an instance of a - desired Python type. - """ - def __init__(self, typ, default=REQUIRED): - """Create a template that checks that the value is an instance - of `typ`. - """ - super(TypeTemplate, self).__init__(default) - self.typ = typ - - def convert(self, value, view): - if not isinstance(value, self.typ): - self.fail( - 'must be a {0}, not {1}'.format( - self.typ.__name__, - type(value).__name__, - ), - view, - True - ) - return value - - -class AttrDict(dict): - """A `dict` subclass that can be accessed via attributes (dot - notation) for convenience. - """ - def __getattr__(self, key): - if key in self: - return self[key] - else: - raise AttributeError(key) - - -def as_template(value): - """Convert a simple "shorthand" Python value to a `Template`. - """ - if isinstance(value, Template): - # If it's already a Template, pass it through. - return value - elif isinstance(value, collections.Mapping): - # Dictionaries work as templates. - return MappingTemplate(value) - elif value is int: - return Integer() - elif isinstance(value, int): - return Integer(value) - elif isinstance(value, type) and issubclass(value, BASESTRING): - return String() - elif isinstance(value, BASESTRING): - return String(value) - elif value is float: - return Number() - elif value is None: - return Template() - elif value is dict: - return TypeTemplate(collections.Mapping) - elif value is list: - return TypeTemplate(collections.Sequence) - elif isinstance(value, type): - return TypeTemplate(value) - else: - raise ValueError('cannot convert to template: {0!r}'.format(value)) diff --git a/lib/beets/util/enumeration.py b/lib/beets/util/enumeration.py deleted file mode 100644 index e8cd0fe1..00000000 --- a/lib/beets/util/enumeration.py +++ /dev/null @@ -1,40 +0,0 @@ -# This file is part of beets. -# Copyright 2013, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -from enum import Enum - - -class OrderedEnum(Enum): - """ - An Enum subclass that allows comparison of members. - """ - def __ge__(self, other): - if self.__class__ is other.__class__: - return self.value >= other.value - return NotImplemented - - def __gt__(self, other): - if self.__class__ is other.__class__: - return self.value > other.value - return NotImplemented - - def __le__(self, other): - if self.__class__ is other.__class__: - return self.value <= other.value - return NotImplemented - - def __lt__(self, other): - if self.__class__ is other.__class__: - return self.value < other.value - return NotImplemented diff --git a/lib/beets/util/functemplate.py b/lib/beets/util/functemplate.py deleted file mode 100644 index 03e57c61..00000000 --- a/lib/beets/util/functemplate.py +++ /dev/null @@ -1,571 +0,0 @@ -# This file is part of beets. -# Copyright 2013, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""This module implements a string formatter based on the standard PEP -292 string.Template class extended with function calls. Variables, as -with string.Template, are indicated with $ and functions are delimited -with %. - -This module assumes that everything is Unicode: the template and the -substitution values. Bytestrings are not supported. Also, the templates -always behave like the ``safe_substitute`` method in the standard -library: unknown symbols are left intact. - -This is sort of like a tiny, horrible degeneration of a real templating -engine like Jinja2 or Mustache. -""" -from __future__ import print_function - -import re -import ast -import dis -import types - -SYMBOL_DELIM = u'$' -FUNC_DELIM = u'%' -GROUP_OPEN = u'{' -GROUP_CLOSE = u'}' -ARG_SEP = u',' -ESCAPE_CHAR = u'$' - -VARIABLE_PREFIX = '__var_' -FUNCTION_PREFIX = '__func_' - - -class Environment(object): - """Contains the values and functions to be substituted into a - template. - """ - def __init__(self, values, functions): - self.values = values - self.functions = functions - - -# Code generation helpers. - -def ex_lvalue(name): - """A variable load expression.""" - return ast.Name(name, ast.Store()) - - -def ex_rvalue(name): - """A variable store expression.""" - return ast.Name(name, ast.Load()) - - -def ex_literal(val): - """An int, float, long, bool, string, or None literal with the given - value. - """ - if val is None: - return ast.Name('None', ast.Load()) - elif isinstance(val, (int, float, long)): - return ast.Num(val) - elif isinstance(val, bool): - return ast.Name(str(val), ast.Load()) - elif isinstance(val, basestring): - return ast.Str(val) - raise TypeError('no literal for {0}'.format(type(val))) - - -def ex_varassign(name, expr): - """Assign an expression into a single variable. The expression may - either be an `ast.expr` object or a value to be used as a literal. - """ - if not isinstance(expr, ast.expr): - expr = ex_literal(expr) - return ast.Assign([ex_lvalue(name)], expr) - - -def ex_call(func, args): - """A function-call expression with only positional parameters. The - function may be an expression or the name of a function. Each - argument may be an expression or a value to be used as a literal. - """ - if isinstance(func, basestring): - func = ex_rvalue(func) - - args = list(args) - for i in range(len(args)): - if not isinstance(args[i], ast.expr): - args[i] = ex_literal(args[i]) - - return ast.Call(func, args, [], None, None) - - -def compile_func(arg_names, statements, name='_the_func', debug=False): - """Compile a list of statements as the body of a function and return - the resulting Python function. If `debug`, then print out the - bytecode of the compiled function. - """ - func_def = ast.FunctionDef( - name, - ast.arguments( - [ast.Name(n, ast.Param()) for n in arg_names], - None, None, - [ex_literal(None) for _ in arg_names], - ), - statements, - [], - ) - mod = ast.Module([func_def]) - ast.fix_missing_locations(mod) - - prog = compile(mod, '', 'exec') - - # Debug: show bytecode. - if debug: - dis.dis(prog) - for const in prog.co_consts: - if isinstance(const, types.CodeType): - dis.dis(const) - - the_locals = {} - exec prog in {}, the_locals - return the_locals[name] - - -# AST nodes for the template language. - -class Symbol(object): - """A variable-substitution symbol in a template.""" - def __init__(self, ident, original): - self.ident = ident - self.original = original - - def __repr__(self): - return u'Symbol(%s)' % repr(self.ident) - - def evaluate(self, env): - """Evaluate the symbol in the environment, returning a Unicode - string. - """ - if self.ident in env.values: - # Substitute for a value. - return env.values[self.ident] - else: - # Keep original text. - return self.original - - def translate(self): - """Compile the variable lookup.""" - expr = ex_rvalue(VARIABLE_PREFIX + self.ident.encode('utf8')) - return [expr], set([self.ident.encode('utf8')]), set() - - -class Call(object): - """A function call in a template.""" - def __init__(self, ident, args, original): - self.ident = ident - self.args = args - self.original = original - - def __repr__(self): - return u'Call(%s, %s, %s)' % (repr(self.ident), repr(self.args), - repr(self.original)) - - def evaluate(self, env): - """Evaluate the function call in the environment, returning a - Unicode string. - """ - if self.ident in env.functions: - arg_vals = [expr.evaluate(env) for expr in self.args] - try: - out = env.functions[self.ident](*arg_vals) - except Exception as exc: - # Function raised exception! Maybe inlining the name of - # the exception will help debug. - return u'<%s>' % unicode(exc) - return unicode(out) - else: - return self.original - - def translate(self): - """Compile the function call.""" - varnames = set() - funcnames = set([self.ident.encode('utf8')]) - - arg_exprs = [] - for arg in self.args: - subexprs, subvars, subfuncs = arg.translate() - varnames.update(subvars) - funcnames.update(subfuncs) - - # Create a subexpression that joins the result components of - # the arguments. - arg_exprs.append(ex_call( - ast.Attribute(ex_literal(u''), 'join', ast.Load()), - [ex_call( - 'map', - [ - ex_rvalue('unicode'), - ast.List(subexprs, ast.Load()), - ] - )], - )) - - subexpr_call = ex_call( - FUNCTION_PREFIX + self.ident.encode('utf8'), - arg_exprs - ) - return [subexpr_call], varnames, funcnames - - -class Expression(object): - """Top-level template construct: contains a list of text blobs, - Symbols, and Calls. - """ - def __init__(self, parts): - self.parts = parts - - def __repr__(self): - return u'Expression(%s)' % (repr(self.parts)) - - def evaluate(self, env): - """Evaluate the entire expression in the environment, returning - a Unicode string. - """ - out = [] - for part in self.parts: - if isinstance(part, basestring): - out.append(part) - else: - out.append(part.evaluate(env)) - return u''.join(map(unicode, out)) - - def translate(self): - """Compile the expression to a list of Python AST expressions, a - set of variable names used, and a set of function names. - """ - expressions = [] - varnames = set() - funcnames = set() - for part in self.parts: - if isinstance(part, basestring): - expressions.append(ex_literal(part)) - else: - e, v, f = part.translate() - expressions.extend(e) - varnames.update(v) - funcnames.update(f) - return expressions, varnames, funcnames - - -# Parser. - -class ParseError(Exception): - pass - - -class Parser(object): - """Parses a template expression string. Instantiate the class with - the template source and call ``parse_expression``. The ``pos`` field - will indicate the character after the expression finished and - ``parts`` will contain a list of Unicode strings, Symbols, and Calls - reflecting the concatenated portions of the expression. - - This is a terrible, ad-hoc parser implementation based on a - left-to-right scan with no lexing step to speak of; it's probably - both inefficient and incorrect. Maybe this should eventually be - replaced with a real, accepted parsing technique (PEG, parser - generator, etc.). - """ - def __init__(self, string): - self.string = string - self.pos = 0 - self.parts = [] - - # Common parsing resources. - special_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_OPEN, GROUP_CLOSE, - ARG_SEP, ESCAPE_CHAR) - special_char_re = re.compile(ur'[%s]|$' % - u''.join(re.escape(c) for c in special_chars)) - - def parse_expression(self): - """Parse a template expression starting at ``pos``. Resulting - components (Unicode strings, Symbols, and Calls) are added to - the ``parts`` field, a list. The ``pos`` field is updated to be - the next character after the expression. - """ - text_parts = [] - - while self.pos < len(self.string): - char = self.string[self.pos] - - if char not in self.special_chars: - # A non-special character. Skip to the next special - # character, treating the interstice as literal text. - next_pos = ( - self.special_char_re.search(self.string[self.pos:]).start() - + self.pos - ) - text_parts.append(self.string[self.pos:next_pos]) - self.pos = next_pos - continue - - if self.pos == len(self.string) - 1: - # The last character can never begin a structure, so we - # just interpret it as a literal character (unless it - # terminates the expression, as with , and }). - if char not in (GROUP_CLOSE, ARG_SEP): - text_parts.append(char) - self.pos += 1 - break - - next_char = self.string[self.pos + 1] - if char == ESCAPE_CHAR and next_char in \ - (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP): - # An escaped special character ($$, $}, etc.). Note that - # ${ is not an escape sequence: this is ambiguous with - # the start of a symbol and it's not necessary (just - # using { suffices in all cases). - text_parts.append(next_char) - self.pos += 2 # Skip the next character. - continue - - # Shift all characters collected so far into a single string. - if text_parts: - self.parts.append(u''.join(text_parts)) - text_parts = [] - - if char == SYMBOL_DELIM: - # Parse a symbol. - self.parse_symbol() - elif char == FUNC_DELIM: - # Parse a function call. - self.parse_call() - elif char in (GROUP_CLOSE, ARG_SEP): - # Template terminated. - break - elif char == GROUP_OPEN: - # Start of a group has no meaning hear; just pass - # through the character. - text_parts.append(char) - self.pos += 1 - else: - assert False - - # If any parsed characters remain, shift them into a string. - if text_parts: - self.parts.append(u''.join(text_parts)) - - def parse_symbol(self): - """Parse a variable reference (like ``$foo`` or ``${foo}``) - starting at ``pos``. Possibly appends a Symbol object (or, - failing that, text) to the ``parts`` field and updates ``pos``. - The character at ``pos`` must, as a precondition, be ``$``. - """ - assert self.pos < len(self.string) - assert self.string[self.pos] == SYMBOL_DELIM - - if self.pos == len(self.string) - 1: - # Last character. - self.parts.append(SYMBOL_DELIM) - self.pos += 1 - return - - next_char = self.string[self.pos + 1] - start_pos = self.pos - self.pos += 1 - - if next_char == GROUP_OPEN: - # A symbol like ${this}. - self.pos += 1 # Skip opening. - closer = self.string.find(GROUP_CLOSE, self.pos) - if closer == -1 or closer == self.pos: - # No closing brace found or identifier is empty. - self.parts.append(self.string[start_pos:self.pos]) - else: - # Closer found. - ident = self.string[self.pos:closer] - self.pos = closer + 1 - self.parts.append(Symbol(ident, - self.string[start_pos:self.pos])) - - else: - # A bare-word symbol. - ident = self._parse_ident() - if ident: - # Found a real symbol. - self.parts.append(Symbol(ident, - self.string[start_pos:self.pos])) - else: - # A standalone $. - self.parts.append(SYMBOL_DELIM) - - def parse_call(self): - """Parse a function call (like ``%foo{bar,baz}``) starting at - ``pos``. Possibly appends a Call object to ``parts`` and update - ``pos``. The character at ``pos`` must be ``%``. - """ - assert self.pos < len(self.string) - assert self.string[self.pos] == FUNC_DELIM - - start_pos = self.pos - self.pos += 1 - - ident = self._parse_ident() - if not ident: - # No function name. - self.parts.append(FUNC_DELIM) - return - - if self.pos >= len(self.string): - # Identifier terminates string. - self.parts.append(self.string[start_pos:self.pos]) - return - - if self.string[self.pos] != GROUP_OPEN: - # Argument list not opened. - self.parts.append(self.string[start_pos:self.pos]) - return - - # Skip past opening brace and try to parse an argument list. - self.pos += 1 - args = self.parse_argument_list() - if self.pos >= len(self.string) or \ - self.string[self.pos] != GROUP_CLOSE: - # Arguments unclosed. - self.parts.append(self.string[start_pos:self.pos]) - return - - self.pos += 1 # Move past closing brace. - self.parts.append(Call(ident, args, self.string[start_pos:self.pos])) - - def parse_argument_list(self): - """Parse a list of arguments starting at ``pos``, returning a - list of Expression objects. Does not modify ``parts``. Should - leave ``pos`` pointing to a } character or the end of the - string. - """ - # Try to parse a subexpression in a subparser. - expressions = [] - - while self.pos < len(self.string): - subparser = Parser(self.string[self.pos:]) - subparser.parse_expression() - - # Extract and advance past the parsed expression. - expressions.append(Expression(subparser.parts)) - self.pos += subparser.pos - - if self.pos >= len(self.string) or \ - self.string[self.pos] == GROUP_CLOSE: - # Argument list terminated by EOF or closing brace. - break - - # Only other way to terminate an expression is with ,. - # Continue to the next argument. - assert self.string[self.pos] == ARG_SEP - self.pos += 1 - - return expressions - - def _parse_ident(self): - """Parse an identifier and return it (possibly an empty string). - Updates ``pos``. - """ - remainder = self.string[self.pos:] - ident = re.match(ur'\w*', remainder).group(0) - self.pos += len(ident) - return ident - - -def _parse(template): - """Parse a top-level template string Expression. Any extraneous text - is considered literal text. - """ - parser = Parser(template) - parser.parse_expression() - - parts = parser.parts - remainder = parser.string[parser.pos:] - if remainder: - parts.append(remainder) - return Expression(parts) - - -# External interface. - -class Template(object): - """A string template, including text, Symbols, and Calls. - """ - def __init__(self, template): - self.expr = _parse(template) - self.original = template - self.compiled = self.translate() - - def __eq__(self, other): - return self.original == other.original - - def interpret(self, values={}, functions={}): - """Like `substitute`, but forces the interpreter (rather than - the compiled version) to be used. The interpreter includes - exception-handling code for missing variables and buggy template - functions but is much slower. - """ - return self.expr.evaluate(Environment(values, functions)) - - def substitute(self, values={}, functions={}): - """Evaluate the template given the values and functions. - """ - try: - res = self.compiled(values, functions) - except: # Handle any exceptions thrown by compiled version. - res = self.interpret(values, functions) - return res - - def translate(self): - """Compile the template to a Python function.""" - expressions, varnames, funcnames = self.expr.translate() - - argnames = [] - for varname in varnames: - argnames.append(VARIABLE_PREFIX.encode('utf8') + varname) - for funcname in funcnames: - argnames.append(FUNCTION_PREFIX.encode('utf8') + funcname) - - func = compile_func( - argnames, - [ast.Return(ast.List(expressions, ast.Load()))], - ) - - def wrapper_func(values={}, functions={}): - args = {} - for varname in varnames: - args[VARIABLE_PREFIX + varname] = values[varname] - for funcname in funcnames: - args[FUNCTION_PREFIX + funcname] = functions[funcname] - parts = func(**args) - return u''.join(parts) - - return wrapper_func - - -# Performance tests. - -if __name__ == '__main__': - import timeit - _tmpl = Template(u'foo $bar %baz{foozle $bar barzle} $bar') - _vars = {'bar': 'qux'} - _funcs = {'baz': unicode.upper} - interp_time = timeit.timeit('_tmpl.interpret(_vars, _funcs)', - 'from __main__ import _tmpl, _vars, _funcs', - number=10000) - print(interp_time) - comp_time = timeit.timeit('_tmpl.substitute(_vars, _funcs)', - 'from __main__ import _tmpl, _vars, _funcs', - number=10000) - print(comp_time) - print('Speedup:', interp_time / comp_time) diff --git a/lib/beets/util/pipeline.py b/lib/beets/util/pipeline.py deleted file mode 100644 index d267789c..00000000 --- a/lib/beets/util/pipeline.py +++ /dev/null @@ -1,517 +0,0 @@ -# This file is part of beets. -# Copyright 2013, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Simple but robust implementation of generator/coroutine-based -pipelines in Python. The pipelines may be run either sequentially -(single-threaded) or in parallel (one thread per pipeline stage). - -This implementation supports pipeline bubbles (indications that the -processing for a certain item should abort). To use them, yield the -BUBBLE constant from any stage coroutine except the last. - -In the parallel case, the implementation transparently handles thread -shutdown when the processing is complete and when a stage raises an -exception. KeyboardInterrupts (^C) are also handled. - -When running a parallel pipeline, it is also possible to use -multiple coroutines for the same pipeline stage; this lets you speed -up a bottleneck stage by dividing its work among multiple threads. -To do so, pass an iterable of coroutines to the Pipeline constructor -in place of any single coroutine. -""" -from __future__ import print_function - -import Queue -from threading import Thread, Lock -import sys - -BUBBLE = '__PIPELINE_BUBBLE__' -POISON = '__PIPELINE_POISON__' - -DEFAULT_QUEUE_SIZE = 16 - - -def _invalidate_queue(q, val=None, sync=True): - """Breaks a Queue such that it never blocks, always has size 1, - and has no maximum size. get()ing from the queue returns `val`, - which defaults to None. `sync` controls whether a lock is - required (because it's not reentrant!). - """ - def _qsize(len=len): - return 1 - - def _put(item): - pass - - def _get(): - return val - - if sync: - q.mutex.acquire() - - try: - q.maxsize = 0 - q._qsize = _qsize - q._put = _put - q._get = _get - q.not_empty.notifyAll() - q.not_full.notifyAll() - - finally: - if sync: - q.mutex.release() - - -class CountedQueue(Queue.Queue): - """A queue that keeps track of the number of threads that are - still feeding into it. The queue is poisoned when all threads are - finished with the queue. - """ - def __init__(self, maxsize=0): - Queue.Queue.__init__(self, maxsize) - self.nthreads = 0 - self.poisoned = False - - def acquire(self): - """Indicate that a thread will start putting into this queue. - Should not be called after the queue is already poisoned. - """ - with self.mutex: - assert not self.poisoned - assert self.nthreads >= 0 - self.nthreads += 1 - - def release(self): - """Indicate that a thread that was putting into this queue has - exited. If this is the last thread using the queue, the queue - is poisoned. - """ - with self.mutex: - self.nthreads -= 1 - assert self.nthreads >= 0 - if self.nthreads == 0: - # All threads are done adding to this queue. Poison it - # when it becomes empty. - self.poisoned = True - - # Replacement _get invalidates when no items remain. - _old_get = self._get - - def _get(): - out = _old_get() - if not self.queue: - _invalidate_queue(self, POISON, False) - return out - - if self.queue: - # Items remain. - self._get = _get - else: - # No items. Invalidate immediately. - _invalidate_queue(self, POISON, False) - - -class MultiMessage(object): - """A message yielded by a pipeline stage encapsulating multiple - values to be sent to the next stage. - """ - def __init__(self, messages): - self.messages = messages - - -def multiple(messages): - """Yield multiple([message, ..]) from a pipeline stage to send - multiple values to the next pipeline stage. - """ - return MultiMessage(messages) - - -def stage(func): - """Decorate a function to become a simple stage. - - >>> @stage - ... def add(n, i): - ... return i + n - >>> pipe = Pipeline([ - ... iter([1, 2, 3]), - ... add(2), - ... ]) - >>> list(pipe.pull()) - [3, 4, 5] - """ - - def coro(*args): - task = None - while True: - task = yield task - task = func(*(args + (task,))) - return coro - - -def mutator_stage(func): - """Decorate a function that manipulates items in a coroutine to - become a simple stage. - - >>> @mutator_stage - ... def setkey(key, item): - ... item[key] = True - >>> pipe = Pipeline([ - ... iter([{'x': False}, {'a': False}]), - ... setkey('x'), - ... ]) - >>> list(pipe.pull()) - [{'x': True}, {'a': False, 'x': True}] - """ - - def coro(*args): - task = None - while True: - task = yield task - func(*(args + (task,))) - return coro - - -def _allmsgs(obj): - """Returns a list of all the messages encapsulated in obj. If obj - is a MultiMessage, returns its enclosed messages. If obj is BUBBLE, - returns an empty list. Otherwise, returns a list containing obj. - """ - if isinstance(obj, MultiMessage): - return obj.messages - elif obj == BUBBLE: - return [] - else: - return [obj] - - -class PipelineThread(Thread): - """Abstract base class for pipeline-stage threads.""" - def __init__(self, all_threads): - super(PipelineThread, self).__init__() - self.abort_lock = Lock() - self.abort_flag = False - self.all_threads = all_threads - self.exc_info = None - - def abort(self): - """Shut down the thread at the next chance possible. - """ - with self.abort_lock: - self.abort_flag = True - - # Ensure that we are not blocking on a queue read or write. - if hasattr(self, 'in_queue'): - _invalidate_queue(self.in_queue, POISON) - if hasattr(self, 'out_queue'): - _invalidate_queue(self.out_queue, POISON) - - def abort_all(self, exc_info): - """Abort all other threads in the system for an exception. - """ - self.exc_info = exc_info - for thread in self.all_threads: - thread.abort() - - -class FirstPipelineThread(PipelineThread): - """The thread running the first stage in a parallel pipeline setup. - The coroutine should just be a generator. - """ - def __init__(self, coro, out_queue, all_threads): - super(FirstPipelineThread, self).__init__(all_threads) - self.coro = coro - self.out_queue = out_queue - self.out_queue.acquire() - - self.abort_lock = Lock() - self.abort_flag = False - - def run(self): - try: - while True: - with self.abort_lock: - if self.abort_flag: - return - - # Get the value from the generator. - try: - msg = self.coro.next() - except StopIteration: - break - - # Send messages to the next stage. - for msg in _allmsgs(msg): - with self.abort_lock: - if self.abort_flag: - return - self.out_queue.put(msg) - - except: - self.abort_all(sys.exc_info()) - return - - # Generator finished; shut down the pipeline. - self.out_queue.release() - - -class MiddlePipelineThread(PipelineThread): - """A thread running any stage in the pipeline except the first or - last. - """ - def __init__(self, coro, in_queue, out_queue, all_threads): - super(MiddlePipelineThread, self).__init__(all_threads) - self.coro = coro - self.in_queue = in_queue - self.out_queue = out_queue - self.out_queue.acquire() - - def run(self): - try: - # Prime the coroutine. - self.coro.next() - - while True: - with self.abort_lock: - if self.abort_flag: - return - - # Get the message from the previous stage. - msg = self.in_queue.get() - if msg is POISON: - break - - with self.abort_lock: - if self.abort_flag: - return - - # Invoke the current stage. - out = self.coro.send(msg) - - # Send messages to next stage. - for msg in _allmsgs(out): - with self.abort_lock: - if self.abort_flag: - return - self.out_queue.put(msg) - - except: - self.abort_all(sys.exc_info()) - return - - # Pipeline is shutting down normally. - self.out_queue.release() - - -class LastPipelineThread(PipelineThread): - """A thread running the last stage in a pipeline. The coroutine - should yield nothing. - """ - def __init__(self, coro, in_queue, all_threads): - super(LastPipelineThread, self).__init__(all_threads) - self.coro = coro - self.in_queue = in_queue - - def run(self): - # Prime the coroutine. - self.coro.next() - - try: - while True: - with self.abort_lock: - if self.abort_flag: - return - - # Get the message from the previous stage. - msg = self.in_queue.get() - if msg is POISON: - break - - with self.abort_lock: - if self.abort_flag: - return - - # Send to consumer. - self.coro.send(msg) - - except: - self.abort_all(sys.exc_info()) - return - - -class Pipeline(object): - """Represents a staged pattern of work. Each stage in the pipeline - is a coroutine that receives messages from the previous stage and - yields messages to be sent to the next stage. - """ - def __init__(self, stages): - """Makes a new pipeline from a list of coroutines. There must - be at least two stages. - """ - if len(stages) < 2: - raise ValueError('pipeline must have at least two stages') - self.stages = [] - for stage in stages: - if isinstance(stage, (list, tuple)): - self.stages.append(stage) - else: - # Default to one thread per stage. - self.stages.append((stage,)) - - def run_sequential(self): - """Run the pipeline sequentially in the current thread. The - stages are run one after the other. Only the first coroutine - in each stage is used. - """ - list(self.pull()) - - def run_parallel(self, queue_size=DEFAULT_QUEUE_SIZE): - """Run the pipeline in parallel using one thread per stage. The - messages between the stages are stored in queues of the given - size. - """ - queue_count = len(self.stages) - 1 - queues = [CountedQueue(queue_size) for i in range(queue_count)] - threads = [] - - # Set up first stage. - for coro in self.stages[0]: - threads.append(FirstPipelineThread(coro, queues[0], threads)) - - # Middle stages. - for i in range(1, queue_count): - for coro in self.stages[i]: - threads.append(MiddlePipelineThread( - coro, queues[i - 1], queues[i], threads - )) - - # Last stage. - for coro in self.stages[-1]: - threads.append( - LastPipelineThread(coro, queues[-1], threads) - ) - - # Start threads. - for thread in threads: - thread.start() - - # Wait for termination. The final thread lasts the longest. - try: - # Using a timeout allows us to receive KeyboardInterrupt - # exceptions during the join(). - while threads[-1].isAlive(): - threads[-1].join(1) - - except: - # Stop all the threads immediately. - for thread in threads: - thread.abort() - raise - - finally: - # Make completely sure that all the threads have finished - # before we return. They should already be either finished, - # in normal operation, or aborted, in case of an exception. - for thread in threads[:-1]: - thread.join() - - for thread in threads: - exc_info = thread.exc_info - if exc_info: - # Make the exception appear as it was raised originally. - raise exc_info[0], exc_info[1], exc_info[2] - - def pull(self): - """Yield elements from the end of the pipeline. Runs the stages - sequentially until the last yields some messages. Each of the messages - is then yielded by ``pulled.next()``. If the pipeline has a consumer, - that is the last stage does not yield any messages, then pull will not - yield any messages. Only the first coroutine in each stage is used - """ - coros = [stage[0] for stage in self.stages] - - # "Prime" the coroutines. - for coro in coros[1:]: - coro.next() - - # Begin the pipeline. - for out in coros[0]: - msgs = _allmsgs(out) - for coro in coros[1:]: - next_msgs = [] - for msg in msgs: - out = coro.send(msg) - next_msgs.extend(_allmsgs(out)) - msgs = next_msgs - for msg in msgs: - yield msg - -# Smoke test. -if __name__ == '__main__': - import time - - # Test a normally-terminating pipeline both in sequence and - # in parallel. - def produce(): - for i in range(5): - print('generating %i' % i) - time.sleep(1) - yield i - - def work(): - num = yield - while True: - print('processing %i' % num) - time.sleep(2) - num = yield num * 2 - - def consume(): - while True: - num = yield - time.sleep(1) - print('received %i' % num) - - ts_start = time.time() - Pipeline([produce(), work(), consume()]).run_sequential() - ts_seq = time.time() - Pipeline([produce(), work(), consume()]).run_parallel() - ts_par = time.time() - Pipeline([produce(), (work(), work()), consume()]).run_parallel() - ts_end = time.time() - print('Sequential time:', ts_seq - ts_start) - print('Parallel time:', ts_par - ts_seq) - print('Multiply-parallel time:', ts_end - ts_par) - print() - - # Test a pipeline that raises an exception. - def exc_produce(): - for i in range(10): - print('generating %i' % i) - time.sleep(1) - yield i - - def exc_work(): - num = yield - while True: - print('processing %i' % num) - time.sleep(3) - if num == 3: - raise Exception() - num = yield num * 2 - - def exc_consume(): - while True: - num = yield - print('received %i' % num) - - Pipeline([exc_produce(), exc_work(), exc_consume()]).run_parallel(1) diff --git a/lib/beets/vfs.py b/lib/beets/vfs.py deleted file mode 100644 index e940e21f..00000000 --- a/lib/beets/vfs.py +++ /dev/null @@ -1,50 +0,0 @@ -# This file is part of beets. -# Copyright 2013, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""A simple utility for constructing filesystem-like trees from beets -libraries. -""" -from collections import namedtuple -from beets import util - -Node = namedtuple('Node', ['files', 'dirs']) - - -def _insert(node, path, itemid): - """Insert an item into a virtual filesystem node.""" - if len(path) == 1: - # Last component. Insert file. - node.files[path[0]] = itemid - else: - # In a directory. - dirname = path[0] - rest = path[1:] - if dirname not in node.dirs: - node.dirs[dirname] = Node({}, {}) - _insert(node.dirs[dirname], rest, itemid) - - -def libtree(lib): - """Generates a filesystem-like directory tree for the files - contained in `lib`. Filesystem nodes are (files, dirs) named - tuples in which both components are dictionaries. The first - maps filenames to Item ids. The second maps directory names to - child node tuples. - """ - root = Node({}, {}) - for item in lib.items(): - dest = item.destination(fragment=True) - parts = util.components(dest) - _insert(root, parts, item.id) - return root diff --git a/lib/beetsplug/__init__.py b/lib/beetsplug/__init__.py deleted file mode 100644 index 98a7ffd5..00000000 --- a/lib/beetsplug/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# This file is part of beets. -# Copyright 2013, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""A namespace package for beets plugins.""" - -# Make this a namespace package. -from pkgutil import extend_path -__path__ = extend_path(__path__, __name__) diff --git a/lib/beetsplug/embedart.py b/lib/beetsplug/embedart.py deleted file mode 100644 index 49ed4792..00000000 --- a/lib/beetsplug/embedart.py +++ /dev/null @@ -1,280 +0,0 @@ -# This file is part of beets. -# Copyright 2014, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Allows beets to embed album art into file metadata.""" -import os.path -import logging -import imghdr -import subprocess -import platform -from tempfile import NamedTemporaryFile - -from beets.plugins import BeetsPlugin -from beets import mediafile -from beets import ui -from beets.ui import decargs -from beets.util import syspath, normpath, displayable_path -from beets.util.artresizer import ArtResizer -from beets import config - - -log = logging.getLogger('beets') - - -class EmbedCoverArtPlugin(BeetsPlugin): - """Allows albumart to be embedded into the actual files. - """ - def __init__(self): - super(EmbedCoverArtPlugin, self).__init__() - self.config.add({ - 'maxwidth': 0, - 'auto': True, - 'compare_threshold': 0, - 'ifempty': False, - }) - - if self.config['maxwidth'].get(int) and not ArtResizer.shared.local: - self.config['maxwidth'] = 0 - log.warn(u"embedart: ImageMagick or PIL not found; " - u"'maxwidth' option ignored") - if self.config['compare_threshold'].get(int) and not \ - ArtResizer.shared.can_compare: - self.config['compare_threshold'] = 0 - log.warn(u"embedart: ImageMagick 6.8.7 or higher not installed; " - u"'compare_threshold' option ignored") - - def commands(self): - # Embed command. - embed_cmd = ui.Subcommand( - 'embedart', help='embed image files into file metadata' - ) - embed_cmd.parser.add_option( - '-f', '--file', metavar='PATH', help='the image file to embed' - ) - maxwidth = config['embedart']['maxwidth'].get(int) - compare_threshold = config['embedart']['compare_threshold'].get(int) - ifempty = config['embedart']['ifempty'].get(bool) - - def embed_func(lib, opts, args): - if opts.file: - imagepath = normpath(opts.file) - for item in lib.items(decargs(args)): - embed_item(item, imagepath, maxwidth, None, - compare_threshold, ifempty) - else: - for album in lib.albums(decargs(args)): - embed_album(album, maxwidth) - - embed_cmd.func = embed_func - - # Extract command. - extract_cmd = ui.Subcommand('extractart', - help='extract an image from file metadata') - extract_cmd.parser.add_option('-o', dest='outpath', - help='image output file') - - def extract_func(lib, opts, args): - outpath = normpath(opts.outpath or 'cover') - item = lib.items(decargs(args)).get() - extract(outpath, item) - extract_cmd.func = extract_func - - # Clear command. - clear_cmd = ui.Subcommand('clearart', - help='remove images from file metadata') - - def clear_func(lib, opts, args): - clear(lib, decargs(args)) - clear_cmd.func = clear_func - - return [embed_cmd, extract_cmd, clear_cmd] - - -@EmbedCoverArtPlugin.listen('album_imported') -def album_imported(lib, album): - """Automatically embed art into imported albums. - """ - if album.artpath and config['embedart']['auto']: - embed_album(album, config['embedart']['maxwidth'].get(int), True) - - -def embed_item(item, imagepath, maxwidth=None, itempath=None, - compare_threshold=0, ifempty=False, as_album=False): - """Embed an image into the item's media file. - """ - if compare_threshold: - if not check_art_similarity(item, imagepath, compare_threshold): - log.warn(u'Image not similar; skipping.') - return - if ifempty: - art = get_art(item) - if not art: - pass - else: - log.debug(u'embedart: media file contained art already {0}'.format( - displayable_path(imagepath) - )) - return - if maxwidth and not as_album: - imagepath = resize_image(imagepath, maxwidth) - - try: - log.debug(u'embedart: embedding {0}'.format( - displayable_path(imagepath) - )) - item['images'] = [_mediafile_image(imagepath, maxwidth)] - except IOError as exc: - log.error(u'embedart: could not read image file: {0}'.format(exc)) - else: - # We don't want to store the image in the database. - item.try_write(itempath) - del item['images'] - - -def embed_album(album, maxwidth=None, quiet=False): - """Embed album art into all of the album's items. - """ - imagepath = album.artpath - if not imagepath: - log.info(u'No album art present: {0} - {1}'. - format(album.albumartist, album.album)) - return - if not os.path.isfile(syspath(imagepath)): - log.error(u'Album art not found at {0}' - .format(displayable_path(imagepath))) - return - if maxwidth: - imagepath = resize_image(imagepath, maxwidth) - - log.log( - logging.DEBUG if quiet else logging.INFO, - u'Embedding album art into {0.albumartist} - {0.album}.'.format(album), - ) - - for item in album.items(): - embed_item(item, imagepath, maxwidth, None, - config['embedart']['compare_threshold'].get(int), - config['embedart']['ifempty'].get(bool), as_album=True) - - -def resize_image(imagepath, maxwidth): - """Returns path to an image resized to maxwidth. - """ - log.info(u'Resizing album art to {0} pixels wide' - .format(maxwidth)) - imagepath = ArtResizer.shared.resize(maxwidth, syspath(imagepath)) - return imagepath - - -def check_art_similarity(item, imagepath, compare_threshold): - """A boolean indicating if an image is similar to embedded item art. - """ - with NamedTemporaryFile(delete=True) as f: - art = extract(f.name, item) - - if art: - # Converting images to grayscale tends to minimize the weight - # of colors in the diff score - cmd = 'convert {0} {1} -colorspace gray MIFF:- | ' \ - 'compare -metric PHASH - null:'.format(syspath(imagepath), - syspath(art)) - - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=platform.system() != 'Windows', - shell=True) - stdout, stderr = proc.communicate() - if proc.returncode: - if proc.returncode != 1: - log.warn(u'embedart: IM phashes compare failed for {0}, \ - {1}'.format(displayable_path(imagepath), - displayable_path(art))) - return - phashDiff = float(stderr) - else: - phashDiff = float(stdout) - - log.info(u'embedart: compare PHASH score is {0}'.format(phashDiff)) - if phashDiff > compare_threshold: - return False - - return True - - -def _mediafile_image(image_path, maxwidth=None): - """Return a `mediafile.Image` object for the path. - """ - - with open(syspath(image_path), 'rb') as f: - data = f.read() - return mediafile.Image(data, type=mediafile.ImageType.front) - - -def get_art(item): - # Extract the art. - try: - mf = mediafile.MediaFile(syspath(item.path)) - except mediafile.UnreadableFileError as exc: - log.error(u'Could not extract art from {0}: {1}'.format( - displayable_path(item.path), exc - )) - return - - return mf.art - -# 'extractart' command. - - -def extract(outpath, item): - if not item: - log.error(u'No item matches query.') - return - - art = get_art(item) - - if not art: - log.error(u'No album art present in {0} - {1}.' - .format(item.artist, item.title)) - return - - # Add an extension to the filename. - ext = imghdr.what(None, h=art) - if not ext: - log.error(u'Unknown image type.') - return - outpath += '.' + ext - - log.info(u'Extracting album art from: {0.artist} - {0.title} ' - u'to: {1}'.format(item, displayable_path(outpath))) - with open(syspath(outpath), 'wb') as f: - f.write(art) - return outpath - - -# 'clearart' command. - -def clear(lib, query): - log.info(u'Clearing album art from items:') - for item in lib.items(query): - log.info(u'{0} - {1}'.format(item.artist, item.title)) - try: - mf = mediafile.MediaFile(syspath(item.path), - config['id3v23'].get(bool)) - except mediafile.UnreadableFileError as exc: - log.error(u'Could not clear art from {0}: {1}'.format( - displayable_path(item.path), exc - )) - continue - del mf.art - mf.save() diff --git a/lib/beetsplug/fetchart.py b/lib/beetsplug/fetchart.py deleted file mode 100644 index b2a4620b..00000000 --- a/lib/beetsplug/fetchart.py +++ /dev/null @@ -1,396 +0,0 @@ -# This file is part of beets. -# Copyright 2014, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Fetches album art. -""" -from contextlib import closing -import logging -import os -import re -from tempfile import NamedTemporaryFile - -import requests - -from beets import plugins -from beets import importer -from beets import ui -from beets import util -from beets import config -from beets.util.artresizer import ArtResizer - -try: - import itunes - HAVE_ITUNES = True -except ImportError: - HAVE_ITUNES = False - -IMAGE_EXTENSIONS = ['png', 'jpg', 'jpeg'] -CONTENT_TYPES = ('image/jpeg',) -DOWNLOAD_EXTENSION = '.jpg' - -log = logging.getLogger('beets') - -requests_session = requests.Session() -requests_session.headers = {'User-Agent': 'beets'} - - -def _fetch_image(url): - """Downloads an image from a URL and checks whether it seems to - actually be an image. If so, returns a path to the downloaded image. - Otherwise, returns None. - """ - log.debug(u'fetchart: downloading art: {0}'.format(url)) - try: - with closing(requests_session.get(url, stream=True)) as resp: - if 'Content-Type' not in resp.headers \ - or resp.headers['Content-Type'] not in CONTENT_TYPES: - log.debug(u'fetchart: not an image') - return - - # Generate a temporary file with the correct extension. - with NamedTemporaryFile(suffix=DOWNLOAD_EXTENSION, delete=False) \ - as fh: - for chunk in resp.iter_content(): - fh.write(chunk) - log.debug(u'fetchart: downloaded art to: {0}'.format( - util.displayable_path(fh.name) - )) - return fh.name - except (IOError, requests.RequestException): - log.debug(u'fetchart: error fetching art') - - -# ART SOURCES ################################################################ - -# Cover Art Archive. - -CAA_URL = 'http://coverartarchive.org/release/{mbid}/front-500.jpg' -CAA_GROUP_URL = 'http://coverartarchive.org/release-group/{mbid}/front-500.jpg' - - -def caa_art(album): - """Return the Cover Art Archive and Cover Art Archive release group URLs - using album MusicBrainz release ID and release group ID. - """ - if album.mb_albumid: - yield CAA_URL.format(mbid=album.mb_albumid) - if album.mb_releasegroupid: - yield CAA_GROUP_URL.format(mbid=album.mb_releasegroupid) - - -# Art from Amazon. - -AMAZON_URL = 'http://images.amazon.com/images/P/%s.%02i.LZZZZZZZ.jpg' -AMAZON_INDICES = (1, 2) - - -def art_for_asin(album): - """Generate URLs using Amazon ID (ASIN) string. - """ - if album.asin: - for index in AMAZON_INDICES: - yield AMAZON_URL % (album.asin, index) - - -# AlbumArt.org scraper. - -AAO_URL = 'http://www.albumart.org/index_detail.php' -AAO_PAT = r'href\s*=\s*"([^>"]*)"[^>]*title\s*=\s*"View larger image"' - - -def aao_art(album): - """Return art URL from AlbumArt.org using album ASIN. - """ - if not album.asin: - return - # Get the page from albumart.org. - try: - resp = requests_session.get(AAO_URL, params={'asin': album.asin}) - log.debug(u'fetchart: scraped art URL: {0}'.format(resp.url)) - except requests.RequestException: - log.debug(u'fetchart: error scraping art page') - return - - # Search the page for the image URL. - m = re.search(AAO_PAT, resp.text) - if m: - image_url = m.group(1) - yield image_url - else: - log.debug(u'fetchart: no image found on page') - - -# Google Images scraper. - -GOOGLE_URL = 'https://ajax.googleapis.com/ajax/services/search/images' - - -def google_art(album): - """Return art URL from google.org given an album title and - interpreter. - """ - if not (album.albumartist and album.album): - return - search_string = (album.albumartist + ',' + album.album).encode('utf-8') - response = requests_session.get(GOOGLE_URL, params={ - 'v': '1.0', - 'q': search_string, - 'start': '0', - }) - - # Get results using JSON. - try: - results = response.json() - data = results['responseData'] - dataInfo = data['results'] - for myUrl in dataInfo: - yield myUrl['unescapedUrl'] - except: - log.debug(u'fetchart: error scraping art page') - return - - -# Art from the iTunes Store. - -def itunes_art(album): - """Return art URL from iTunes Store given an album title. - """ - search_string = (album.albumartist + ' ' + album.album).encode('utf-8') - try: - # Isolate bugs in the iTunes library while searching. - try: - itunes_album = itunes.search_album(search_string)[0] - except Exception as exc: - log.debug('fetchart: iTunes search failed: {0}'.format(exc)) - return - - if itunes_album.get_artwork()['100']: - small_url = itunes_album.get_artwork()['100'] - big_url = small_url.replace('100x100', '1200x1200') - yield big_url - else: - log.debug(u'fetchart: album has no artwork in iTunes Store') - except IndexError: - log.debug(u'fetchart: album not found in iTunes Store') - - -# Art from the filesystem. - - -def filename_priority(filename, cover_names): - """Sort order for image names. - - Return indexes of cover names found in the image filename. This - means that images with lower-numbered and more keywords will have higher - priority. - """ - return [idx for (idx, x) in enumerate(cover_names) if x in filename] - - -def art_in_path(path, cover_names, cautious): - """Look for album art files in a specified directory. - """ - if not os.path.isdir(path): - return - - # Find all files that look like images in the directory. - images = [] - for fn in os.listdir(path): - for ext in IMAGE_EXTENSIONS: - if fn.lower().endswith('.' + ext): - images.append(fn) - - # Look for "preferred" filenames. - images = sorted(images, key=lambda x: filename_priority(x, cover_names)) - cover_pat = r"(\b|_)({0})(\b|_)".format('|'.join(cover_names)) - for fn in images: - if re.search(cover_pat, os.path.splitext(fn)[0], re.I): - log.debug(u'fetchart: using well-named art file {0}'.format( - util.displayable_path(fn) - )) - return os.path.join(path, fn) - - # Fall back to any image in the folder. - if images and not cautious: - log.debug(u'fetchart: using fallback art file {0}'.format( - util.displayable_path(images[0]) - )) - return os.path.join(path, images[0]) - - -# Try each source in turn. - -SOURCES_ALL = [u'coverart', u'itunes', u'amazon', u'albumart', u'google'] - -ART_FUNCS = { - u'coverart': caa_art, - u'itunes': itunes_art, - u'albumart': aao_art, - u'amazon': art_for_asin, - u'google': google_art, -} - - -def _source_urls(album, sources=SOURCES_ALL): - """Generate possible source URLs for an album's art. The URLs are - not guaranteed to work so they each need to be attempted in turn. - This allows the main `art_for_album` function to abort iteration - through this sequence early to avoid the cost of scraping when not - necessary. - """ - for s in sources: - urls = ART_FUNCS[s](album) - for url in urls: - yield url - - -def art_for_album(album, paths, maxwidth=None, local_only=False): - """Given an Album object, returns a path to downloaded art for the - album (or None if no art is found). If `maxwidth`, then images are - resized to this maximum pixel size. If `local_only`, then only local - image files from the filesystem are returned; no network requests - are made. - """ - out = None - - # Local art. - cover_names = config['fetchart']['cover_names'].as_str_seq() - cover_names = map(util.bytestring_path, cover_names) - cautious = config['fetchart']['cautious'].get(bool) - if paths: - for path in paths: - out = art_in_path(path, cover_names, cautious) - if out: - break - - # Web art sources. - remote_priority = config['fetchart']['remote_priority'].get(bool) - if not local_only and (remote_priority or not out): - for url in _source_urls(album, - config['fetchart']['sources'].as_str_seq()): - if maxwidth: - url = ArtResizer.shared.proxy_url(maxwidth, url) - candidate = _fetch_image(url) - if candidate: - out = candidate - break - - if maxwidth and out: - out = ArtResizer.shared.resize(maxwidth, out) - return out - - -# PLUGIN LOGIC ############################################################### - - -def batch_fetch_art(lib, albums, force, maxwidth=None): - """Fetch album art for each of the albums. This implements the manual - fetchart CLI command. - """ - for album in albums: - if album.artpath and not force: - message = 'has album art' - else: - # In ordinary invocations, look for images on the - # filesystem. When forcing, however, always go to the Web - # sources. - local_paths = None if force else [album.path] - - path = art_for_album(album, local_paths, maxwidth) - if path: - album.set_art(path, False) - album.store() - message = ui.colorize('green', 'found album art') - else: - message = ui.colorize('red', 'no art found') - - log.info(u'{0} - {1}: {2}'.format(album.albumartist, album.album, - message)) - - -class FetchArtPlugin(plugins.BeetsPlugin): - def __init__(self): - super(FetchArtPlugin, self).__init__() - - self.config.add({ - 'auto': True, - 'maxwidth': 0, - 'remote_priority': False, - 'cautious': False, - 'google_search': False, - 'cover_names': ['cover', 'front', 'art', 'album', 'folder'], - 'sources': SOURCES_ALL, - }) - - # Holds paths to downloaded images between fetching them and - # placing them in the filesystem. - self.art_paths = {} - - self.maxwidth = self.config['maxwidth'].get(int) - if self.config['auto']: - # Enable two import hooks when fetching is enabled. - self.import_stages = [self.fetch_art] - self.register_listener('import_task_files', self.assign_art) - - available_sources = list(SOURCES_ALL) - if not HAVE_ITUNES and u'itunes' in available_sources: - available_sources.remove(u'itunes') - self.config['sources'] = plugins.sanitize_choices( - self.config['sources'].as_str_seq(), available_sources) - - # Asynchronous; after music is added to the library. - def fetch_art(self, session, task): - """Find art for the album being imported.""" - if task.is_album: # Only fetch art for full albums. - if task.choice_flag == importer.action.ASIS: - # For as-is imports, don't search Web sources for art. - local = True - elif task.choice_flag == importer.action.APPLY: - # Search everywhere for art. - local = False - else: - # For any other choices (e.g., TRACKS), do nothing. - return - - path = art_for_album(task.album, task.paths, self.maxwidth, local) - - if path: - self.art_paths[task] = path - - # Synchronous; after music files are put in place. - def assign_art(self, session, task): - """Place the discovered art in the filesystem.""" - if task in self.art_paths: - path = self.art_paths.pop(task) - - album = task.album - src_removed = (config['import']['delete'].get(bool) or - config['import']['move'].get(bool)) - album.set_art(path, not src_removed) - album.store() - if src_removed: - task.prune(path) - - # Manual album art fetching. - def commands(self): - cmd = ui.Subcommand('fetchart', help='download album art') - cmd.parser.add_option('-f', '--force', dest='force', - action='store_true', default=False, - help='re-download art when already present') - - def func(lib, opts, args): - batch_fetch_art(lib, lib.albums(ui.decargs(args)), opts.force, - self.maxwidth) - cmd.func = func - return [cmd] diff --git a/lib/beetsplug/lyrics.py b/lib/beetsplug/lyrics.py deleted file mode 100644 index a2ebe7c3..00000000 --- a/lib/beetsplug/lyrics.py +++ /dev/null @@ -1,544 +0,0 @@ -# This file is part of beets. -# Copyright 2014, Adrian Sampson. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. - -"""Fetches, embeds, and displays lyrics. -""" -from __future__ import print_function - -import re -import logging -import requests -import json -import unicodedata -import urllib -import difflib -import itertools -from HTMLParser import HTMLParseError - -from beets import plugins -from beets import config, ui - - -# Global logger. - -log = logging.getLogger('beets') - -DIV_RE = re.compile(r'<(/?)div>?', re.I) -COMMENT_RE = re.compile(r'', re.S) -TAG_RE = re.compile(r'<[^>]*>') -BREAK_RE = re.compile(r'\n?\s*]*)*>\s*\n?', re.I) -URL_CHARACTERS = { - u'\u2018': u"'", - u'\u2019': u"'", - u'\u201c': u'"', - u'\u201d': u'"', - u'\u2010': u'-', - u'\u2011': u'-', - u'\u2012': u'-', - u'\u2013': u'-', - u'\u2014': u'-', - u'\u2015': u'-', - u'\u2016': u'-', - u'\u2026': u'...', -} - - -# Utilities. - -def fetch_url(url): - """Retrieve the content at a given URL, or return None if the source - is unreachable. - """ - try: - r = requests.get(url, verify=False) - except requests.RequestException as exc: - log.debug(u'lyrics request failed: {0}'.format(exc)) - return - if r.status_code == requests.codes.ok: - return r.text - else: - log.debug(u'failed to fetch: {0} ({1})'.format(url, r.status_code)) - - -def unescape(text): - """Resolves &#xxx; HTML entities (and some others).""" - if isinstance(text, str): - text = text.decode('utf8', 'ignore') - out = text.replace(u' ', u' ') - - def replchar(m): - num = m.group(1) - return unichr(int(num)) - out = re.sub(u"&#(\d+);", replchar, out) - return out - - -def extract_text_between(html, start_marker, end_marker): - try: - _, html = html.split(start_marker, 1) - html, _ = html.split(end_marker, 1) - except ValueError: - return u'' - return html - - -def extract_text_in(html, starttag): - """Extract the text from a
tag in the HTML starting with - ``starttag``. Returns None if parsing fails. - """ - - # Strip off the leading text before opening tag. - try: - _, html = html.split(starttag, 1) - except ValueError: - return - - # Walk through balanced DIV tags. - level = 0 - parts = [] - pos = 0 - for match in DIV_RE.finditer(html): - if match.group(1): # Closing tag. - level -= 1 - if level == 0: - pos = match.end() - else: # Opening tag. - if level == 0: - parts.append(html[pos:match.start()]) - level += 1 - - if level == -1: - parts.append(html[pos:match.start()]) - break - else: - print('no closing tag found!') - return - return u''.join(parts) - - -def search_pairs(item): - """Yield a pairs of artists and titles to search for. - - The first item in the pair is the name of the artist, the second - item is a list of song names. - - In addition to the artist and title obtained from the `item` the - method tries to strip extra information like paranthesized suffixes - and featured artists from the strings and add them as candidates. - The method also tries to split multiple titles separated with `/`. - """ - - title, artist = item.title, item.artist - titles = [title] - artists = [artist] - - # Remove any featuring artists from the artists name - pattern = r"(.*?) {0}".format(plugins.feat_tokens()) - match = re.search(pattern, artist, re.IGNORECASE) - if match: - artists.append(match.group(1)) - - # Remove a parenthesized suffix from a title string. Common - # examples include (live), (remix), and (acoustic). - pattern = r"(.+?)\s+[(].*[)]$" - match = re.search(pattern, title, re.IGNORECASE) - if match: - titles.append(match.group(1)) - - # Remove any featuring artists from the title - pattern = r"(.*?) {0}".format(plugins.feat_tokens(for_artist=False)) - for title in titles[:]: - match = re.search(pattern, title, re.IGNORECASE) - if match: - titles.append(match.group(1)) - - # Check for a dual song (e.g. Pink Floyd - Speak to Me / Breathe) - # and each of them. - multi_titles = [] - for title in titles: - multi_titles.append([title]) - if '/' in title: - multi_titles.append([x.strip() for x in title.split('/')]) - - return itertools.product(artists, multi_titles) - - -def _encode(s): - """Encode the string for inclusion in a URL (common to both - LyricsWiki and Lyrics.com). - """ - if isinstance(s, unicode): - for char, repl in URL_CHARACTERS.items(): - s = s.replace(char, repl) - s = s.encode('utf8', 'ignore') - return urllib.quote(s) - -# Musixmatch - -MUSIXMATCH_URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s' - - -def fetch_musixmatch(artist, title): - url = MUSIXMATCH_URL_PATTERN % (_lw_encode(artist.title()), - _lw_encode(title.title())) - html = fetch_url(url) - if not html: - return - lyrics = extract_text_between(html, '"lyrics_body":', '"lyrics_language":') - return lyrics.strip(',"').replace('\\n', '\n') - -# LyricsWiki. - -LYRICSWIKI_URL_PATTERN = 'http://lyrics.wikia.com/%s:%s' - - -def _lw_encode(s): - s = re.sub(r'\s+', '_', s) - s = s.replace("<", "Less_Than") - s = s.replace(">", "Greater_Than") - s = s.replace("#", "Number_") - s = re.sub(r'[\[\{]', '(', s) - s = re.sub(r'[\]\}]', ')', s) - return _encode(s) - - -def fetch_lyricswiki(artist, title): - """Fetch lyrics from LyricsWiki.""" - url = LYRICSWIKI_URL_PATTERN % (_lw_encode(artist), _lw_encode(title)) - html = fetch_url(url) - if not html: - return - - lyrics = extract_text_in(html, u"
") - if lyrics and 'Unfortunately, we are not licensed' not in lyrics: - return lyrics - - -# Lyrics.com. - -LYRICSCOM_URL_PATTERN = 'http://www.lyrics.com/%s-lyrics-%s.html' -LYRICSCOM_NOT_FOUND = ( - 'Sorry, we do not have the lyric', - 'Submit Lyrics', -) - - -def _lc_encode(s): - s = re.sub(r'[^\w\s-]', '', s) - s = re.sub(r'\s+', '-', s) - return _encode(s).lower() - - -def fetch_lyricscom(artist, title): - """Fetch lyrics from Lyrics.com.""" - url = LYRICSCOM_URL_PATTERN % (_lc_encode(title), _lc_encode(artist)) - html = fetch_url(url) - if not html: - return - lyrics = extract_text_between(html, '
', '
') - if not lyrics: - return - for not_found_str in LYRICSCOM_NOT_FOUND: - if not_found_str in lyrics: - return - - parts = lyrics.split('\n---\nLyrics powered by', 1) - if parts: - return parts[0] - - -# Optional Google custom search API backend. - -def slugify(text): - """Normalize a string and remove non-alphanumeric characters. - """ - text = re.sub(r"[-'_\s]", '_', text) - text = re.sub(r"_+", '_', text).strip('_') - pat = "([^,\(]*)\((.*?)\)" # Remove content within parentheses - text = re.sub(pat, '\g<1>', text).strip() - try: - text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore') - text = unicode(re.sub('[-\s]+', ' ', text)) - except UnicodeDecodeError: - log.exception(u"Failing to normalize '{0}'".format(text)) - return text - - -BY_TRANS = ['by', 'par', 'de', 'von'] -LYRICS_TRANS = ['lyrics', 'paroles', 'letras', 'liedtexte'] - - -def is_page_candidate(urlLink, urlTitle, title, artist): - """Return True if the URL title makes it a good candidate to be a - page that contains lyrics of title by artist. - """ - title = slugify(title.lower()) - artist = slugify(artist.lower()) - sitename = re.search(u"//([^/]+)/.*", slugify(urlLink.lower())).group(1) - urlTitle = slugify(urlTitle.lower()) - # Check if URL title contains song title (exact match) - if urlTitle.find(title) != -1: - return True - # or try extracting song title from URL title and check if - # they are close enough - tokens = [by + '_' + artist for by in BY_TRANS] + \ - [artist, sitename, sitename.replace('www.', '')] + LYRICS_TRANS - songTitle = re.sub(u'(%s)' % u'|'.join(tokens), u'', urlTitle) - songTitle = songTitle.strip('_|') - typoRatio = .9 - return difflib.SequenceMatcher(None, songTitle, title).ratio() >= typoRatio - - -def remove_credits(text): - """Remove first/last line of text if it contains the word 'lyrics' - eg 'Lyrics by songsdatabase.com' - """ - textlines = text.split('\n') - credits = None - for i in (0, -1): - if textlines and 'lyrics' in textlines[i].lower(): - credits = textlines.pop(i) - if credits: - text = '\n'.join(textlines) - return text - - -def is_lyrics(text, artist=None): - """Determine whether the text seems to be valid lyrics. - """ - if not text: - return False - badTriggersOcc = [] - nbLines = text.count('\n') - if nbLines <= 1: - log.debug(u"Ignoring too short lyrics '{0}'".format(text)) - return False - elif nbLines < 5: - badTriggersOcc.append('too_short') - else: - # Lyrics look legit, remove credits to avoid being penalized further - # down - text = remove_credits(text) - - badTriggers = ['lyrics', 'copyright', 'property', 'links'] - if artist: - badTriggersOcc += [artist] - - for item in badTriggers: - badTriggersOcc += [item] * len(re.findall(r'\W%s\W' % item, - text, re.I)) - - if badTriggersOcc: - log.debug(u'Bad triggers detected: {0}'.format(badTriggersOcc)) - return len(badTriggersOcc) < 2 - - -def _scrape_strip_cruft(html, plain_text_out=False): - """Clean up HTML - """ - html = unescape(html) - - html = html.replace('\r', '\n') # Normalize EOL. - html = re.sub(r' +', ' ', html) # Whitespaces collapse. - html = BREAK_RE.sub('\n', html) #
eats up surrounding '\n'. - html = re.sub(r'<(script).*?(?s)', '', html) # Strip script tags. - - if plain_text_out: # Strip remaining HTML tags - html = COMMENT_RE.sub('', html) - html = TAG_RE.sub('', html) - - html = '\n'.join([x.strip() for x in html.strip().split('\n')]) - html = re.sub(r'\n{3,}', r'\n\n', html) - return html - - -def _scrape_merge_paragraphs(html): - html = re.sub(r'

\s*]*)>', '\n', html) - return re.sub(r'
\s*
', '\n', html) - - -def scrape_lyrics_from_html(html): - """Scrape lyrics from a URL. If no lyrics can be found, return None - instead. - """ - from bs4 import SoupStrainer, BeautifulSoup - - if not html: - return None - - def is_text_notcode(text): - length = len(text) - return (length > 20 and - text.count(' ') > length / 25 and - (text.find('{') == -1 or text.find(';') == -1)) - html = _scrape_strip_cruft(html) - html = _scrape_merge_paragraphs(html) - - # extract all long text blocks that are not code - try: - soup = BeautifulSoup(html, "html.parser", - parse_only=SoupStrainer(text=is_text_notcode)) - except HTMLParseError: - return None - soup = sorted(soup.stripped_strings, key=len)[-1] - return soup - - -def fetch_google(artist, title): - """Fetch lyrics from Google search results. - """ - query = u"%s %s" % (artist, title) - api_key = config['lyrics']['google_API_key'].get(unicode) - engine_id = config['lyrics']['google_engine_ID'].get(unicode) - url = u'https://www.googleapis.com/customsearch/v1?key=%s&cx=%s&q=%s' % \ - (api_key, engine_id, urllib.quote(query.encode('utf8'))) - - data = urllib.urlopen(url) - data = json.load(data) - if 'error' in data: - reason = data['error']['errors'][0]['reason'] - log.debug(u'google lyrics backend error: {0}'.format(reason)) - return - - if 'items' in data.keys(): - for item in data['items']: - urlLink = item['link'] - urlTitle = item.get('title', u'') - if not is_page_candidate(urlLink, urlTitle, title, artist): - continue - html = fetch_url(urlLink) - lyrics = scrape_lyrics_from_html(html) - if not lyrics: - continue - - if is_lyrics(lyrics, artist): - log.debug(u'got lyrics from {0}'.format(item['displayLink'])) - return lyrics - - -# Plugin logic. - -SOURCES = ['google', 'lyricwiki', 'lyrics.com', 'musixmatch'] -SOURCE_BACKENDS = { - 'google': fetch_google, - 'lyricwiki': fetch_lyricswiki, - 'lyrics.com': fetch_lyricscom, - 'musixmatch': fetch_musixmatch, -} - - -class LyricsPlugin(plugins.BeetsPlugin): - def __init__(self): - super(LyricsPlugin, self).__init__() - self.import_stages = [self.imported] - self.config.add({ - 'auto': True, - 'google_API_key': None, - 'google_engine_ID': u'009217259823014548361:lndtuqkycfu', - 'fallback': None, - 'force': False, - 'sources': SOURCES, - }) - - available_sources = list(SOURCES) - if not self.config['google_API_key'].get() and \ - 'google' in SOURCES: - available_sources.remove('google') - self.config['sources'] = plugins.sanitize_choices( - self.config['sources'].as_str_seq(), available_sources) - self.backends = [] - for key in self.config['sources'].as_str_seq(): - self.backends.append(SOURCE_BACKENDS[key]) - - def commands(self): - cmd = ui.Subcommand('lyrics', help='fetch song lyrics') - cmd.parser.add_option('-p', '--print', dest='printlyr', - action='store_true', default=False, - help='print lyrics to console') - cmd.parser.add_option('-f', '--force', dest='force_refetch', - action='store_true', default=False, - help='always re-download lyrics') - - def func(lib, opts, args): - # The "write to files" option corresponds to the - # import_write config value. - write = config['import']['write'].get(bool) - for item in lib.items(ui.decargs(args)): - self.fetch_item_lyrics( - lib, logging.INFO, item, write, - opts.force_refetch or self.config['force'], - ) - if opts.printlyr and item.lyrics: - ui.print_(item.lyrics) - - cmd.func = func - return [cmd] - - def imported(self, session, task): - """Import hook for fetching lyrics automatically. - """ - if self.config['auto']: - for item in task.imported_items(): - self.fetch_item_lyrics(session.lib, logging.DEBUG, item, - False, self.config['force']) - - def fetch_item_lyrics(self, lib, loglevel, item, write, force): - """Fetch and store lyrics for a single item. If ``write``, then the - lyrics will also be written to the file itself. The ``loglevel`` - parameter controls the visibility of the function's status log - messages. - """ - # Skip if the item already has lyrics. - if not force and item.lyrics: - log.log(loglevel, u'lyrics already present: {0} - {1}' - .format(item.artist, item.title)) - return - - lyrics = None - for artist, titles in search_pairs(item): - lyrics = [self.get_lyrics(artist, title) for title in titles] - if any(lyrics): - break - - lyrics = u"\n\n---\n\n".join([l for l in lyrics if l]) - - if lyrics: - log.log(loglevel, u'fetched lyrics: {0} - {1}' - .format(item.artist, item.title)) - else: - log.log(loglevel, u'lyrics not found: {0} - {1}' - .format(item.artist, item.title)) - fallback = self.config['fallback'].get() - if fallback: - lyrics = fallback - else: - return - - item.lyrics = lyrics - - if write: - item.try_write() - item.store() - - def get_lyrics(self, artist, title): - """Fetch lyrics, trying each source in turn. Return a string or - None if no lyrics were found. - """ - for backend in self.backends: - lyrics = backend(artist, title) - if lyrics: - log.debug(u'got lyrics from backend: {0}' - .format(backend.__name__)) - return _scrape_strip_cruft(lyrics, True) diff --git a/lib/biplist/__init__.py b/lib/biplist/__init__.py deleted file mode 100755 index fe5b711b..00000000 --- a/lib/biplist/__init__.py +++ /dev/null @@ -1,803 +0,0 @@ -"""biplist -- a library for reading and writing binary property list files. - -Binary Property List (plist) files provide a faster and smaller serialization -format for property lists on OS X. This is a library for generating binary -plists which can be read by OS X, iOS, or other clients. - -The API models the plistlib API, and will call through to plistlib when -XML serialization or deserialization is required. - -To generate plists with UID values, wrap the values with the Uid object. The -value must be an int. - -To generate plists with NSData/CFData values, wrap the values with the -Data object. The value must be a string. - -Date values can only be datetime.datetime objects. - -The exceptions InvalidPlistException and NotBinaryPlistException may be -thrown to indicate that the data cannot be serialized or deserialized as -a binary plist. - -Plist generation example: - - from biplist import * - from datetime import datetime - plist = {'aKey':'aValue', - '0':1.322, - 'now':datetime.now(), - 'list':[1,2,3], - 'tuple':('a','b','c') - } - try: - writePlist(plist, "example.plist") - except (InvalidPlistException, NotBinaryPlistException), e: - print "Something bad happened:", e - -Plist parsing example: - - from biplist import * - try: - plist = readPlist("example.plist") - print plist - except (InvalidPlistException, NotBinaryPlistException), e: - print "Not a plist:", e -""" - -import sys -from collections import namedtuple -import datetime -import io -import math -import plistlib -from struct import pack, unpack -from struct import error as struct_error -import sys -import time - -try: - unicode - unicodeEmpty = r'' -except NameError: - unicode = str - unicodeEmpty = '' -try: - long -except NameError: - long = int -try: - {}.iteritems - iteritems = lambda x: x.iteritems() -except AttributeError: - iteritems = lambda x: x.items() - -__all__ = [ - 'Uid', 'Data', 'readPlist', 'writePlist', 'readPlistFromString', - 'writePlistToString', 'InvalidPlistException', 'NotBinaryPlistException' -] - -# Apple uses Jan 1, 2001 as a base for all plist date/times. -apple_reference_date = datetime.datetime.utcfromtimestamp(978307200) - -class Uid(int): - """Wrapper around integers for representing UID values. This - is used in keyed archiving.""" - def __repr__(self): - return "Uid(%d)" % self - -class Data(bytes): - """Wrapper around str types for representing Data values.""" - pass - -class InvalidPlistException(Exception): - """Raised when the plist is incorrectly formatted.""" - pass - -class NotBinaryPlistException(Exception): - """Raised when a binary plist was expected but not encountered.""" - pass - -def readPlist(pathOrFile): - """Raises NotBinaryPlistException, InvalidPlistException""" - didOpen = False - result = None - if isinstance(pathOrFile, (bytes, unicode)): - pathOrFile = open(pathOrFile, 'rb') - didOpen = True - try: - reader = PlistReader(pathOrFile) - result = reader.parse() - except NotBinaryPlistException as e: - try: - pathOrFile.seek(0) - result = None - if hasattr(plistlib, 'loads'): - contents = None - if isinstance(pathOrFile, (bytes, unicode)): - with open(pathOrFile, 'rb') as f: - contents = f.read() - else: - contents = pathOrFile.read() - result = plistlib.loads(contents) - else: - result = plistlib.readPlist(pathOrFile) - result = wrapDataObject(result, for_binary=True) - except Exception as e: - raise InvalidPlistException(e) - finally: - if didOpen: - pathOrFile.close() - return result - -def wrapDataObject(o, for_binary=False): - if isinstance(o, Data) and not for_binary: - v = sys.version_info - if not (v[0] >= 3 and v[1] >= 4): - o = plistlib.Data(o) - elif isinstance(o, (bytes, plistlib.Data)) and for_binary: - if hasattr(o, 'data'): - o = Data(o.data) - elif isinstance(o, tuple): - o = wrapDataObject(list(o), for_binary) - o = tuple(o) - elif isinstance(o, list): - for i in range(len(o)): - o[i] = wrapDataObject(o[i], for_binary) - elif isinstance(o, dict): - for k in o: - o[k] = wrapDataObject(o[k], for_binary) - return o - -def writePlist(rootObject, pathOrFile, binary=True): - if not binary: - rootObject = wrapDataObject(rootObject, binary) - if hasattr(plistlib, "dump"): - if isinstance(pathOrFile, (bytes, unicode)): - with open(pathOrFile, 'wb') as f: - return plistlib.dump(rootObject, f) - else: - return plistlib.dump(rootObject, pathOrFile) - else: - return plistlib.writePlist(rootObject, pathOrFile) - else: - didOpen = False - if isinstance(pathOrFile, (bytes, unicode)): - pathOrFile = open(pathOrFile, 'wb') - didOpen = True - writer = PlistWriter(pathOrFile) - result = writer.writeRoot(rootObject) - if didOpen: - pathOrFile.close() - return result - -def readPlistFromString(data): - return readPlist(io.BytesIO(data)) - -def writePlistToString(rootObject, binary=True): - if not binary: - rootObject = wrapDataObject(rootObject, binary) - if hasattr(plistlib, "dumps"): - return plistlib.dumps(rootObject) - elif hasattr(plistlib, "writePlistToBytes"): - return plistlib.writePlistToBytes(rootObject) - else: - return plistlib.writePlistToString(rootObject) - else: - ioObject = io.BytesIO() - writer = PlistWriter(ioObject) - writer.writeRoot(rootObject) - return ioObject.getvalue() - -def is_stream_binary_plist(stream): - stream.seek(0) - header = stream.read(7) - if header == b'bplist0': - return True - else: - return False - -PlistTrailer = namedtuple('PlistTrailer', 'offsetSize, objectRefSize, offsetCount, topLevelObjectNumber, offsetTableOffset') -PlistByteCounts = namedtuple('PlistByteCounts', 'nullBytes, boolBytes, intBytes, realBytes, dateBytes, dataBytes, stringBytes, uidBytes, arrayBytes, setBytes, dictBytes') - -class PlistReader(object): - file = None - contents = '' - offsets = None - trailer = None - currentOffset = 0 - - def __init__(self, fileOrStream): - """Raises NotBinaryPlistException.""" - self.reset() - self.file = fileOrStream - - def parse(self): - return self.readRoot() - - def reset(self): - self.trailer = None - self.contents = '' - self.offsets = [] - self.currentOffset = 0 - - def readRoot(self): - result = None - self.reset() - # Get the header, make sure it's a valid file. - if not is_stream_binary_plist(self.file): - raise NotBinaryPlistException() - self.file.seek(0) - self.contents = self.file.read() - if len(self.contents) < 32: - raise InvalidPlistException("File is too short.") - trailerContents = self.contents[-32:] - try: - self.trailer = PlistTrailer._make(unpack("!xxxxxxBBQQQ", trailerContents)) - offset_size = self.trailer.offsetSize * self.trailer.offsetCount - offset = self.trailer.offsetTableOffset - offset_contents = self.contents[offset:offset+offset_size] - offset_i = 0 - while offset_i < self.trailer.offsetCount: - begin = self.trailer.offsetSize*offset_i - tmp_contents = offset_contents[begin:begin+self.trailer.offsetSize] - tmp_sized = self.getSizedInteger(tmp_contents, self.trailer.offsetSize) - self.offsets.append(tmp_sized) - offset_i += 1 - self.setCurrentOffsetToObjectNumber(self.trailer.topLevelObjectNumber) - result = self.readObject() - except TypeError as e: - raise InvalidPlistException(e) - return result - - def setCurrentOffsetToObjectNumber(self, objectNumber): - self.currentOffset = self.offsets[objectNumber] - - def readObject(self): - result = None - tmp_byte = self.contents[self.currentOffset:self.currentOffset+1] - marker_byte = unpack("!B", tmp_byte)[0] - format = (marker_byte >> 4) & 0x0f - extra = marker_byte & 0x0f - self.currentOffset += 1 - - def proc_extra(extra): - if extra == 0b1111: - #self.currentOffset += 1 - extra = self.readObject() - return extra - - # bool, null, or fill byte - if format == 0b0000: - if extra == 0b0000: - result = None - elif extra == 0b1000: - result = False - elif extra == 0b1001: - result = True - elif extra == 0b1111: - pass # fill byte - else: - raise InvalidPlistException("Invalid object found at offset: %d" % (self.currentOffset - 1)) - # int - elif format == 0b0001: - extra = proc_extra(extra) - result = self.readInteger(pow(2, extra)) - # real - elif format == 0b0010: - extra = proc_extra(extra) - result = self.readReal(extra) - # date - elif format == 0b0011 and extra == 0b0011: - result = self.readDate() - # data - elif format == 0b0100: - extra = proc_extra(extra) - result = self.readData(extra) - # ascii string - elif format == 0b0101: - extra = proc_extra(extra) - result = self.readAsciiString(extra) - # Unicode string - elif format == 0b0110: - extra = proc_extra(extra) - result = self.readUnicode(extra) - # uid - elif format == 0b1000: - result = self.readUid(extra) - # array - elif format == 0b1010: - extra = proc_extra(extra) - result = self.readArray(extra) - # set - elif format == 0b1100: - extra = proc_extra(extra) - result = set(self.readArray(extra)) - # dict - elif format == 0b1101: - extra = proc_extra(extra) - result = self.readDict(extra) - else: - raise InvalidPlistException("Invalid object found: {format: %s, extra: %s}" % (bin(format), bin(extra))) - return result - - def readInteger(self, byteSize): - result = 0 - original_offset = self.currentOffset - data = self.contents[self.currentOffset:self.currentOffset + byteSize] - result = self.getSizedInteger(data, byteSize, as_number=True) - self.currentOffset = original_offset + byteSize - return result - - def readReal(self, length): - result = 0.0 - to_read = pow(2, length) - data = self.contents[self.currentOffset:self.currentOffset+to_read] - if length == 2: # 4 bytes - result = unpack('>f', data)[0] - elif length == 3: # 8 bytes - result = unpack('>d', data)[0] - else: - raise InvalidPlistException("Unknown real of length %d bytes" % to_read) - return result - - def readRefs(self, count): - refs = [] - i = 0 - while i < count: - fragment = self.contents[self.currentOffset:self.currentOffset+self.trailer.objectRefSize] - ref = self.getSizedInteger(fragment, len(fragment)) - refs.append(ref) - self.currentOffset += self.trailer.objectRefSize - i += 1 - return refs - - def readArray(self, count): - result = [] - values = self.readRefs(count) - i = 0 - while i < len(values): - self.setCurrentOffsetToObjectNumber(values[i]) - value = self.readObject() - result.append(value) - i += 1 - return result - - def readDict(self, count): - result = {} - keys = self.readRefs(count) - values = self.readRefs(count) - i = 0 - while i < len(keys): - self.setCurrentOffsetToObjectNumber(keys[i]) - key = self.readObject() - self.setCurrentOffsetToObjectNumber(values[i]) - value = self.readObject() - result[key] = value - i += 1 - return result - - def readAsciiString(self, length): - result = unpack("!%ds" % length, self.contents[self.currentOffset:self.currentOffset+length])[0] - self.currentOffset += length - return result - - def readUnicode(self, length): - actual_length = length*2 - data = self.contents[self.currentOffset:self.currentOffset+actual_length] - # unpack not needed?!! data = unpack(">%ds" % (actual_length), data)[0] - self.currentOffset += actual_length - return data.decode('utf_16_be') - - def readDate(self): - result = unpack(">d", self.contents[self.currentOffset:self.currentOffset+8])[0] - # Use timedelta to workaround time_t size limitation on 32-bit python. - result = datetime.timedelta(seconds=result) + apple_reference_date - self.currentOffset += 8 - return result - - def readData(self, length): - result = self.contents[self.currentOffset:self.currentOffset+length] - self.currentOffset += length - return Data(result) - - def readUid(self, length): - return Uid(self.readInteger(length+1)) - - def getSizedInteger(self, data, byteSize, as_number=False): - """Numbers of 8 bytes are signed integers when they refer to numbers, but unsigned otherwise.""" - result = 0 - # 1, 2, and 4 byte integers are unsigned - if byteSize == 1: - result = unpack('>B', data)[0] - elif byteSize == 2: - result = unpack('>H', data)[0] - elif byteSize == 4: - result = unpack('>L', data)[0] - elif byteSize == 8: - if as_number: - result = unpack('>q', data)[0] - else: - result = unpack('>Q', data)[0] - elif byteSize <= 16: - # Handle odd-sized or integers larger than 8 bytes - # Don't naively go over 16 bytes, in order to prevent infinite loops. - result = 0 - if hasattr(int, 'from_bytes'): - result = int.from_bytes(data, 'big') - else: - for byte in data: - result = (result << 8) | unpack('>B', byte)[0] - else: - raise InvalidPlistException("Encountered integer longer than 16 bytes.") - return result - -class HashableWrapper(object): - def __init__(self, value): - self.value = value - def __repr__(self): - return "" % [self.value] - -class BoolWrapper(object): - def __init__(self, value): - self.value = value - def __repr__(self): - return "" % self.value - -class FloatWrapper(object): - _instances = {} - def __new__(klass, value): - # Ensure FloatWrapper(x) for a given float x is always the same object - wrapper = klass._instances.get(value) - if wrapper is None: - wrapper = object.__new__(klass) - wrapper.value = value - klass._instances[value] = wrapper - return wrapper - def __repr__(self): - return "" % self.value - -class PlistWriter(object): - header = b'bplist00bybiplist1.0' - file = None - byteCounts = None - trailer = None - computedUniques = None - writtenReferences = None - referencePositions = None - wrappedTrue = None - wrappedFalse = None - - def __init__(self, file): - self.reset() - self.file = file - self.wrappedTrue = BoolWrapper(True) - self.wrappedFalse = BoolWrapper(False) - - def reset(self): - self.byteCounts = PlistByteCounts(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - self.trailer = PlistTrailer(0, 0, 0, 0, 0) - - # A set of all the uniques which have been computed. - self.computedUniques = set() - # A list of all the uniques which have been written. - self.writtenReferences = {} - # A dict of the positions of the written uniques. - self.referencePositions = {} - - def positionOfObjectReference(self, obj): - """If the given object has been written already, return its - position in the offset table. Otherwise, return None.""" - return self.writtenReferences.get(obj) - - def writeRoot(self, root): - """ - Strategy is: - - write header - - wrap root object so everything is hashable - - compute size of objects which will be written - - need to do this in order to know how large the object refs - will be in the list/dict/set reference lists - - write objects - - keep objects in writtenReferences - - keep positions of object references in referencePositions - - write object references with the length computed previously - - computer object reference length - - write object reference positions - - write trailer - """ - output = self.header - wrapped_root = self.wrapRoot(root) - should_reference_root = True#not isinstance(wrapped_root, HashableWrapper) - self.computeOffsets(wrapped_root, asReference=should_reference_root, isRoot=True) - self.trailer = self.trailer._replace(**{'objectRefSize':self.intSize(len(self.computedUniques))}) - (_, output) = self.writeObjectReference(wrapped_root, output) - output = self.writeObject(wrapped_root, output, setReferencePosition=True) - - # output size at this point is an upper bound on how big the - # object reference offsets need to be. - self.trailer = self.trailer._replace(**{ - 'offsetSize':self.intSize(len(output)), - 'offsetCount':len(self.computedUniques), - 'offsetTableOffset':len(output), - 'topLevelObjectNumber':0 - }) - - output = self.writeOffsetTable(output) - output += pack('!xxxxxxBBQQQ', *self.trailer) - self.file.write(output) - - def wrapRoot(self, root): - if isinstance(root, bool): - if root is True: - return self.wrappedTrue - else: - return self.wrappedFalse - elif isinstance(root, float): - return FloatWrapper(root) - elif isinstance(root, set): - n = set() - for value in root: - n.add(self.wrapRoot(value)) - return HashableWrapper(n) - elif isinstance(root, dict): - n = {} - for key, value in iteritems(root): - n[self.wrapRoot(key)] = self.wrapRoot(value) - return HashableWrapper(n) - elif isinstance(root, list): - n = [] - for value in root: - n.append(self.wrapRoot(value)) - return HashableWrapper(n) - elif isinstance(root, tuple): - n = tuple([self.wrapRoot(value) for value in root]) - return HashableWrapper(n) - else: - return root - - def incrementByteCount(self, field, incr=1): - self.byteCounts = self.byteCounts._replace(**{field:self.byteCounts.__getattribute__(field) + incr}) - - def computeOffsets(self, obj, asReference=False, isRoot=False): - def check_key(key): - if key is None: - raise InvalidPlistException('Dictionary keys cannot be null in plists.') - elif isinstance(key, Data): - raise InvalidPlistException('Data cannot be dictionary keys in plists.') - elif not isinstance(key, (bytes, unicode)): - raise InvalidPlistException('Keys must be strings.') - - def proc_size(size): - if size > 0b1110: - size += self.intSize(size) - return size - # If this should be a reference, then we keep a record of it in the - # uniques table. - if asReference: - if obj in self.computedUniques: - return - else: - self.computedUniques.add(obj) - - if obj is None: - self.incrementByteCount('nullBytes') - elif isinstance(obj, BoolWrapper): - self.incrementByteCount('boolBytes') - elif isinstance(obj, Uid): - size = self.intSize(obj) - self.incrementByteCount('uidBytes', incr=1+size) - elif isinstance(obj, (int, long)): - size = self.intSize(obj) - self.incrementByteCount('intBytes', incr=1+size) - elif isinstance(obj, FloatWrapper): - size = self.realSize(obj) - self.incrementByteCount('realBytes', incr=1+size) - elif isinstance(obj, datetime.datetime): - self.incrementByteCount('dateBytes', incr=2) - elif isinstance(obj, Data): - size = proc_size(len(obj)) - self.incrementByteCount('dataBytes', incr=1+size) - elif isinstance(obj, (unicode, bytes)): - size = proc_size(len(obj)) - self.incrementByteCount('stringBytes', incr=1+size) - elif isinstance(obj, HashableWrapper): - obj = obj.value - if isinstance(obj, set): - size = proc_size(len(obj)) - self.incrementByteCount('setBytes', incr=1+size) - for value in obj: - self.computeOffsets(value, asReference=True) - elif isinstance(obj, (list, tuple)): - size = proc_size(len(obj)) - self.incrementByteCount('arrayBytes', incr=1+size) - for value in obj: - asRef = True - self.computeOffsets(value, asReference=True) - elif isinstance(obj, dict): - size = proc_size(len(obj)) - self.incrementByteCount('dictBytes', incr=1+size) - for key, value in iteritems(obj): - check_key(key) - self.computeOffsets(key, asReference=True) - self.computeOffsets(value, asReference=True) - else: - raise InvalidPlistException("Unknown object type.") - - def writeObjectReference(self, obj, output): - """Tries to write an object reference, adding it to the references - table. Does not write the actual object bytes or set the reference - position. Returns a tuple of whether the object was a new reference - (True if it was, False if it already was in the reference table) - and the new output. - """ - position = self.positionOfObjectReference(obj) - if position is None: - self.writtenReferences[obj] = len(self.writtenReferences) - output += self.binaryInt(len(self.writtenReferences) - 1, byteSize=self.trailer.objectRefSize) - return (True, output) - else: - output += self.binaryInt(position, byteSize=self.trailer.objectRefSize) - return (False, output) - - def writeObject(self, obj, output, setReferencePosition=False): - """Serializes the given object to the output. Returns output. - If setReferencePosition is True, will set the position the - object was written. - """ - def proc_variable_length(format, length): - result = b'' - if length > 0b1110: - result += pack('!B', (format << 4) | 0b1111) - result = self.writeObject(length, result) - else: - result += pack('!B', (format << 4) | length) - return result - - if isinstance(obj, (str, unicode)) and obj == unicodeEmpty: - # The Apple Plist decoder can't decode a zero length Unicode string. - obj = b'' - - if setReferencePosition: - self.referencePositions[obj] = len(output) - - if obj is None: - output += pack('!B', 0b00000000) - elif isinstance(obj, BoolWrapper): - if obj.value is False: - output += pack('!B', 0b00001000) - else: - output += pack('!B', 0b00001001) - elif isinstance(obj, Uid): - size = self.intSize(obj) - output += pack('!B', (0b1000 << 4) | size - 1) - output += self.binaryInt(obj) - elif isinstance(obj, (int, long)): - byteSize = self.intSize(obj) - root = math.log(byteSize, 2) - output += pack('!B', (0b0001 << 4) | int(root)) - output += self.binaryInt(obj, as_number=True) - elif isinstance(obj, FloatWrapper): - # just use doubles - output += pack('!B', (0b0010 << 4) | 3) - output += self.binaryReal(obj) - elif isinstance(obj, datetime.datetime): - timestamp = (obj - apple_reference_date).total_seconds() - output += pack('!B', 0b00110011) - output += pack('!d', float(timestamp)) - elif isinstance(obj, Data): - output += proc_variable_length(0b0100, len(obj)) - output += obj - elif isinstance(obj, unicode): - byteData = obj.encode('utf_16_be') - output += proc_variable_length(0b0110, len(byteData)//2) - output += byteData - elif isinstance(obj, bytes): - output += proc_variable_length(0b0101, len(obj)) - output += obj - elif isinstance(obj, HashableWrapper): - obj = obj.value - if isinstance(obj, (set, list, tuple)): - if isinstance(obj, set): - output += proc_variable_length(0b1100, len(obj)) - else: - output += proc_variable_length(0b1010, len(obj)) - - objectsToWrite = [] - for objRef in obj: - (isNew, output) = self.writeObjectReference(objRef, output) - if isNew: - objectsToWrite.append(objRef) - for objRef in objectsToWrite: - output = self.writeObject(objRef, output, setReferencePosition=True) - elif isinstance(obj, dict): - output += proc_variable_length(0b1101, len(obj)) - keys = [] - values = [] - objectsToWrite = [] - for key, value in iteritems(obj): - keys.append(key) - values.append(value) - for key in keys: - (isNew, output) = self.writeObjectReference(key, output) - if isNew: - objectsToWrite.append(key) - for value in values: - (isNew, output) = self.writeObjectReference(value, output) - if isNew: - objectsToWrite.append(value) - for objRef in objectsToWrite: - output = self.writeObject(objRef, output, setReferencePosition=True) - return output - - def writeOffsetTable(self, output): - """Writes all of the object reference offsets.""" - all_positions = [] - writtenReferences = list(self.writtenReferences.items()) - writtenReferences.sort(key=lambda x: x[1]) - for obj,order in writtenReferences: - # Porting note: Elsewhere we deliberately replace empty unicdoe strings - # with empty binary strings, but the empty unicode string - # goes into writtenReferences. This isn't an issue in Py2 - # because u'' and b'' have the same hash; but it is in - # Py3, where they don't. - if bytes != str and obj == unicodeEmpty: - obj = b'' - position = self.referencePositions.get(obj) - if position is None: - raise InvalidPlistException("Error while writing offsets table. Object not found. %s" % obj) - output += self.binaryInt(position, self.trailer.offsetSize) - all_positions.append(position) - return output - - def binaryReal(self, obj): - # just use doubles - result = pack('>d', obj.value) - return result - - def binaryInt(self, obj, byteSize=None, as_number=False): - result = b'' - if byteSize is None: - byteSize = self.intSize(obj) - if byteSize == 1: - result += pack('>B', obj) - elif byteSize == 2: - result += pack('>H', obj) - elif byteSize == 4: - result += pack('>L', obj) - elif byteSize == 8: - if as_number: - result += pack('>q', obj) - else: - result += pack('>Q', obj) - elif byteSize <= 16: - try: - result = pack('>Q', 0) + pack('>Q', obj) - except struct_error as e: - raise InvalidPlistException("Unable to pack integer %d: %s" % (obj, e)) - else: - raise InvalidPlistException("Core Foundation can't handle integers with size greater than 16 bytes.") - return result - - def intSize(self, obj): - """Returns the number of bytes necessary to store the given integer.""" - # SIGNED - if obj < 0: # Signed integer, always 8 bytes - return 8 - # UNSIGNED - elif obj <= 0xFF: # 1 byte - return 1 - elif obj <= 0xFFFF: # 2 bytes - return 2 - elif obj <= 0xFFFFFFFF: # 4 bytes - return 4 - # SIGNED - # 0x7FFFFFFFFFFFFFFF is the max. - elif obj <= 0x7FFFFFFFFFFFFFFF: # 8 bytes signed - return 8 - elif obj <= 0xffffffffffffffff: # 8 bytes unsigned - return 16 - else: - raise InvalidPlistException("Core Foundation can't handle integers with size greater than 8 bytes.") - - def realSize(self, obj): - return 8 diff --git a/lib/bs4/__init__.py b/lib/bs4/__init__.py deleted file mode 100644 index d35f765b..00000000 --- a/lib/bs4/__init__.py +++ /dev/null @@ -1,468 +0,0 @@ -"""Beautiful Soup -Elixir and Tonic -"The Screen-Scraper's Friend" -http://www.crummy.com/software/BeautifulSoup/ - -Beautiful Soup uses a pluggable XML or HTML parser to parse a -(possibly invalid) document into a tree representation. Beautiful Soup -provides provides methods and Pythonic idioms that make it easy to -navigate, search, and modify the parse tree. - -Beautiful Soup works with Python 2.6 and up. It works better if lxml -and/or html5lib is installed. - -For more than you ever wanted to know about Beautiful Soup, see the -documentation: -http://www.crummy.com/software/BeautifulSoup/bs4/doc/ -""" - -__author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.4.0" -__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson" -__license__ = "MIT" - -__all__ = ['BeautifulSoup'] - -import os -import re -import warnings - -from .builder import builder_registry, ParserRejectedMarkup -from .dammit import UnicodeDammit -from .element import ( - CData, - Comment, - DEFAULT_OUTPUT_ENCODING, - Declaration, - Doctype, - NavigableString, - PageElement, - ProcessingInstruction, - ResultSet, - SoupStrainer, - Tag, - ) - -# The very first thing we do is give a useful error if someone is -# running this code under Python 3 without converting it. -'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' - -class BeautifulSoup(Tag): - """ - This class defines the basic interface called by the tree builders. - - These methods will be called by the parser: - reset() - feed(markup) - - The tree builder may call these methods from its feed() implementation: - handle_starttag(name, attrs) # See note about return value - handle_endtag(name) - handle_data(data) # Appends to the current data node - endData(containerClass=NavigableString) # Ends the current data node - - No matter how complicated the underlying parser is, you should be - able to build a tree using 'start tag' events, 'end tag' events, - 'data' events, and "done with data" events. - - If you encounter an empty-element tag (aka a self-closing tag, - like HTML's
tag), call handle_starttag and then - handle_endtag. - """ - ROOT_TAG_NAME = u'[document]' - - # If the end-user gives no indication which tree builder they - # want, look for one with these features. - DEFAULT_BUILDER_FEATURES = ['html', 'fast'] - - ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' - - NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n" - - def __init__(self, markup="", features=None, builder=None, - parse_only=None, from_encoding=None, exclude_encodings=None, - **kwargs): - """The Soup object is initialized as the 'root tag', and the - provided markup (which can be a string or a file-like object) - is fed into the underlying parser.""" - - if 'convertEntities' in kwargs: - warnings.warn( - "BS4 does not respect the convertEntities argument to the " - "BeautifulSoup constructor. Entities are always converted " - "to Unicode characters.") - - if 'markupMassage' in kwargs: - del kwargs['markupMassage'] - warnings.warn( - "BS4 does not respect the markupMassage argument to the " - "BeautifulSoup constructor. The tree builder is responsible " - "for any necessary markup massage.") - - if 'smartQuotesTo' in kwargs: - del kwargs['smartQuotesTo'] - warnings.warn( - "BS4 does not respect the smartQuotesTo argument to the " - "BeautifulSoup constructor. Smart quotes are always converted " - "to Unicode characters.") - - if 'selfClosingTags' in kwargs: - del kwargs['selfClosingTags'] - warnings.warn( - "BS4 does not respect the selfClosingTags argument to the " - "BeautifulSoup constructor. The tree builder is responsible " - "for understanding self-closing tags.") - - if 'isHTML' in kwargs: - del kwargs['isHTML'] - warnings.warn( - "BS4 does not respect the isHTML argument to the " - "BeautifulSoup constructor. Suggest you use " - "features='lxml' for HTML and features='lxml-xml' for " - "XML.") - - def deprecated_argument(old_name, new_name): - if old_name in kwargs: - warnings.warn( - 'The "%s" argument to the BeautifulSoup constructor ' - 'has been renamed to "%s."' % (old_name, new_name)) - value = kwargs[old_name] - del kwargs[old_name] - return value - return None - - parse_only = parse_only or deprecated_argument( - "parseOnlyThese", "parse_only") - - from_encoding = from_encoding or deprecated_argument( - "fromEncoding", "from_encoding") - - if len(kwargs) > 0: - arg = kwargs.keys().pop() - raise TypeError( - "__init__() got an unexpected keyword argument '%s'" % arg) - - if builder is None: - original_features = features - if isinstance(features, basestring): - features = [features] - if features is None or len(features) == 0: - features = self.DEFAULT_BUILDER_FEATURES - builder_class = builder_registry.lookup(*features) - if builder_class is None: - raise FeatureNotFound( - "Couldn't find a tree builder with the features you " - "requested: %s. Do you need to install a parser library?" - % ",".join(features)) - builder = builder_class() - if not (original_features == builder.NAME or - original_features in builder.ALTERNATE_NAMES): - if builder.is_xml: - markup_type = "XML" - else: - markup_type = "HTML" - warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict( - parser=builder.NAME, - markup_type=markup_type)) - - self.builder = builder - self.is_xml = builder.is_xml - self.builder.soup = self - - self.parse_only = parse_only - - if hasattr(markup, 'read'): # It's a file-type object. - markup = markup.read() - elif len(markup) <= 256: - # Print out warnings for a couple beginner problems - # involving passing non-markup to Beautiful Soup. - # Beautiful Soup will still parse the input as markup, - # just in case that's what the user really wants. - if (isinstance(markup, unicode) - and not os.path.supports_unicode_filenames): - possible_filename = markup.encode("utf8") - else: - possible_filename = markup - is_file = False - try: - is_file = os.path.exists(possible_filename) - except Exception, e: - # This is almost certainly a problem involving - # characters not valid in filenames on this - # system. Just let it go. - pass - if is_file: - if isinstance(markup, unicode): - markup = markup.encode("utf8") - warnings.warn( - '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup) - if markup[:5] == "http:" or markup[:6] == "https:": - # TODO: This is ugly but I couldn't get it to work in - # Python 3 otherwise. - if ((isinstance(markup, bytes) and not b' ' in markup) - or (isinstance(markup, unicode) and not u' ' in markup)): - if isinstance(markup, unicode): - markup = markup.encode("utf8") - warnings.warn( - '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup) - - for (self.markup, self.original_encoding, self.declared_html_encoding, - self.contains_replacement_characters) in ( - self.builder.prepare_markup( - markup, from_encoding, exclude_encodings=exclude_encodings)): - self.reset() - try: - self._feed() - break - except ParserRejectedMarkup: - pass - - # Clear out the markup and remove the builder's circular - # reference to this object. - self.markup = None - self.builder.soup = None - - def __copy__(self): - return type(self)(self.encode(), builder=self.builder) - - def __getstate__(self): - # Frequently a tree builder can't be pickled. - d = dict(self.__dict__) - if 'builder' in d and not self.builder.picklable: - del d['builder'] - return d - - def _feed(self): - # Convert the document to Unicode. - self.builder.reset() - - self.builder.feed(self.markup) - # Close out any unfinished strings and close all the open tags. - self.endData() - while self.currentTag.name != self.ROOT_TAG_NAME: - self.popTag() - - def reset(self): - Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME) - self.hidden = 1 - self.builder.reset() - self.current_data = [] - self.currentTag = None - self.tagStack = [] - self.preserve_whitespace_tag_stack = [] - self.pushTag(self) - - def new_tag(self, name, namespace=None, nsprefix=None, **attrs): - """Create a new tag associated with this soup.""" - return Tag(None, self.builder, name, namespace, nsprefix, attrs) - - def new_string(self, s, subclass=NavigableString): - """Create a new NavigableString associated with this soup.""" - return subclass(s) - - def insert_before(self, successor): - raise NotImplementedError("BeautifulSoup objects don't support insert_before().") - - def insert_after(self, successor): - raise NotImplementedError("BeautifulSoup objects don't support insert_after().") - - def popTag(self): - tag = self.tagStack.pop() - if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]: - self.preserve_whitespace_tag_stack.pop() - #print "Pop", tag.name - if self.tagStack: - self.currentTag = self.tagStack[-1] - return self.currentTag - - def pushTag(self, tag): - #print "Push", tag.name - if self.currentTag: - self.currentTag.contents.append(tag) - self.tagStack.append(tag) - self.currentTag = self.tagStack[-1] - if tag.name in self.builder.preserve_whitespace_tags: - self.preserve_whitespace_tag_stack.append(tag) - - def endData(self, containerClass=NavigableString): - if self.current_data: - current_data = u''.join(self.current_data) - # If whitespace is not preserved, and this string contains - # nothing but ASCII spaces, replace it with a single space - # or newline. - if not self.preserve_whitespace_tag_stack: - strippable = True - for i in current_data: - if i not in self.ASCII_SPACES: - strippable = False - break - if strippable: - if '\n' in current_data: - current_data = '\n' - else: - current_data = ' ' - - # Reset the data collector. - self.current_data = [] - - # Should we add this string to the tree at all? - if self.parse_only and len(self.tagStack) <= 1 and \ - (not self.parse_only.text or \ - not self.parse_only.search(current_data)): - return - - o = containerClass(current_data) - self.object_was_parsed(o) - - def object_was_parsed(self, o, parent=None, most_recent_element=None): - """Add an object to the parse tree.""" - parent = parent or self.currentTag - previous_element = most_recent_element or self._most_recent_element - - next_element = previous_sibling = next_sibling = None - if isinstance(o, Tag): - next_element = o.next_element - next_sibling = o.next_sibling - previous_sibling = o.previous_sibling - if not previous_element: - previous_element = o.previous_element - - o.setup(parent, previous_element, next_element, previous_sibling, next_sibling) - - self._most_recent_element = o - parent.contents.append(o) - - if parent.next_sibling: - # This node is being inserted into an element that has - # already been parsed. Deal with any dangling references. - index = parent.contents.index(o) - if index == 0: - previous_element = parent - previous_sibling = None - else: - previous_element = previous_sibling = parent.contents[index-1] - if index == len(parent.contents)-1: - next_element = parent.next_sibling - next_sibling = None - else: - next_element = next_sibling = parent.contents[index+1] - - o.previous_element = previous_element - if previous_element: - previous_element.next_element = o - o.next_element = next_element - if next_element: - next_element.previous_element = o - o.next_sibling = next_sibling - if next_sibling: - next_sibling.previous_sibling = o - o.previous_sibling = previous_sibling - if previous_sibling: - previous_sibling.next_sibling = o - - def _popToTag(self, name, nsprefix=None, inclusivePop=True): - """Pops the tag stack up to and including the most recent - instance of the given tag. If inclusivePop is false, pops the tag - stack up to but *not* including the most recent instqance of - the given tag.""" - #print "Popping to %s" % name - if name == self.ROOT_TAG_NAME: - # The BeautifulSoup object itself can never be popped. - return - - most_recently_popped = None - - stack_size = len(self.tagStack) - for i in range(stack_size - 1, 0, -1): - t = self.tagStack[i] - if (name == t.name and nsprefix == t.prefix): - if inclusivePop: - most_recently_popped = self.popTag() - break - most_recently_popped = self.popTag() - - return most_recently_popped - - def handle_starttag(self, name, namespace, nsprefix, attrs): - """Push a start tag on to the stack. - - If this method returns None, the tag was rejected by the - SoupStrainer. You should proceed as if the tag had not occured - in the document. For instance, if this was a self-closing tag, - don't call handle_endtag. - """ - - # print "Start tag %s: %s" % (name, attrs) - self.endData() - - if (self.parse_only and len(self.tagStack) <= 1 - and (self.parse_only.text - or not self.parse_only.search_tag(name, attrs))): - return None - - tag = Tag(self, self.builder, name, namespace, nsprefix, attrs, - self.currentTag, self._most_recent_element) - if tag is None: - return tag - if self._most_recent_element: - self._most_recent_element.next_element = tag - self._most_recent_element = tag - self.pushTag(tag) - return tag - - def handle_endtag(self, name, nsprefix=None): - #print "End tag: " + name - self.endData() - self._popToTag(name, nsprefix) - - def handle_data(self, data): - self.current_data.append(data) - - def decode(self, pretty_print=False, - eventual_encoding=DEFAULT_OUTPUT_ENCODING, - formatter="minimal"): - """Returns a string or Unicode representation of this document. - To get Unicode, pass None for encoding.""" - - if self.is_xml: - # Print the XML declaration - encoding_part = '' - if eventual_encoding != None: - encoding_part = ' encoding="%s"' % eventual_encoding - prefix = u'\n' % encoding_part - else: - prefix = u'' - if not pretty_print: - indent_level = None - else: - indent_level = 0 - return prefix + super(BeautifulSoup, self).decode( - indent_level, eventual_encoding, formatter) - -# Alias to make it easier to type import: 'from bs4 import _soup' -_s = BeautifulSoup -_soup = BeautifulSoup - -class BeautifulStoneSoup(BeautifulSoup): - """Deprecated interface to an XML parser.""" - - def __init__(self, *args, **kwargs): - kwargs['features'] = 'xml' - warnings.warn( - 'The BeautifulStoneSoup class is deprecated. Instead of using ' - 'it, pass features="xml" into the BeautifulSoup constructor.') - super(BeautifulStoneSoup, self).__init__(*args, **kwargs) - - -class StopParsing(Exception): - pass - -class FeatureNotFound(ValueError): - pass - - -#By default, act as an HTML pretty-printer. -if __name__ == '__main__': - import sys - soup = BeautifulSoup(sys.stdin) - print soup.prettify() diff --git a/lib/bs4/builder/__init__.py b/lib/bs4/builder/__init__.py deleted file mode 100644 index f8fce568..00000000 --- a/lib/bs4/builder/__init__.py +++ /dev/null @@ -1,324 +0,0 @@ -from collections import defaultdict -import itertools -import sys -from bs4.element import ( - CharsetMetaAttributeValue, - ContentMetaAttributeValue, - whitespace_re - ) - -__all__ = [ - 'HTMLTreeBuilder', - 'SAXTreeBuilder', - 'TreeBuilder', - 'TreeBuilderRegistry', - ] - -# Some useful features for a TreeBuilder to have. -FAST = 'fast' -PERMISSIVE = 'permissive' -STRICT = 'strict' -XML = 'xml' -HTML = 'html' -HTML_5 = 'html5' - - -class TreeBuilderRegistry(object): - - def __init__(self): - self.builders_for_feature = defaultdict(list) - self.builders = [] - - def register(self, treebuilder_class): - """Register a treebuilder based on its advertised features.""" - for feature in treebuilder_class.features: - self.builders_for_feature[feature].insert(0, treebuilder_class) - self.builders.insert(0, treebuilder_class) - - def lookup(self, *features): - if len(self.builders) == 0: - # There are no builders at all. - return None - - if len(features) == 0: - # They didn't ask for any features. Give them the most - # recently registered builder. - return self.builders[0] - - # Go down the list of features in order, and eliminate any builders - # that don't match every feature. - features = list(features) - features.reverse() - candidates = None - candidate_set = None - while len(features) > 0: - feature = features.pop() - we_have_the_feature = self.builders_for_feature.get(feature, []) - if len(we_have_the_feature) > 0: - if candidates is None: - candidates = we_have_the_feature - candidate_set = set(candidates) - else: - # Eliminate any candidates that don't have this feature. - candidate_set = candidate_set.intersection( - set(we_have_the_feature)) - - # The only valid candidates are the ones in candidate_set. - # Go through the original list of candidates and pick the first one - # that's in candidate_set. - if candidate_set is None: - return None - for candidate in candidates: - if candidate in candidate_set: - return candidate - return None - -# The BeautifulSoup class will take feature lists from developers and use them -# to look up builders in this registry. -builder_registry = TreeBuilderRegistry() - -class TreeBuilder(object): - """Turn a document into a Beautiful Soup object tree.""" - - NAME = "[Unknown tree builder]" - ALTERNATE_NAMES = [] - features = [] - - is_xml = False - picklable = False - preserve_whitespace_tags = set() - empty_element_tags = None # A tag will be considered an empty-element - # tag when and only when it has no contents. - - # A value for these tag/attribute combinations is a space- or - # comma-separated list of CDATA, rather than a single CDATA. - cdata_list_attributes = {} - - - def __init__(self): - self.soup = None - - def reset(self): - pass - - def can_be_empty_element(self, tag_name): - """Might a tag with this name be an empty-element tag? - - The final markup may or may not actually present this tag as - self-closing. - - For instance: an HTMLBuilder does not consider a

tag to be - an empty-element tag (it's not in - HTMLBuilder.empty_element_tags). This means an empty

tag - will be presented as "

", not "

". - - The default implementation has no opinion about which tags are - empty-element tags, so a tag will be presented as an - empty-element tag if and only if it has no contents. - "" will become "", and "bar" will - be left alone. - """ - if self.empty_element_tags is None: - return True - return tag_name in self.empty_element_tags - - def feed(self, markup): - raise NotImplementedError() - - def prepare_markup(self, markup, user_specified_encoding=None, - document_declared_encoding=None): - return markup, None, None, False - - def test_fragment_to_document(self, fragment): - """Wrap an HTML fragment to make it look like a document. - - Different parsers do this differently. For instance, lxml - introduces an empty tag, and html5lib - doesn't. Abstracting this away lets us write simple tests - which run HTML fragments through the parser and compare the - results against other HTML fragments. - - This method should not be used outside of tests. - """ - return fragment - - def set_up_substitutions(self, tag): - return False - - def _replace_cdata_list_attribute_values(self, tag_name, attrs): - """Replaces class="foo bar" with class=["foo", "bar"] - - Modifies its input in place. - """ - if not attrs: - return attrs - if self.cdata_list_attributes: - universal = self.cdata_list_attributes.get('*', []) - tag_specific = self.cdata_list_attributes.get( - tag_name.lower(), None) - for attr in attrs.keys(): - if attr in universal or (tag_specific and attr in tag_specific): - # We have a "class"-type attribute whose string - # value is a whitespace-separated list of - # values. Split it into a list. - value = attrs[attr] - if isinstance(value, basestring): - values = whitespace_re.split(value) - else: - # html5lib sometimes calls setAttributes twice - # for the same tag when rearranging the parse - # tree. On the second call the attribute value - # here is already a list. If this happens, - # leave the value alone rather than trying to - # split it again. - values = value - attrs[attr] = values - return attrs - -class SAXTreeBuilder(TreeBuilder): - """A Beautiful Soup treebuilder that listens for SAX events.""" - - def feed(self, markup): - raise NotImplementedError() - - def close(self): - pass - - def startElement(self, name, attrs): - attrs = dict((key[1], value) for key, value in list(attrs.items())) - #print "Start %s, %r" % (name, attrs) - self.soup.handle_starttag(name, attrs) - - def endElement(self, name): - #print "End %s" % name - self.soup.handle_endtag(name) - - def startElementNS(self, nsTuple, nodeName, attrs): - # Throw away (ns, nodeName) for now. - self.startElement(nodeName, attrs) - - def endElementNS(self, nsTuple, nodeName): - # Throw away (ns, nodeName) for now. - self.endElement(nodeName) - #handler.endElementNS((ns, node.nodeName), node.nodeName) - - def startPrefixMapping(self, prefix, nodeValue): - # Ignore the prefix for now. - pass - - def endPrefixMapping(self, prefix): - # Ignore the prefix for now. - # handler.endPrefixMapping(prefix) - pass - - def characters(self, content): - self.soup.handle_data(content) - - def startDocument(self): - pass - - def endDocument(self): - pass - - -class HTMLTreeBuilder(TreeBuilder): - """This TreeBuilder knows facts about HTML. - - Such as which tags are empty-element tags. - """ - - preserve_whitespace_tags = set(['pre', 'textarea']) - empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta', - 'spacer', 'link', 'frame', 'base']) - - # The HTML standard defines these attributes as containing a - # space-separated list of values, not a single value. That is, - # class="foo bar" means that the 'class' attribute has two values, - # 'foo' and 'bar', not the single value 'foo bar'. When we - # encounter one of these attributes, we will parse its value into - # a list of values if possible. Upon output, the list will be - # converted back into a string. - cdata_list_attributes = { - "*" : ['class', 'accesskey', 'dropzone'], - "a" : ['rel', 'rev'], - "link" : ['rel', 'rev'], - "td" : ["headers"], - "th" : ["headers"], - "td" : ["headers"], - "form" : ["accept-charset"], - "object" : ["archive"], - - # These are HTML5 specific, as are *.accesskey and *.dropzone above. - "area" : ["rel"], - "icon" : ["sizes"], - "iframe" : ["sandbox"], - "output" : ["for"], - } - - def set_up_substitutions(self, tag): - # We are only interested in tags - if tag.name != 'meta': - return False - - http_equiv = tag.get('http-equiv') - content = tag.get('content') - charset = tag.get('charset') - - # We are interested in tags that say what encoding the - # document was originally in. This means HTML 5-style - # tags that provide the "charset" attribute. It also means - # HTML 4-style tags that provide the "content" - # attribute and have "http-equiv" set to "content-type". - # - # In both cases we will replace the value of the appropriate - # attribute with a standin object that can take on any - # encoding. - meta_encoding = None - if charset is not None: - # HTML 5 style: - # - meta_encoding = charset - tag['charset'] = CharsetMetaAttributeValue(charset) - - elif (content is not None and http_equiv is not None - and http_equiv.lower() == 'content-type'): - # HTML 4 style: - # - tag['content'] = ContentMetaAttributeValue(content) - - return (meta_encoding is not None) - -def register_treebuilders_from(module): - """Copy TreeBuilders from the given module into this module.""" - # I'm fairly sure this is not the best way to do this. - this_module = sys.modules['bs4.builder'] - for name in module.__all__: - obj = getattr(module, name) - - if issubclass(obj, TreeBuilder): - setattr(this_module, name, obj) - this_module.__all__.append(name) - # Register the builder while we're at it. - this_module.builder_registry.register(obj) - -class ParserRejectedMarkup(Exception): - pass - -# Builders are registered in reverse order of priority, so that custom -# builder registrations will take precedence. In general, we want lxml -# to take precedence over html5lib, because it's faster. And we only -# want to use HTMLParser as a last result. -from . import _htmlparser -register_treebuilders_from(_htmlparser) -try: - from . import _html5lib - register_treebuilders_from(_html5lib) -except ImportError: - # They don't have html5lib installed. - pass -try: - from . import _lxml - register_treebuilders_from(_lxml) -except ImportError: - # They don't have lxml installed. - pass diff --git a/lib/bs4/builder/_html5lib.py b/lib/bs4/builder/_html5lib.py deleted file mode 100644 index ab5793c1..00000000 --- a/lib/bs4/builder/_html5lib.py +++ /dev/null @@ -1,329 +0,0 @@ -__all__ = [ - 'HTML5TreeBuilder', - ] - -from pdb import set_trace -import warnings -from bs4.builder import ( - PERMISSIVE, - HTML, - HTML_5, - HTMLTreeBuilder, - ) -from bs4.element import ( - NamespacedAttribute, - whitespace_re, -) -import html5lib -from html5lib.constants import namespaces -from bs4.element import ( - Comment, - Doctype, - NavigableString, - Tag, - ) - -class HTML5TreeBuilder(HTMLTreeBuilder): - """Use html5lib to build a tree.""" - - NAME = "html5lib" - - features = [NAME, PERMISSIVE, HTML_5, HTML] - - def prepare_markup(self, markup, user_specified_encoding, - document_declared_encoding=None, exclude_encodings=None): - # Store the user-specified encoding for use later on. - self.user_specified_encoding = user_specified_encoding - - # document_declared_encoding and exclude_encodings aren't used - # ATM because the html5lib TreeBuilder doesn't use - # UnicodeDammit. - if exclude_encodings: - warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.") - yield (markup, None, None, False) - - # These methods are defined by Beautiful Soup. - def feed(self, markup): - if self.soup.parse_only is not None: - warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.") - parser = html5lib.HTMLParser(tree=self.create_treebuilder) - doc = parser.parse(markup, encoding=self.user_specified_encoding) - - # Set the character encoding detected by the tokenizer. - if isinstance(markup, unicode): - # We need to special-case this because html5lib sets - # charEncoding to UTF-8 if it gets Unicode input. - doc.original_encoding = None - else: - doc.original_encoding = parser.tokenizer.stream.charEncoding[0] - - def create_treebuilder(self, namespaceHTMLElements): - self.underlying_builder = TreeBuilderForHtml5lib( - self.soup, namespaceHTMLElements) - return self.underlying_builder - - def test_fragment_to_document(self, fragment): - """See `TreeBuilder`.""" - return u'%s' % fragment - - -class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder): - - def __init__(self, soup, namespaceHTMLElements): - self.soup = soup - super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements) - - def documentClass(self): - self.soup.reset() - return Element(self.soup, self.soup, None) - - def insertDoctype(self, token): - name = token["name"] - publicId = token["publicId"] - systemId = token["systemId"] - - doctype = Doctype.for_name_and_ids(name, publicId, systemId) - self.soup.object_was_parsed(doctype) - - def elementClass(self, name, namespace): - tag = self.soup.new_tag(name, namespace) - return Element(tag, self.soup, namespace) - - def commentClass(self, data): - return TextNode(Comment(data), self.soup) - - def fragmentClass(self): - self.soup = BeautifulSoup("") - self.soup.name = "[document_fragment]" - return Element(self.soup, self.soup, None) - - def appendChild(self, node): - # XXX This code is not covered by the BS4 tests. - self.soup.append(node.element) - - def getDocument(self): - return self.soup - - def getFragment(self): - return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element - -class AttrList(object): - def __init__(self, element): - self.element = element - self.attrs = dict(self.element.attrs) - def __iter__(self): - return list(self.attrs.items()).__iter__() - def __setitem__(self, name, value): - # If this attribute is a multi-valued attribute for this element, - # turn its value into a list. - list_attr = HTML5TreeBuilder.cdata_list_attributes - if (name in list_attr['*'] - or (self.element.name in list_attr - and name in list_attr[self.element.name])): - value = whitespace_re.split(value) - self.element[name] = value - def items(self): - return list(self.attrs.items()) - def keys(self): - return list(self.attrs.keys()) - def __len__(self): - return len(self.attrs) - def __getitem__(self, name): - return self.attrs[name] - def __contains__(self, name): - return name in list(self.attrs.keys()) - - -class Element(html5lib.treebuilders._base.Node): - def __init__(self, element, soup, namespace): - html5lib.treebuilders._base.Node.__init__(self, element.name) - self.element = element - self.soup = soup - self.namespace = namespace - - def appendChild(self, node): - string_child = child = None - if isinstance(node, basestring): - # Some other piece of code decided to pass in a string - # instead of creating a TextElement object to contain the - # string. - string_child = child = node - elif isinstance(node, Tag): - # Some other piece of code decided to pass in a Tag - # instead of creating an Element object to contain the - # Tag. - child = node - elif node.element.__class__ == NavigableString: - string_child = child = node.element - else: - child = node.element - - if not isinstance(child, basestring) and child.parent is not None: - node.element.extract() - - if (string_child and self.element.contents - and self.element.contents[-1].__class__ == NavigableString): - # We are appending a string onto another string. - # TODO This has O(n^2) performance, for input like - # "aaa..." - old_element = self.element.contents[-1] - new_element = self.soup.new_string(old_element + string_child) - old_element.replace_with(new_element) - self.soup._most_recent_element = new_element - else: - if isinstance(node, basestring): - # Create a brand new NavigableString from this string. - child = self.soup.new_string(node) - - # Tell Beautiful Soup to act as if it parsed this element - # immediately after the parent's last descendant. (Or - # immediately after the parent, if it has no children.) - if self.element.contents: - most_recent_element = self.element._last_descendant(False) - elif self.element.next_element is not None: - # Something from further ahead in the parse tree is - # being inserted into this earlier element. This is - # very annoying because it means an expensive search - # for the last element in the tree. - most_recent_element = self.soup._last_descendant() - else: - most_recent_element = self.element - - self.soup.object_was_parsed( - child, parent=self.element, - most_recent_element=most_recent_element) - - def getAttributes(self): - return AttrList(self.element) - - def setAttributes(self, attributes): - - if attributes is not None and len(attributes) > 0: - - converted_attributes = [] - for name, value in list(attributes.items()): - if isinstance(name, tuple): - new_name = NamespacedAttribute(*name) - del attributes[name] - attributes[new_name] = value - - self.soup.builder._replace_cdata_list_attribute_values( - self.name, attributes) - for name, value in attributes.items(): - self.element[name] = value - - # The attributes may contain variables that need substitution. - # Call set_up_substitutions manually. - # - # The Tag constructor called this method when the Tag was created, - # but we just set/changed the attributes, so call it again. - self.soup.builder.set_up_substitutions(self.element) - attributes = property(getAttributes, setAttributes) - - def insertText(self, data, insertBefore=None): - if insertBefore: - text = TextNode(self.soup.new_string(data), self.soup) - self.insertBefore(data, insertBefore) - else: - self.appendChild(data) - - def insertBefore(self, node, refNode): - index = self.element.index(refNode.element) - if (node.element.__class__ == NavigableString and self.element.contents - and self.element.contents[index-1].__class__ == NavigableString): - # (See comments in appendChild) - old_node = self.element.contents[index-1] - new_str = self.soup.new_string(old_node + node.element) - old_node.replace_with(new_str) - else: - self.element.insert(index, node.element) - node.parent = self - - def removeChild(self, node): - node.element.extract() - - def reparentChildren(self, new_parent): - """Move all of this tag's children into another tag.""" - # print "MOVE", self.element.contents - # print "FROM", self.element - # print "TO", new_parent.element - element = self.element - new_parent_element = new_parent.element - # Determine what this tag's next_element will be once all the children - # are removed. - final_next_element = element.next_sibling - - new_parents_last_descendant = new_parent_element._last_descendant(False, False) - if len(new_parent_element.contents) > 0: - # The new parent already contains children. We will be - # appending this tag's children to the end. - new_parents_last_child = new_parent_element.contents[-1] - new_parents_last_descendant_next_element = new_parents_last_descendant.next_element - else: - # The new parent contains no children. - new_parents_last_child = None - new_parents_last_descendant_next_element = new_parent_element.next_element - - to_append = element.contents - append_after = new_parent_element.contents - if len(to_append) > 0: - # Set the first child's previous_element and previous_sibling - # to elements within the new parent - first_child = to_append[0] - if new_parents_last_descendant: - first_child.previous_element = new_parents_last_descendant - else: - first_child.previous_element = new_parent_element - first_child.previous_sibling = new_parents_last_child - if new_parents_last_descendant: - new_parents_last_descendant.next_element = first_child - else: - new_parent_element.next_element = first_child - if new_parents_last_child: - new_parents_last_child.next_sibling = first_child - - # Fix the last child's next_element and next_sibling - last_child = to_append[-1] - last_child.next_element = new_parents_last_descendant_next_element - if new_parents_last_descendant_next_element: - new_parents_last_descendant_next_element.previous_element = last_child - last_child.next_sibling = None - - for child in to_append: - child.parent = new_parent_element - new_parent_element.contents.append(child) - - # Now that this element has no children, change its .next_element. - element.contents = [] - element.next_element = final_next_element - - # print "DONE WITH MOVE" - # print "FROM", self.element - # print "TO", new_parent_element - - def cloneNode(self): - tag = self.soup.new_tag(self.element.name, self.namespace) - node = Element(tag, self.soup, self.namespace) - for key,value in self.attributes: - node.attributes[key] = value - return node - - def hasContent(self): - return self.element.contents - - def getNameTuple(self): - if self.namespace == None: - return namespaces["html"], self.name - else: - return self.namespace, self.name - - nameTuple = property(getNameTuple) - -class TextNode(Element): - def __init__(self, element, soup): - html5lib.treebuilders._base.Node.__init__(self, None) - self.element = element - self.soup = soup - - def cloneNode(self): - raise NotImplementedError diff --git a/lib/bs4/builder/_htmlparser.py b/lib/bs4/builder/_htmlparser.py deleted file mode 100644 index 0101d647..00000000 --- a/lib/bs4/builder/_htmlparser.py +++ /dev/null @@ -1,262 +0,0 @@ -"""Use the HTMLParser library to parse HTML files that aren't too bad.""" - -__all__ = [ - 'HTMLParserTreeBuilder', - ] - -from HTMLParser import HTMLParser - -try: - from HTMLParser import HTMLParseError -except ImportError, e: - # HTMLParseError is removed in Python 3.5. Since it can never be - # thrown in 3.5, we can just define our own class as a placeholder. - class HTMLParseError(Exception): - pass - -import sys -import warnings - -# Starting in Python 3.2, the HTMLParser constructor takes a 'strict' -# argument, which we'd like to set to False. Unfortunately, -# http://bugs.python.org/issue13273 makes strict=True a better bet -# before Python 3.2.3. -# -# At the end of this file, we monkeypatch HTMLParser so that -# strict=True works well on Python 3.2.2. -major, minor, release = sys.version_info[:3] -CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3 -CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3 -CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4 - - -from bs4.element import ( - CData, - Comment, - Declaration, - Doctype, - ProcessingInstruction, - ) -from bs4.dammit import EntitySubstitution, UnicodeDammit - -from bs4.builder import ( - HTML, - HTMLTreeBuilder, - STRICT, - ) - - -HTMLPARSER = 'html.parser' - -class BeautifulSoupHTMLParser(HTMLParser): - def handle_starttag(self, name, attrs): - # XXX namespace - attr_dict = {} - for key, value in attrs: - # Change None attribute values to the empty string - # for consistency with the other tree builders. - if value is None: - value = '' - attr_dict[key] = value - attrvalue = '""' - self.soup.handle_starttag(name, None, None, attr_dict) - - def handle_endtag(self, name): - self.soup.handle_endtag(name) - - def handle_data(self, data): - self.soup.handle_data(data) - - def handle_charref(self, name): - # XXX workaround for a bug in HTMLParser. Remove this once - # it's fixed in all supported versions. - # http://bugs.python.org/issue13633 - if name.startswith('x'): - real_name = int(name.lstrip('x'), 16) - elif name.startswith('X'): - real_name = int(name.lstrip('X'), 16) - else: - real_name = int(name) - - try: - data = unichr(real_name) - except (ValueError, OverflowError), e: - data = u"\N{REPLACEMENT CHARACTER}" - - self.handle_data(data) - - def handle_entityref(self, name): - character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name) - if character is not None: - data = character - else: - data = "&%s;" % name - self.handle_data(data) - - def handle_comment(self, data): - self.soup.endData() - self.soup.handle_data(data) - self.soup.endData(Comment) - - def handle_decl(self, data): - self.soup.endData() - if data.startswith("DOCTYPE "): - data = data[len("DOCTYPE "):] - elif data == 'DOCTYPE': - # i.e. "" - data = '' - self.soup.handle_data(data) - self.soup.endData(Doctype) - - def unknown_decl(self, data): - if data.upper().startswith('CDATA['): - cls = CData - data = data[len('CDATA['):] - else: - cls = Declaration - self.soup.endData() - self.soup.handle_data(data) - self.soup.endData(cls) - - def handle_pi(self, data): - self.soup.endData() - self.soup.handle_data(data) - self.soup.endData(ProcessingInstruction) - - -class HTMLParserTreeBuilder(HTMLTreeBuilder): - - is_xml = False - picklable = True - NAME = HTMLPARSER - features = [NAME, HTML, STRICT] - - def __init__(self, *args, **kwargs): - if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED: - kwargs['strict'] = False - if CONSTRUCTOR_TAKES_CONVERT_CHARREFS: - kwargs['convert_charrefs'] = False - self.parser_args = (args, kwargs) - - def prepare_markup(self, markup, user_specified_encoding=None, - document_declared_encoding=None, exclude_encodings=None): - """ - :return: A 4-tuple (markup, original encoding, encoding - declared within markup, whether any characters had to be - replaced with REPLACEMENT CHARACTER). - """ - if isinstance(markup, unicode): - yield (markup, None, None, False) - return - - try_encodings = [user_specified_encoding, document_declared_encoding] - dammit = UnicodeDammit(markup, try_encodings, is_html=True, - exclude_encodings=exclude_encodings) - yield (dammit.markup, dammit.original_encoding, - dammit.declared_html_encoding, - dammit.contains_replacement_characters) - - def feed(self, markup): - args, kwargs = self.parser_args - parser = BeautifulSoupHTMLParser(*args, **kwargs) - parser.soup = self.soup - try: - parser.feed(markup) - except HTMLParseError, e: - warnings.warn(RuntimeWarning( - "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) - raise e - -# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some -# 3.2.3 code. This ensures they don't treat markup like

as a -# string. -# -# XXX This code can be removed once most Python 3 users are on 3.2.3. -if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT: - import re - attrfind_tolerant = re.compile( - r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*' - r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?') - HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant - - locatestarttagend = re.compile(r""" - <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name - (?:\s+ # whitespace before attribute name - (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name - (?:\s*=\s* # value indicator - (?:'[^']*' # LITA-enclosed value - |\"[^\"]*\" # LIT-enclosed value - |[^'\">\s]+ # bare value - ) - )? - ) - )* - \s* # trailing whitespace -""", re.VERBOSE) - BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend - - from html.parser import tagfind, attrfind - - def parse_starttag(self, i): - self.__starttag_text = None - endpos = self.check_for_whole_start_tag(i) - if endpos < 0: - return endpos - rawdata = self.rawdata - self.__starttag_text = rawdata[i:endpos] - - # Now parse the data between i+1 and j into a tag and attrs - attrs = [] - match = tagfind.match(rawdata, i+1) - assert match, 'unexpected call to parse_starttag()' - k = match.end() - self.lasttag = tag = rawdata[i+1:k].lower() - while k < endpos: - if self.strict: - m = attrfind.match(rawdata, k) - else: - m = attrfind_tolerant.match(rawdata, k) - if not m: - break - attrname, rest, attrvalue = m.group(1, 2, 3) - if not rest: - attrvalue = None - elif attrvalue[:1] == '\'' == attrvalue[-1:] or \ - attrvalue[:1] == '"' == attrvalue[-1:]: - attrvalue = attrvalue[1:-1] - if attrvalue: - attrvalue = self.unescape(attrvalue) - attrs.append((attrname.lower(), attrvalue)) - k = m.end() - - end = rawdata[k:endpos].strip() - if end not in (">", "/>"): - lineno, offset = self.getpos() - if "\n" in self.__starttag_text: - lineno = lineno + self.__starttag_text.count("\n") - offset = len(self.__starttag_text) \ - - self.__starttag_text.rfind("\n") - else: - offset = offset + len(self.__starttag_text) - if self.strict: - self.error("junk characters in start tag: %r" - % (rawdata[k:endpos][:20],)) - self.handle_data(rawdata[i:endpos]) - return endpos - if end.endswith('/>'): - # XHTML-style empty tag: - self.handle_startendtag(tag, attrs) - else: - self.handle_starttag(tag, attrs) - if tag in self.CDATA_CONTENT_ELEMENTS: - self.set_cdata_mode(tag) - return endpos - - def set_cdata_mode(self, elem): - self.cdata_elem = elem.lower() - self.interesting = re.compile(r'' % self.cdata_elem, re.I) - - BeautifulSoupHTMLParser.parse_starttag = parse_starttag - BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode - - CONSTRUCTOR_TAKES_STRICT = True diff --git a/lib/bs4/builder/_lxml.py b/lib/bs4/builder/_lxml.py deleted file mode 100644 index 9e8f88fb..00000000 --- a/lib/bs4/builder/_lxml.py +++ /dev/null @@ -1,248 +0,0 @@ -__all__ = [ - 'LXMLTreeBuilderForXML', - 'LXMLTreeBuilder', - ] - -from io import BytesIO -from StringIO import StringIO -import collections -from lxml import etree -from bs4.element import ( - Comment, - Doctype, - NamespacedAttribute, - ProcessingInstruction, -) -from bs4.builder import ( - FAST, - HTML, - HTMLTreeBuilder, - PERMISSIVE, - ParserRejectedMarkup, - TreeBuilder, - XML) -from bs4.dammit import EncodingDetector - -LXML = 'lxml' - -class LXMLTreeBuilderForXML(TreeBuilder): - DEFAULT_PARSER_CLASS = etree.XMLParser - - is_xml = True - - NAME = "lxml-xml" - ALTERNATE_NAMES = ["xml"] - - # Well, it's permissive by XML parser standards. - features = [NAME, LXML, XML, FAST, PERMISSIVE] - - CHUNK_SIZE = 512 - - # This namespace mapping is specified in the XML Namespace - # standard. - DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"} - - def default_parser(self, encoding): - # This can either return a parser object or a class, which - # will be instantiated with default arguments. - if self._default_parser is not None: - return self._default_parser - return etree.XMLParser( - target=self, strip_cdata=False, recover=True, encoding=encoding) - - def parser_for(self, encoding): - # Use the default parser. - parser = self.default_parser(encoding) - - if isinstance(parser, collections.Callable): - # Instantiate the parser with default arguments - parser = parser(target=self, strip_cdata=False, encoding=encoding) - return parser - - def __init__(self, parser=None, empty_element_tags=None): - # TODO: Issue a warning if parser is present but not a - # callable, since that means there's no way to create new - # parsers for different encodings. - self._default_parser = parser - if empty_element_tags is not None: - self.empty_element_tags = set(empty_element_tags) - self.soup = None - self.nsmaps = [self.DEFAULT_NSMAPS] - - def _getNsTag(self, tag): - # Split the namespace URL out of a fully-qualified lxml tag - # name. Copied from lxml's src/lxml/sax.py. - if tag[0] == '{': - return tuple(tag[1:].split('}', 1)) - else: - return (None, tag) - - def prepare_markup(self, markup, user_specified_encoding=None, - exclude_encodings=None, - document_declared_encoding=None): - """ - :yield: A series of 4-tuples. - (markup, encoding, declared encoding, - has undergone character replacement) - - Each 4-tuple represents a strategy for parsing the document. - """ - if isinstance(markup, unicode): - # We were given Unicode. Maybe lxml can parse Unicode on - # this system? - yield markup, None, document_declared_encoding, False - - if isinstance(markup, unicode): - # No, apparently not. Convert the Unicode to UTF-8 and - # tell lxml to parse it as UTF-8. - yield (markup.encode("utf8"), "utf8", - document_declared_encoding, False) - - # Instead of using UnicodeDammit to convert the bytestring to - # Unicode using different encodings, use EncodingDetector to - # iterate over the encodings, and tell lxml to try to parse - # the document as each one in turn. - is_html = not self.is_xml - try_encodings = [user_specified_encoding, document_declared_encoding] - detector = EncodingDetector( - markup, try_encodings, is_html, exclude_encodings) - for encoding in detector.encodings: - yield (detector.markup, encoding, document_declared_encoding, False) - - def feed(self, markup): - if isinstance(markup, bytes): - markup = BytesIO(markup) - elif isinstance(markup, unicode): - markup = StringIO(markup) - - # Call feed() at least once, even if the markup is empty, - # or the parser won't be initialized. - data = markup.read(self.CHUNK_SIZE) - try: - self.parser = self.parser_for(self.soup.original_encoding) - self.parser.feed(data) - while len(data) != 0: - # Now call feed() on the rest of the data, chunk by chunk. - data = markup.read(self.CHUNK_SIZE) - if len(data) != 0: - self.parser.feed(data) - self.parser.close() - except (UnicodeDecodeError, LookupError, etree.ParserError), e: - raise ParserRejectedMarkup(str(e)) - - def close(self): - self.nsmaps = [self.DEFAULT_NSMAPS] - - def start(self, name, attrs, nsmap={}): - # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy. - attrs = dict(attrs) - nsprefix = None - # Invert each namespace map as it comes in. - if len(self.nsmaps) > 1: - # There are no new namespaces for this tag, but - # non-default namespaces are in play, so we need a - # separate tag stack to know when they end. - self.nsmaps.append(None) - elif len(nsmap) > 0: - # A new namespace mapping has come into play. - inverted_nsmap = dict((value, key) for key, value in nsmap.items()) - self.nsmaps.append(inverted_nsmap) - # Also treat the namespace mapping as a set of attributes on the - # tag, so we can recreate it later. - attrs = attrs.copy() - for prefix, namespace in nsmap.items(): - attribute = NamespacedAttribute( - "xmlns", prefix, "http://www.w3.org/2000/xmlns/") - attrs[attribute] = namespace - - # Namespaces are in play. Find any attributes that came in - # from lxml with namespaces attached to their names, and - # turn then into NamespacedAttribute objects. - new_attrs = {} - for attr, value in attrs.items(): - namespace, attr = self._getNsTag(attr) - if namespace is None: - new_attrs[attr] = value - else: - nsprefix = self._prefix_for_namespace(namespace) - attr = NamespacedAttribute(nsprefix, attr, namespace) - new_attrs[attr] = value - attrs = new_attrs - - namespace, name = self._getNsTag(name) - nsprefix = self._prefix_for_namespace(namespace) - self.soup.handle_starttag(name, namespace, nsprefix, attrs) - - def _prefix_for_namespace(self, namespace): - """Find the currently active prefix for the given namespace.""" - if namespace is None: - return None - for inverted_nsmap in reversed(self.nsmaps): - if inverted_nsmap is not None and namespace in inverted_nsmap: - return inverted_nsmap[namespace] - return None - - def end(self, name): - self.soup.endData() - completed_tag = self.soup.tagStack[-1] - namespace, name = self._getNsTag(name) - nsprefix = None - if namespace is not None: - for inverted_nsmap in reversed(self.nsmaps): - if inverted_nsmap is not None and namespace in inverted_nsmap: - nsprefix = inverted_nsmap[namespace] - break - self.soup.handle_endtag(name, nsprefix) - if len(self.nsmaps) > 1: - # This tag, or one of its parents, introduced a namespace - # mapping, so pop it off the stack. - self.nsmaps.pop() - - def pi(self, target, data): - self.soup.endData() - self.soup.handle_data(target + ' ' + data) - self.soup.endData(ProcessingInstruction) - - def data(self, content): - self.soup.handle_data(content) - - def doctype(self, name, pubid, system): - self.soup.endData() - doctype = Doctype.for_name_and_ids(name, pubid, system) - self.soup.object_was_parsed(doctype) - - def comment(self, content): - "Handle comments as Comment objects." - self.soup.endData() - self.soup.handle_data(content) - self.soup.endData(Comment) - - def test_fragment_to_document(self, fragment): - """See `TreeBuilder`.""" - return u'\n%s' % fragment - - -class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): - - NAME = LXML - ALTERNATE_NAMES = ["lxml-html"] - - features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE] - is_xml = False - - def default_parser(self, encoding): - return etree.HTMLParser - - def feed(self, markup): - encoding = self.soup.original_encoding - try: - self.parser = self.parser_for(encoding) - self.parser.feed(markup) - self.parser.close() - except (UnicodeDecodeError, LookupError, etree.ParserError), e: - raise ParserRejectedMarkup(str(e)) - - - def test_fragment_to_document(self, fragment): - """See `TreeBuilder`.""" - return u'%s' % fragment diff --git a/lib/bs4/dammit.py b/lib/bs4/dammit.py deleted file mode 100644 index 317ad6d7..00000000 --- a/lib/bs4/dammit.py +++ /dev/null @@ -1,839 +0,0 @@ -# -*- coding: utf-8 -*- -"""Beautiful Soup bonus library: Unicode, Dammit - -This library converts a bytestream to Unicode through any means -necessary. It is heavily based on code from Mark Pilgrim's Universal -Feed Parser. It works best on XML and HTML, but it does not rewrite the -XML or HTML to reflect a new encoding; that's the tree builder's job. -""" - -from pdb import set_trace -import codecs -from htmlentitydefs import codepoint2name -import re -import logging -import string - -# Import a library to autodetect character encodings. -chardet_type = None -try: - # First try the fast C implementation. - # PyPI package: cchardet - import cchardet - def chardet_dammit(s): - return cchardet.detect(s)['encoding'] -except ImportError: - try: - # Fall back to the pure Python implementation - # Debian package: python-chardet - # PyPI package: chardet - import chardet - def chardet_dammit(s): - return chardet.detect(s)['encoding'] - #import chardet.constants - #chardet.constants._debug = 1 - except ImportError: - # No chardet available. - def chardet_dammit(s): - return None - -# Available from http://cjkpython.i18n.org/. -try: - import iconv_codec -except ImportError: - pass - -xml_encoding_re = re.compile( - '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I) -html_meta_re = re.compile( - '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I) - -class EntitySubstitution(object): - - """Substitute XML or HTML entities for the corresponding characters.""" - - def _populate_class_variables(): - lookup = {} - reverse_lookup = {} - characters_for_re = [] - for codepoint, name in list(codepoint2name.items()): - character = unichr(codepoint) - if codepoint != 34: - # There's no point in turning the quotation mark into - # ", unless it happens within an attribute value, which - # is handled elsewhere. - characters_for_re.append(character) - lookup[character] = name - # But we do want to turn " into the quotation mark. - reverse_lookup[name] = character - re_definition = "[%s]" % "".join(characters_for_re) - return lookup, reverse_lookup, re.compile(re_definition) - (CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER, - CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables() - - CHARACTER_TO_XML_ENTITY = { - "'": "apos", - '"': "quot", - "&": "amp", - "<": "lt", - ">": "gt", - } - - BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" - "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" - ")") - - AMPERSAND_OR_BRACKET = re.compile("([<>&])") - - @classmethod - def _substitute_html_entity(cls, matchobj): - entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0)) - return "&%s;" % entity - - @classmethod - def _substitute_xml_entity(cls, matchobj): - """Used with a regular expression to substitute the - appropriate XML entity for an XML special character.""" - entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)] - return "&%s;" % entity - - @classmethod - def quoted_attribute_value(self, value): - """Make a value into a quoted XML attribute, possibly escaping it. - - Most strings will be quoted using double quotes. - - Bob's Bar -> "Bob's Bar" - - If a string contains double quotes, it will be quoted using - single quotes. - - Welcome to "my bar" -> 'Welcome to "my bar"' - - If a string contains both single and double quotes, the - double quotes will be escaped, and the string will be quoted - using double quotes. - - Welcome to "Bob's Bar" -> "Welcome to "Bob's bar" - """ - quote_with = '"' - if '"' in value: - if "'" in value: - # The string contains both single and double - # quotes. Turn the double quotes into - # entities. We quote the double quotes rather than - # the single quotes because the entity name is - # """ whether this is HTML or XML. If we - # quoted the single quotes, we'd have to decide - # between ' and &squot;. - replace_with = """ - value = value.replace('"', replace_with) - else: - # There are double quotes but no single quotes. - # We can use single quotes to quote the attribute. - quote_with = "'" - return quote_with + value + quote_with - - @classmethod - def substitute_xml(cls, value, make_quoted_attribute=False): - """Substitute XML entities for special XML characters. - - :param value: A string to be substituted. The less-than sign - will become <, the greater-than sign will become >, - and any ampersands will become &. If you want ampersands - that appear to be part of an entity definition to be left - alone, use substitute_xml_containing_entities() instead. - - :param make_quoted_attribute: If True, then the string will be - quoted, as befits an attribute value. - """ - # Escape angle brackets and ampersands. - value = cls.AMPERSAND_OR_BRACKET.sub( - cls._substitute_xml_entity, value) - - if make_quoted_attribute: - value = cls.quoted_attribute_value(value) - return value - - @classmethod - def substitute_xml_containing_entities( - cls, value, make_quoted_attribute=False): - """Substitute XML entities for special XML characters. - - :param value: A string to be substituted. The less-than sign will - become <, the greater-than sign will become >, and any - ampersands that are not part of an entity defition will - become &. - - :param make_quoted_attribute: If True, then the string will be - quoted, as befits an attribute value. - """ - # Escape angle brackets, and ampersands that aren't part of - # entities. - value = cls.BARE_AMPERSAND_OR_BRACKET.sub( - cls._substitute_xml_entity, value) - - if make_quoted_attribute: - value = cls.quoted_attribute_value(value) - return value - - @classmethod - def substitute_html(cls, s): - """Replace certain Unicode characters with named HTML entities. - - This differs from data.encode(encoding, 'xmlcharrefreplace') - in that the goal is to make the result more readable (to those - with ASCII displays) rather than to recover from - errors. There's absolutely nothing wrong with a UTF-8 string - containg a LATIN SMALL LETTER E WITH ACUTE, but replacing that - character with "é" will make it more readable to some - people. - """ - return cls.CHARACTER_TO_HTML_ENTITY_RE.sub( - cls._substitute_html_entity, s) - - -class EncodingDetector: - """Suggests a number of possible encodings for a bytestring. - - Order of precedence: - - 1. Encodings you specifically tell EncodingDetector to try first - (the override_encodings argument to the constructor). - - 2. An encoding declared within the bytestring itself, either in an - XML declaration (if the bytestring is to be interpreted as an XML - document), or in a tag (if the bytestring is to be - interpreted as an HTML document.) - - 3. An encoding detected through textual analysis by chardet, - cchardet, or a similar external library. - - 4. UTF-8. - - 5. Windows-1252. - """ - def __init__(self, markup, override_encodings=None, is_html=False, - exclude_encodings=None): - self.override_encodings = override_encodings or [] - exclude_encodings = exclude_encodings or [] - self.exclude_encodings = set([x.lower() for x in exclude_encodings]) - self.chardet_encoding = None - self.is_html = is_html - self.declared_encoding = None - - # First order of business: strip a byte-order mark. - self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup) - - def _usable(self, encoding, tried): - if encoding is not None: - encoding = encoding.lower() - if encoding in self.exclude_encodings: - return False - if encoding not in tried: - tried.add(encoding) - return True - return False - - @property - def encodings(self): - """Yield a number of encodings that might work for this markup.""" - tried = set() - for e in self.override_encodings: - if self._usable(e, tried): - yield e - - # Did the document originally start with a byte-order mark - # that indicated its encoding? - if self._usable(self.sniffed_encoding, tried): - yield self.sniffed_encoding - - # Look within the document for an XML or HTML encoding - # declaration. - if self.declared_encoding is None: - self.declared_encoding = self.find_declared_encoding( - self.markup, self.is_html) - if self._usable(self.declared_encoding, tried): - yield self.declared_encoding - - # Use third-party character set detection to guess at the - # encoding. - if self.chardet_encoding is None: - self.chardet_encoding = chardet_dammit(self.markup) - if self._usable(self.chardet_encoding, tried): - yield self.chardet_encoding - - # As a last-ditch effort, try utf-8 and windows-1252. - for e in ('utf-8', 'windows-1252'): - if self._usable(e, tried): - yield e - - @classmethod - def strip_byte_order_mark(cls, data): - """If a byte-order mark is present, strip it and return the encoding it implies.""" - encoding = None - if isinstance(data, unicode): - # Unicode data cannot have a byte-order mark. - return data, encoding - if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \ - and (data[2:4] != '\x00\x00'): - encoding = 'utf-16be' - data = data[2:] - elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \ - and (data[2:4] != '\x00\x00'): - encoding = 'utf-16le' - data = data[2:] - elif data[:3] == b'\xef\xbb\xbf': - encoding = 'utf-8' - data = data[3:] - elif data[:4] == b'\x00\x00\xfe\xff': - encoding = 'utf-32be' - data = data[4:] - elif data[:4] == b'\xff\xfe\x00\x00': - encoding = 'utf-32le' - data = data[4:] - return data, encoding - - @classmethod - def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False): - """Given a document, tries to find its declared encoding. - - An XML encoding is declared at the beginning of the document. - - An HTML encoding is declared in a tag, hopefully near the - beginning of the document. - """ - if search_entire_document: - xml_endpos = html_endpos = len(markup) - else: - xml_endpos = 1024 - html_endpos = max(2048, int(len(markup) * 0.05)) - - declared_encoding = None - declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos) - if not declared_encoding_match and is_html: - declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos) - if declared_encoding_match is not None: - declared_encoding = declared_encoding_match.groups()[0].decode( - 'ascii', 'replace') - if declared_encoding: - return declared_encoding.lower() - return None - -class UnicodeDammit: - """A class for detecting the encoding of a *ML document and - converting it to a Unicode string. If the source encoding is - windows-1252, can replace MS smart quotes with their HTML or XML - equivalents.""" - - # This dictionary maps commonly seen values for "charset" in HTML - # meta tags to the corresponding Python codec names. It only covers - # values that aren't in Python's aliases and can't be determined - # by the heuristics in find_codec. - CHARSET_ALIASES = {"macintosh": "mac-roman", - "x-sjis": "shift-jis"} - - ENCODINGS_WITH_SMART_QUOTES = [ - "windows-1252", - "iso-8859-1", - "iso-8859-2", - ] - - def __init__(self, markup, override_encodings=[], - smart_quotes_to=None, is_html=False, exclude_encodings=[]): - self.smart_quotes_to = smart_quotes_to - self.tried_encodings = [] - self.contains_replacement_characters = False - self.is_html = is_html - - self.detector = EncodingDetector( - markup, override_encodings, is_html, exclude_encodings) - - # Short-circuit if the data is in Unicode to begin with. - if isinstance(markup, unicode) or markup == '': - self.markup = markup - self.unicode_markup = unicode(markup) - self.original_encoding = None - return - - # The encoding detector may have stripped a byte-order mark. - # Use the stripped markup from this point on. - self.markup = self.detector.markup - - u = None - for encoding in self.detector.encodings: - markup = self.detector.markup - u = self._convert_from(encoding) - if u is not None: - break - - if not u: - # None of the encodings worked. As an absolute last resort, - # try them again with character replacement. - - for encoding in self.detector.encodings: - if encoding != "ascii": - u = self._convert_from(encoding, "replace") - if u is not None: - logging.warning( - "Some characters could not be decoded, and were " - "replaced with REPLACEMENT CHARACTER.") - self.contains_replacement_characters = True - break - - # If none of that worked, we could at this point force it to - # ASCII, but that would destroy so much data that I think - # giving up is better. - self.unicode_markup = u - if not u: - self.original_encoding = None - - def _sub_ms_char(self, match): - """Changes a MS smart quote character to an XML or HTML - entity, or an ASCII character.""" - orig = match.group(1) - if self.smart_quotes_to == 'ascii': - sub = self.MS_CHARS_TO_ASCII.get(orig).encode() - else: - sub = self.MS_CHARS.get(orig) - if type(sub) == tuple: - if self.smart_quotes_to == 'xml': - sub = '&#x'.encode() + sub[1].encode() + ';'.encode() - else: - sub = '&'.encode() + sub[0].encode() + ';'.encode() - else: - sub = sub.encode() - return sub - - def _convert_from(self, proposed, errors="strict"): - proposed = self.find_codec(proposed) - if not proposed or (proposed, errors) in self.tried_encodings: - return None - self.tried_encodings.append((proposed, errors)) - markup = self.markup - # Convert smart quotes to HTML if coming from an encoding - # that might have them. - if (self.smart_quotes_to is not None - and proposed in self.ENCODINGS_WITH_SMART_QUOTES): - smart_quotes_re = b"([\x80-\x9f])" - smart_quotes_compiled = re.compile(smart_quotes_re) - markup = smart_quotes_compiled.sub(self._sub_ms_char, markup) - - try: - #print "Trying to convert document to %s (errors=%s)" % ( - # proposed, errors) - u = self._to_unicode(markup, proposed, errors) - self.markup = u - self.original_encoding = proposed - except Exception as e: - #print "That didn't work!" - #print e - return None - #print "Correct encoding: %s" % proposed - return self.markup - - def _to_unicode(self, data, encoding, errors="strict"): - '''Given a string and its encoding, decodes the string into Unicode. - %encoding is a string recognized by encodings.aliases''' - return unicode(data, encoding, errors) - - @property - def declared_html_encoding(self): - if not self.is_html: - return None - return self.detector.declared_encoding - - def find_codec(self, charset): - value = (self._codec(self.CHARSET_ALIASES.get(charset, charset)) - or (charset and self._codec(charset.replace("-", ""))) - or (charset and self._codec(charset.replace("-", "_"))) - or (charset and charset.lower()) - or charset - ) - if value: - return value.lower() - return None - - def _codec(self, charset): - if not charset: - return charset - codec = None - try: - codecs.lookup(charset) - codec = charset - except (LookupError, ValueError): - pass - return codec - - - # A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities. - MS_CHARS = {b'\x80': ('euro', '20AC'), - b'\x81': ' ', - b'\x82': ('sbquo', '201A'), - b'\x83': ('fnof', '192'), - b'\x84': ('bdquo', '201E'), - b'\x85': ('hellip', '2026'), - b'\x86': ('dagger', '2020'), - b'\x87': ('Dagger', '2021'), - b'\x88': ('circ', '2C6'), - b'\x89': ('permil', '2030'), - b'\x8A': ('Scaron', '160'), - b'\x8B': ('lsaquo', '2039'), - b'\x8C': ('OElig', '152'), - b'\x8D': '?', - b'\x8E': ('#x17D', '17D'), - b'\x8F': '?', - b'\x90': '?', - b'\x91': ('lsquo', '2018'), - b'\x92': ('rsquo', '2019'), - b'\x93': ('ldquo', '201C'), - b'\x94': ('rdquo', '201D'), - b'\x95': ('bull', '2022'), - b'\x96': ('ndash', '2013'), - b'\x97': ('mdash', '2014'), - b'\x98': ('tilde', '2DC'), - b'\x99': ('trade', '2122'), - b'\x9a': ('scaron', '161'), - b'\x9b': ('rsaquo', '203A'), - b'\x9c': ('oelig', '153'), - b'\x9d': '?', - b'\x9e': ('#x17E', '17E'), - b'\x9f': ('Yuml', ''),} - - # A parochial partial mapping of ISO-Latin-1 to ASCII. Contains - # horrors like stripping diacritical marks to turn á into a, but also - # contains non-horrors like turning “ into ". - MS_CHARS_TO_ASCII = { - b'\x80' : 'EUR', - b'\x81' : ' ', - b'\x82' : ',', - b'\x83' : 'f', - b'\x84' : ',,', - b'\x85' : '...', - b'\x86' : '+', - b'\x87' : '++', - b'\x88' : '^', - b'\x89' : '%', - b'\x8a' : 'S', - b'\x8b' : '<', - b'\x8c' : 'OE', - b'\x8d' : '?', - b'\x8e' : 'Z', - b'\x8f' : '?', - b'\x90' : '?', - b'\x91' : "'", - b'\x92' : "'", - b'\x93' : '"', - b'\x94' : '"', - b'\x95' : '*', - b'\x96' : '-', - b'\x97' : '--', - b'\x98' : '~', - b'\x99' : '(TM)', - b'\x9a' : 's', - b'\x9b' : '>', - b'\x9c' : 'oe', - b'\x9d' : '?', - b'\x9e' : 'z', - b'\x9f' : 'Y', - b'\xa0' : ' ', - b'\xa1' : '!', - b'\xa2' : 'c', - b'\xa3' : 'GBP', - b'\xa4' : '$', #This approximation is especially parochial--this is the - #generic currency symbol. - b'\xa5' : 'YEN', - b'\xa6' : '|', - b'\xa7' : 'S', - b'\xa8' : '..', - b'\xa9' : '', - b'\xaa' : '(th)', - b'\xab' : '<<', - b'\xac' : '!', - b'\xad' : ' ', - b'\xae' : '(R)', - b'\xaf' : '-', - b'\xb0' : 'o', - b'\xb1' : '+-', - b'\xb2' : '2', - b'\xb3' : '3', - b'\xb4' : ("'", 'acute'), - b'\xb5' : 'u', - b'\xb6' : 'P', - b'\xb7' : '*', - b'\xb8' : ',', - b'\xb9' : '1', - b'\xba' : '(th)', - b'\xbb' : '>>', - b'\xbc' : '1/4', - b'\xbd' : '1/2', - b'\xbe' : '3/4', - b'\xbf' : '?', - b'\xc0' : 'A', - b'\xc1' : 'A', - b'\xc2' : 'A', - b'\xc3' : 'A', - b'\xc4' : 'A', - b'\xc5' : 'A', - b'\xc6' : 'AE', - b'\xc7' : 'C', - b'\xc8' : 'E', - b'\xc9' : 'E', - b'\xca' : 'E', - b'\xcb' : 'E', - b'\xcc' : 'I', - b'\xcd' : 'I', - b'\xce' : 'I', - b'\xcf' : 'I', - b'\xd0' : 'D', - b'\xd1' : 'N', - b'\xd2' : 'O', - b'\xd3' : 'O', - b'\xd4' : 'O', - b'\xd5' : 'O', - b'\xd6' : 'O', - b'\xd7' : '*', - b'\xd8' : 'O', - b'\xd9' : 'U', - b'\xda' : 'U', - b'\xdb' : 'U', - b'\xdc' : 'U', - b'\xdd' : 'Y', - b'\xde' : 'b', - b'\xdf' : 'B', - b'\xe0' : 'a', - b'\xe1' : 'a', - b'\xe2' : 'a', - b'\xe3' : 'a', - b'\xe4' : 'a', - b'\xe5' : 'a', - b'\xe6' : 'ae', - b'\xe7' : 'c', - b'\xe8' : 'e', - b'\xe9' : 'e', - b'\xea' : 'e', - b'\xeb' : 'e', - b'\xec' : 'i', - b'\xed' : 'i', - b'\xee' : 'i', - b'\xef' : 'i', - b'\xf0' : 'o', - b'\xf1' : 'n', - b'\xf2' : 'o', - b'\xf3' : 'o', - b'\xf4' : 'o', - b'\xf5' : 'o', - b'\xf6' : 'o', - b'\xf7' : '/', - b'\xf8' : 'o', - b'\xf9' : 'u', - b'\xfa' : 'u', - b'\xfb' : 'u', - b'\xfc' : 'u', - b'\xfd' : 'y', - b'\xfe' : 'b', - b'\xff' : 'y', - } - - # A map used when removing rogue Windows-1252/ISO-8859-1 - # characters in otherwise UTF-8 documents. - # - # Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in - # Windows-1252. - WINDOWS_1252_TO_UTF8 = { - 0x80 : b'\xe2\x82\xac', # € - 0x82 : b'\xe2\x80\x9a', # ‚ - 0x83 : b'\xc6\x92', # ƒ - 0x84 : b'\xe2\x80\x9e', # „ - 0x85 : b'\xe2\x80\xa6', # … - 0x86 : b'\xe2\x80\xa0', # † - 0x87 : b'\xe2\x80\xa1', # ‡ - 0x88 : b'\xcb\x86', # ˆ - 0x89 : b'\xe2\x80\xb0', # ‰ - 0x8a : b'\xc5\xa0', # Š - 0x8b : b'\xe2\x80\xb9', # ‹ - 0x8c : b'\xc5\x92', # Œ - 0x8e : b'\xc5\xbd', # Ž - 0x91 : b'\xe2\x80\x98', # ‘ - 0x92 : b'\xe2\x80\x99', # ’ - 0x93 : b'\xe2\x80\x9c', # “ - 0x94 : b'\xe2\x80\x9d', # ” - 0x95 : b'\xe2\x80\xa2', # • - 0x96 : b'\xe2\x80\x93', # – - 0x97 : b'\xe2\x80\x94', # — - 0x98 : b'\xcb\x9c', # ˜ - 0x99 : b'\xe2\x84\xa2', # ™ - 0x9a : b'\xc5\xa1', # š - 0x9b : b'\xe2\x80\xba', # › - 0x9c : b'\xc5\x93', # œ - 0x9e : b'\xc5\xbe', # ž - 0x9f : b'\xc5\xb8', # Ÿ - 0xa0 : b'\xc2\xa0', #   - 0xa1 : b'\xc2\xa1', # ¡ - 0xa2 : b'\xc2\xa2', # ¢ - 0xa3 : b'\xc2\xa3', # £ - 0xa4 : b'\xc2\xa4', # ¤ - 0xa5 : b'\xc2\xa5', # ¥ - 0xa6 : b'\xc2\xa6', # ¦ - 0xa7 : b'\xc2\xa7', # § - 0xa8 : b'\xc2\xa8', # ¨ - 0xa9 : b'\xc2\xa9', # © - 0xaa : b'\xc2\xaa', # ª - 0xab : b'\xc2\xab', # « - 0xac : b'\xc2\xac', # ¬ - 0xad : b'\xc2\xad', # ­ - 0xae : b'\xc2\xae', # ® - 0xaf : b'\xc2\xaf', # ¯ - 0xb0 : b'\xc2\xb0', # ° - 0xb1 : b'\xc2\xb1', # ± - 0xb2 : b'\xc2\xb2', # ² - 0xb3 : b'\xc2\xb3', # ³ - 0xb4 : b'\xc2\xb4', # ´ - 0xb5 : b'\xc2\xb5', # µ - 0xb6 : b'\xc2\xb6', # ¶ - 0xb7 : b'\xc2\xb7', # · - 0xb8 : b'\xc2\xb8', # ¸ - 0xb9 : b'\xc2\xb9', # ¹ - 0xba : b'\xc2\xba', # º - 0xbb : b'\xc2\xbb', # » - 0xbc : b'\xc2\xbc', # ¼ - 0xbd : b'\xc2\xbd', # ½ - 0xbe : b'\xc2\xbe', # ¾ - 0xbf : b'\xc2\xbf', # ¿ - 0xc0 : b'\xc3\x80', # À - 0xc1 : b'\xc3\x81', # Á - 0xc2 : b'\xc3\x82', #  - 0xc3 : b'\xc3\x83', # à - 0xc4 : b'\xc3\x84', # Ä - 0xc5 : b'\xc3\x85', # Å - 0xc6 : b'\xc3\x86', # Æ - 0xc7 : b'\xc3\x87', # Ç - 0xc8 : b'\xc3\x88', # È - 0xc9 : b'\xc3\x89', # É - 0xca : b'\xc3\x8a', # Ê - 0xcb : b'\xc3\x8b', # Ë - 0xcc : b'\xc3\x8c', # Ì - 0xcd : b'\xc3\x8d', # Í - 0xce : b'\xc3\x8e', # Î - 0xcf : b'\xc3\x8f', # Ï - 0xd0 : b'\xc3\x90', # Ð - 0xd1 : b'\xc3\x91', # Ñ - 0xd2 : b'\xc3\x92', # Ò - 0xd3 : b'\xc3\x93', # Ó - 0xd4 : b'\xc3\x94', # Ô - 0xd5 : b'\xc3\x95', # Õ - 0xd6 : b'\xc3\x96', # Ö - 0xd7 : b'\xc3\x97', # × - 0xd8 : b'\xc3\x98', # Ø - 0xd9 : b'\xc3\x99', # Ù - 0xda : b'\xc3\x9a', # Ú - 0xdb : b'\xc3\x9b', # Û - 0xdc : b'\xc3\x9c', # Ü - 0xdd : b'\xc3\x9d', # Ý - 0xde : b'\xc3\x9e', # Þ - 0xdf : b'\xc3\x9f', # ß - 0xe0 : b'\xc3\xa0', # à - 0xe1 : b'\xa1', # á - 0xe2 : b'\xc3\xa2', # â - 0xe3 : b'\xc3\xa3', # ã - 0xe4 : b'\xc3\xa4', # ä - 0xe5 : b'\xc3\xa5', # å - 0xe6 : b'\xc3\xa6', # æ - 0xe7 : b'\xc3\xa7', # ç - 0xe8 : b'\xc3\xa8', # è - 0xe9 : b'\xc3\xa9', # é - 0xea : b'\xc3\xaa', # ê - 0xeb : b'\xc3\xab', # ë - 0xec : b'\xc3\xac', # ì - 0xed : b'\xc3\xad', # í - 0xee : b'\xc3\xae', # î - 0xef : b'\xc3\xaf', # ï - 0xf0 : b'\xc3\xb0', # ð - 0xf1 : b'\xc3\xb1', # ñ - 0xf2 : b'\xc3\xb2', # ò - 0xf3 : b'\xc3\xb3', # ó - 0xf4 : b'\xc3\xb4', # ô - 0xf5 : b'\xc3\xb5', # õ - 0xf6 : b'\xc3\xb6', # ö - 0xf7 : b'\xc3\xb7', # ÷ - 0xf8 : b'\xc3\xb8', # ø - 0xf9 : b'\xc3\xb9', # ù - 0xfa : b'\xc3\xba', # ú - 0xfb : b'\xc3\xbb', # û - 0xfc : b'\xc3\xbc', # ü - 0xfd : b'\xc3\xbd', # ý - 0xfe : b'\xc3\xbe', # þ - } - - MULTIBYTE_MARKERS_AND_SIZES = [ - (0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF - (0xe0, 0xef, 3), # 3-byte characters start with E0-EF - (0xf0, 0xf4, 4), # 4-byte characters start with F0-F4 - ] - - FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0] - LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1] - - @classmethod - def detwingle(cls, in_bytes, main_encoding="utf8", - embedded_encoding="windows-1252"): - """Fix characters from one encoding embedded in some other encoding. - - Currently the only situation supported is Windows-1252 (or its - subset ISO-8859-1), embedded in UTF-8. - - The input must be a bytestring. If you've already converted - the document to Unicode, you're too late. - - The output is a bytestring in which `embedded_encoding` - characters have been converted to their `main_encoding` - equivalents. - """ - if embedded_encoding.replace('_', '-').lower() not in ( - 'windows-1252', 'windows_1252'): - raise NotImplementedError( - "Windows-1252 and ISO-8859-1 are the only currently supported " - "embedded encodings.") - - if main_encoding.lower() not in ('utf8', 'utf-8'): - raise NotImplementedError( - "UTF-8 is the only currently supported main encoding.") - - byte_chunks = [] - - chunk_start = 0 - pos = 0 - while pos < len(in_bytes): - byte = in_bytes[pos] - if not isinstance(byte, int): - # Python 2.x - byte = ord(byte) - if (byte >= cls.FIRST_MULTIBYTE_MARKER - and byte <= cls.LAST_MULTIBYTE_MARKER): - # This is the start of a UTF-8 multibyte character. Skip - # to the end. - for start, end, size in cls.MULTIBYTE_MARKERS_AND_SIZES: - if byte >= start and byte <= end: - pos += size - break - elif byte >= 0x80 and byte in cls.WINDOWS_1252_TO_UTF8: - # We found a Windows-1252 character! - # Save the string up to this point as a chunk. - byte_chunks.append(in_bytes[chunk_start:pos]) - - # Now translate the Windows-1252 character into UTF-8 - # and add it as another, one-byte chunk. - byte_chunks.append(cls.WINDOWS_1252_TO_UTF8[byte]) - pos += 1 - chunk_start = pos - else: - # Go on to the next character. - pos += 1 - if chunk_start == 0: - # The string is unchanged. - return in_bytes - else: - # Store the final chunk. - byte_chunks.append(in_bytes[chunk_start:]) - return b''.join(byte_chunks) - diff --git a/lib/bs4/diagnose.py b/lib/bs4/diagnose.py deleted file mode 100644 index 1b719830..00000000 --- a/lib/bs4/diagnose.py +++ /dev/null @@ -1,213 +0,0 @@ -"""Diagnostic functions, mainly for use when doing tech support.""" -import cProfile -from StringIO import StringIO -from HTMLParser import HTMLParser -import bs4 -from bs4 import BeautifulSoup, __version__ -from bs4.builder import builder_registry - -import os -import pstats -import random -import tempfile -import time -import traceback -import sys -import cProfile - -def diagnose(data): - """Diagnostic suite for isolating common problems.""" - print "Diagnostic running on Beautiful Soup %s" % __version__ - print "Python version %s" % sys.version - - basic_parsers = ["html.parser", "html5lib", "lxml"] - for name in basic_parsers: - for builder in builder_registry.builders: - if name in builder.features: - break - else: - basic_parsers.remove(name) - print ( - "I noticed that %s is not installed. Installing it may help." % - name) - - if 'lxml' in basic_parsers: - basic_parsers.append(["lxml", "xml"]) - try: - from lxml import etree - print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)) - except ImportError, e: - print ( - "lxml is not installed or couldn't be imported.") - - - if 'html5lib' in basic_parsers: - try: - import html5lib - print "Found html5lib version %s" % html5lib.__version__ - except ImportError, e: - print ( - "html5lib is not installed or couldn't be imported.") - - if hasattr(data, 'read'): - data = data.read() - elif os.path.exists(data): - print '"%s" looks like a filename. Reading data from the file.' % data - data = open(data).read() - elif data.startswith("http:") or data.startswith("https:"): - print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data - print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup." - return - print - - for parser in basic_parsers: - print "Trying to parse your markup with %s" % parser - success = False - try: - soup = BeautifulSoup(data, parser) - success = True - except Exception, e: - print "%s could not parse the markup." % parser - traceback.print_exc() - if success: - print "Here's what %s did with the markup:" % parser - print soup.prettify() - - print "-" * 80 - -def lxml_trace(data, html=True, **kwargs): - """Print out the lxml events that occur during parsing. - - This lets you see how lxml parses a document when no Beautiful - Soup code is running. - """ - from lxml import etree - for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): - print("%s, %4s, %s" % (event, element.tag, element.text)) - -class AnnouncingParser(HTMLParser): - """Announces HTMLParser parse events, without doing anything else.""" - - def _p(self, s): - print(s) - - def handle_starttag(self, name, attrs): - self._p("%s START" % name) - - def handle_endtag(self, name): - self._p("%s END" % name) - - def handle_data(self, data): - self._p("%s DATA" % data) - - def handle_charref(self, name): - self._p("%s CHARREF" % name) - - def handle_entityref(self, name): - self._p("%s ENTITYREF" % name) - - def handle_comment(self, data): - self._p("%s COMMENT" % data) - - def handle_decl(self, data): - self._p("%s DECL" % data) - - def unknown_decl(self, data): - self._p("%s UNKNOWN-DECL" % data) - - def handle_pi(self, data): - self._p("%s PI" % data) - -def htmlparser_trace(data): - """Print out the HTMLParser events that occur during parsing. - - This lets you see how HTMLParser parses a document when no - Beautiful Soup code is running. - """ - parser = AnnouncingParser() - parser.feed(data) - -_vowels = "aeiou" -_consonants = "bcdfghjklmnpqrstvwxyz" - -def rword(length=5): - "Generate a random word-like string." - s = '' - for i in range(length): - if i % 2 == 0: - t = _consonants - else: - t = _vowels - s += random.choice(t) - return s - -def rsentence(length=4): - "Generate a random sentence-like string." - return " ".join(rword(random.randint(4,9)) for i in range(length)) - -def rdoc(num_elements=1000): - """Randomly generate an invalid HTML document.""" - tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table'] - elements = [] - for i in range(num_elements): - choice = random.randint(0,3) - if choice == 0: - # New tag. - tag_name = random.choice(tag_names) - elements.append("<%s>" % tag_name) - elif choice == 1: - elements.append(rsentence(random.randint(1,4))) - elif choice == 2: - # Close a tag. - tag_name = random.choice(tag_names) - elements.append("" % tag_name) - return "" + "\n".join(elements) + "" - -def benchmark_parsers(num_elements=100000): - """Very basic head-to-head performance benchmark.""" - print "Comparative parser benchmark on Beautiful Soup %s" % __version__ - data = rdoc(num_elements) - print "Generated a large invalid HTML document (%d bytes)." % len(data) - - for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: - success = False - try: - a = time.time() - soup = BeautifulSoup(data, parser) - b = time.time() - success = True - except Exception, e: - print "%s could not parse the markup." % parser - traceback.print_exc() - if success: - print "BS4+%s parsed the markup in %.2fs." % (parser, b-a) - - from lxml import etree - a = time.time() - etree.HTML(data) - b = time.time() - print "Raw lxml parsed the markup in %.2fs." % (b-a) - - import html5lib - parser = html5lib.HTMLParser() - a = time.time() - parser.parse(data) - b = time.time() - print "Raw html5lib parsed the markup in %.2fs." % (b-a) - -def profile(num_elements=100000, parser="lxml"): - - filehandle = tempfile.NamedTemporaryFile() - filename = filehandle.name - - data = rdoc(num_elements) - vars = dict(bs4=bs4, data=data, parser=parser) - cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename) - - stats = pstats.Stats(filename) - # stats.strip_dirs() - stats.sort_stats("cumulative") - stats.print_stats('_html5lib|bs4', 50) - -if __name__ == '__main__': - diagnose(sys.stdin.read()) diff --git a/lib/bs4/element.py b/lib/bs4/element.py deleted file mode 100644 index c70ad5a0..00000000 --- a/lib/bs4/element.py +++ /dev/null @@ -1,1713 +0,0 @@ -from pdb import set_trace -import collections -import re -import sys -import warnings -from bs4.dammit import EntitySubstitution - -DEFAULT_OUTPUT_ENCODING = "utf-8" -PY3K = (sys.version_info[0] > 2) - -whitespace_re = re.compile("\s+") - -def _alias(attr): - """Alias one attribute name to another for backward compatibility""" - @property - def alias(self): - return getattr(self, attr) - - @alias.setter - def alias(self): - return setattr(self, attr) - return alias - - -class NamespacedAttribute(unicode): - - def __new__(cls, prefix, name, namespace=None): - if name is None: - obj = unicode.__new__(cls, prefix) - elif prefix is None: - # Not really namespaced. - obj = unicode.__new__(cls, name) - else: - obj = unicode.__new__(cls, prefix + ":" + name) - obj.prefix = prefix - obj.name = name - obj.namespace = namespace - return obj - -class AttributeValueWithCharsetSubstitution(unicode): - """A stand-in object for a character encoding specified in HTML.""" - -class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): - """A generic stand-in for the value of a meta tag's 'charset' attribute. - - When Beautiful Soup parses the markup '', the - value of the 'charset' attribute will be one of these objects. - """ - - def __new__(cls, original_value): - obj = unicode.__new__(cls, original_value) - obj.original_value = original_value - return obj - - def encode(self, encoding): - return encoding - - -class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): - """A generic stand-in for the value of a meta tag's 'content' attribute. - - When Beautiful Soup parses the markup: - - - The value of the 'content' attribute will be one of these objects. - """ - - CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) - - def __new__(cls, original_value): - match = cls.CHARSET_RE.search(original_value) - if match is None: - # No substitution necessary. - return unicode.__new__(unicode, original_value) - - obj = unicode.__new__(cls, original_value) - obj.original_value = original_value - return obj - - def encode(self, encoding): - def rewrite(match): - return match.group(1) + encoding - return self.CHARSET_RE.sub(rewrite, self.original_value) - -class HTMLAwareEntitySubstitution(EntitySubstitution): - - """Entity substitution rules that are aware of some HTML quirks. - - Specifically, the contents of - -Hello, world! - - -''' - soup = self.soup(html) - self.assertEqual("text/javascript", soup.find('script')['type']) - - def test_comment(self): - # Comments are represented as Comment objects. - markup = "

foobaz

" - self.assertSoupEquals(markup) - - soup = self.soup(markup) - comment = soup.find(text="foobar") - self.assertEqual(comment.__class__, Comment) - - # The comment is properly integrated into the tree. - foo = soup.find(text="foo") - self.assertEqual(comment, foo.next_element) - baz = soup.find(text="baz") - self.assertEqual(comment, baz.previous_element) - - def test_preserved_whitespace_in_pre_and_textarea(self): - """Whitespace must be preserved in
 and ")
-
-    def test_nested_inline_elements(self):
-        """Inline elements can be nested indefinitely."""
-        b_tag = "Inside a B tag"
-        self.assertSoupEquals(b_tag)
-
-        nested_b_tag = "

A nested tag

" - self.assertSoupEquals(nested_b_tag) - - double_nested_b_tag = "

A doubly nested tag

" - self.assertSoupEquals(nested_b_tag) - - def test_nested_block_level_elements(self): - """Block elements can be nested.""" - soup = self.soup('

Foo

') - blockquote = soup.blockquote - self.assertEqual(blockquote.p.b.string, 'Foo') - self.assertEqual(blockquote.b.string, 'Foo') - - def test_correctly_nested_tables(self): - """One table can go inside another one.""" - markup = ('' - '' - "') - - self.assertSoupEquals( - markup, - '
Here's another table:" - '' - '' - '
foo
Here\'s another table:' - '
foo
' - '
') - - self.assertSoupEquals( - "" - "" - "
Foo
Bar
Baz
") - - def test_deeply_nested_multivalued_attribute(self): - # html5lib can set the attributes of the same tag many times - # as it rearranges the tree. This has caused problems with - # multivalued attributes. - markup = '
' - soup = self.soup(markup) - self.assertEqual(["css"], soup.div.div['class']) - - def test_multivalued_attribute_on_html(self): - # html5lib uses a different API to set the attributes ot the - # tag. This has caused problems with multivalued - # attributes. - markup = '' - soup = self.soup(markup) - self.assertEqual(["a", "b"], soup.html['class']) - - def test_angle_brackets_in_attribute_values_are_escaped(self): - self.assertSoupEquals('', '') - - def test_entities_in_attributes_converted_to_unicode(self): - expect = u'

' - self.assertSoupEquals('

', expect) - self.assertSoupEquals('

', expect) - self.assertSoupEquals('

', expect) - self.assertSoupEquals('

', expect) - - def test_entities_in_text_converted_to_unicode(self): - expect = u'

pi\N{LATIN SMALL LETTER N WITH TILDE}ata

' - self.assertSoupEquals("

piñata

", expect) - self.assertSoupEquals("

piñata

", expect) - self.assertSoupEquals("

piñata

", expect) - self.assertSoupEquals("

piñata

", expect) - - def test_quot_entity_converted_to_quotation_mark(self): - self.assertSoupEquals("

I said "good day!"

", - '

I said "good day!"

') - - def test_out_of_range_entity(self): - expect = u"\N{REPLACEMENT CHARACTER}" - self.assertSoupEquals("�", expect) - self.assertSoupEquals("�", expect) - self.assertSoupEquals("�", expect) - - def test_multipart_strings(self): - "Mostly to prevent a recurrence of a bug in the html5lib treebuilder." - soup = self.soup("

\nfoo

") - self.assertEqual("p", soup.h2.string.next_element.name) - self.assertEqual("p", soup.p.name) - self.assertConnectedness(soup) - - def test_head_tag_between_head_and_body(self): - "Prevent recurrence of a bug in the html5lib treebuilder." - content = """ - - foo - -""" - soup = self.soup(content) - self.assertNotEqual(None, soup.html.body) - self.assertConnectedness(soup) - - def test_multiple_copies_of_a_tag(self): - "Prevent recurrence of a bug in the html5lib treebuilder." - content = """ - - - - - -""" - soup = self.soup(content) - self.assertConnectedness(soup.article) - - def test_basic_namespaces(self): - """Parsers don't need to *understand* namespaces, but at the - very least they should not choke on namespaces or lose - data.""" - - markup = b'4' - soup = self.soup(markup) - self.assertEqual(markup, soup.encode()) - html = soup.html - self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns']) - self.assertEqual( - 'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml']) - self.assertEqual( - 'http://www.w3.org/2000/svg', soup.html['xmlns:svg']) - - def test_multivalued_attribute_value_becomes_list(self): - markup = b'' - soup = self.soup(markup) - self.assertEqual(['foo', 'bar'], soup.a['class']) - - # - # Generally speaking, tests below this point are more tests of - # Beautiful Soup than tests of the tree builders. But parsers are - # weird, so we run these tests separately for every tree builder - # to detect any differences between them. - # - - def test_can_parse_unicode_document(self): - # A seemingly innocuous document... but it's in Unicode! And - # it contains characters that can't be represented in the - # encoding found in the declaration! The horror! - markup = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' - soup = self.soup(markup) - self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string) - - def test_soupstrainer(self): - """Parsers should be able to work with SoupStrainers.""" - strainer = SoupStrainer("b") - soup = self.soup("A bold statement", - parse_only=strainer) - self.assertEqual(soup.decode(), "bold") - - def test_single_quote_attribute_values_become_double_quotes(self): - self.assertSoupEquals("", - '') - - def test_attribute_values_with_nested_quotes_are_left_alone(self): - text = """a""" - self.assertSoupEquals(text) - - def test_attribute_values_with_double_nested_quotes_get_quoted(self): - text = """a""" - soup = self.soup(text) - soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"' - self.assertSoupEquals( - soup.foo.decode(), - """a""") - - def test_ampersand_in_attribute_value_gets_escaped(self): - self.assertSoupEquals('', - '') - - self.assertSoupEquals( - 'foo', - 'foo') - - def test_escaped_ampersand_in_attribute_value_is_left_alone(self): - self.assertSoupEquals('') - - def test_entities_in_strings_converted_during_parsing(self): - # Both XML and HTML entities are converted to Unicode characters - # during parsing. - text = "

<<sacré bleu!>>

" - expected = u"

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

" - self.assertSoupEquals(text, expected) - - def test_smart_quotes_converted_on_the_way_in(self): - # Microsoft smart quotes are converted to Unicode characters during - # parsing. - quote = b"

\x91Foo\x92

" - soup = self.soup(quote) - self.assertEqual( - soup.p.string, - u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") - - def test_non_breaking_spaces_converted_on_the_way_in(self): - soup = self.soup("  ") - self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2) - - def test_entities_converted_on_the_way_out(self): - text = "

<<sacré bleu!>>

" - expected = u"

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

".encode("utf-8") - soup = self.soup(text) - self.assertEqual(soup.p.encode("utf-8"), expected) - - def test_real_iso_latin_document(self): - # Smoke test of interrelated functionality, using an - # easy-to-understand document. - - # Here it is in Unicode. Note that it claims to be in ISO-Latin-1. - unicode_html = u'

Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!

' - - # That's because we're going to encode it into ISO-Latin-1, and use - # that to test. - iso_latin_html = unicode_html.encode("iso-8859-1") - - # Parse the ISO-Latin-1 HTML. - soup = self.soup(iso_latin_html) - # Encode it to UTF-8. - result = soup.encode("utf-8") - - # What do we expect the result to look like? Well, it would - # look like unicode_html, except that the META tag would say - # UTF-8 instead of ISO-Latin-1. - expected = unicode_html.replace("ISO-Latin-1", "utf-8") - - # And, of course, it would be in UTF-8, not Unicode. - expected = expected.encode("utf-8") - - # Ta-da! - self.assertEqual(result, expected) - - def test_real_shift_jis_document(self): - # Smoke test to make sure the parser can handle a document in - # Shift-JIS encoding, without choking. - shift_jis_html = ( - b'
'
-            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
-            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
-            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
-            b'
') - unicode_html = shift_jis_html.decode("shift-jis") - soup = self.soup(unicode_html) - - # Make sure the parse tree is correctly encoded to various - # encodings. - self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8")) - self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp")) - - def test_real_hebrew_document(self): - # A real-world test to make sure we can convert ISO-8859-9 (a - # Hebrew encoding) to UTF-8. - hebrew_document = b'Hebrew (ISO 8859-8) in Visual Directionality

Hebrew (ISO 8859-8) in Visual Directionality

\xed\xe5\xec\xf9' - soup = self.soup( - hebrew_document, from_encoding="iso8859-8") - self.assertEqual(soup.original_encoding, 'iso8859-8') - self.assertEqual( - soup.encode('utf-8'), - hebrew_document.decode("iso8859-8").encode("utf-8")) - - def test_meta_tag_reflects_current_encoding(self): - # Here's the tag saying that a document is - # encoded in Shift-JIS. - meta_tag = ('') - - # Here's a document incorporating that meta tag. - shift_jis_html = ( - '\n%s\n' - '' - 'Shift-JIS markup goes here.') % meta_tag - soup = self.soup(shift_jis_html) - - # Parse the document, and the charset is seemingly unaffected. - parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'}) - content = parsed_meta['content'] - self.assertEqual('text/html; charset=x-sjis', content) - - # But that value is actually a ContentMetaAttributeValue object. - self.assertTrue(isinstance(content, ContentMetaAttributeValue)) - - # And it will take on a value that reflects its current - # encoding. - self.assertEqual('text/html; charset=utf8', content.encode("utf8")) - - # For the rest of the story, see TestSubstitutions in - # test_tree.py. - - def test_html5_style_meta_tag_reflects_current_encoding(self): - # Here's the tag saying that a document is - # encoded in Shift-JIS. - meta_tag = ('') - - # Here's a document incorporating that meta tag. - shift_jis_html = ( - '\n%s\n' - '' - 'Shift-JIS markup goes here.') % meta_tag - soup = self.soup(shift_jis_html) - - # Parse the document, and the charset is seemingly unaffected. - parsed_meta = soup.find('meta', id="encoding") - charset = parsed_meta['charset'] - self.assertEqual('x-sjis', charset) - - # But that value is actually a CharsetMetaAttributeValue object. - self.assertTrue(isinstance(charset, CharsetMetaAttributeValue)) - - # And it will take on a value that reflects its current - # encoding. - self.assertEqual('utf8', charset.encode("utf8")) - - def test_tag_with_no_attributes_can_have_attributes_added(self): - data = self.soup("text") - data.a['foo'] = 'bar' - self.assertEqual('text', data.a.decode()) - -class XMLTreeBuilderSmokeTest(object): - - def test_pickle_and_unpickle_identity(self): - # Pickling a tree, then unpickling it, yields a tree identical - # to the original. - tree = self.soup("foo") - dumped = pickle.dumps(tree, 2) - loaded = pickle.loads(dumped) - self.assertEqual(loaded.__class__, BeautifulSoup) - self.assertEqual(loaded.decode(), tree.decode()) - - def test_docstring_generated(self): - soup = self.soup("") - self.assertEqual( - soup.encode(), b'\n') - - def test_real_xhtml_document(self): - """A real XHTML document should come out *exactly* the same as it went in.""" - markup = b""" - - -Hello. -Goodbye. -""" - soup = self.soup(markup) - self.assertEqual( - soup.encode("utf-8"), markup) - - def test_formatter_processes_script_tag_for_xml_documents(self): - doc = """ - -""" - soup = BeautifulSoup(doc, "lxml-xml") - # lxml would have stripped this while parsing, but we can add - # it later. - soup.script.string = 'console.log("< < hey > > ");' - encoded = soup.encode() - self.assertTrue(b"< < hey > >" in encoded) - - def test_can_parse_unicode_document(self): - markup = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' - soup = self.soup(markup) - self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string) - - def test_popping_namespaced_tag(self): - markup = 'b2012-07-02T20:33:42Zcd' - soup = self.soup(markup) - self.assertEqual( - unicode(soup.rss), markup) - - def test_docstring_includes_correct_encoding(self): - soup = self.soup("") - self.assertEqual( - soup.encode("latin1"), - b'\n') - - def test_large_xml_document(self): - """A large XML document should come out the same as it went in.""" - markup = (b'\n' - + b'0' * (2**12) - + b'') - soup = self.soup(markup) - self.assertEqual(soup.encode("utf-8"), markup) - - - def test_tags_are_empty_element_if_and_only_if_they_are_empty(self): - self.assertSoupEquals("

", "

") - self.assertSoupEquals("

foo

") - - def test_namespaces_are_preserved(self): - markup = 'This tag is in the a namespaceThis tag is in the b namespace' - soup = self.soup(markup) - root = soup.root - self.assertEqual("http://example.com/", root['xmlns:a']) - self.assertEqual("http://example.net/", root['xmlns:b']) - - def test_closing_namespaced_tag(self): - markup = '

20010504

' - soup = self.soup(markup) - self.assertEqual(unicode(soup.p), markup) - - def test_namespaced_attributes(self): - markup = '' - soup = self.soup(markup) - self.assertEqual(unicode(soup.foo), markup) - - def test_namespaced_attributes_xml_namespace(self): - markup = 'bar' - soup = self.soup(markup) - self.assertEqual(unicode(soup.foo), markup) - -class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): - """Smoke test for a tree builder that supports HTML5.""" - - def test_real_xhtml_document(self): - # Since XHTML is not HTML5, HTML5 parsers are not tested to handle - # XHTML documents in any particular way. - pass - - def test_html_tags_have_namespace(self): - markup = "" - soup = self.soup(markup) - self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace) - - def test_svg_tags_have_namespace(self): - markup = '' - soup = self.soup(markup) - namespace = "http://www.w3.org/2000/svg" - self.assertEqual(namespace, soup.svg.namespace) - self.assertEqual(namespace, soup.circle.namespace) - - - def test_mathml_tags_have_namespace(self): - markup = '5' - soup = self.soup(markup) - namespace = 'http://www.w3.org/1998/Math/MathML' - self.assertEqual(namespace, soup.math.namespace) - self.assertEqual(namespace, soup.msqrt.namespace) - - def test_xml_declaration_becomes_comment(self): - markup = '' - soup = self.soup(markup) - self.assertTrue(isinstance(soup.contents[0], Comment)) - self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?') - self.assertEqual("html", soup.contents[0].next_element.name) - -def skipIf(condition, reason): - def nothing(test, *args, **kwargs): - return None - - def decorator(test_item): - if condition: - return nothing - else: - return test_item - - return decorator diff --git a/lib/cherrypy/LICENSE.txt b/lib/cherrypy/LICENSE.txt deleted file mode 100644 index 3816f933..00000000 --- a/lib/cherrypy/LICENSE.txt +++ /dev/null @@ -1,25 +0,0 @@ -Copyright (c) 2004-2011, CherryPy Team (team@cherrypy.org) -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of the CherryPy Team nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/lib/cherrypy/__init__.py b/lib/cherrypy/__init__.py deleted file mode 100644 index 7870ef0d..00000000 --- a/lib/cherrypy/__init__.py +++ /dev/null @@ -1,652 +0,0 @@ -"""CherryPy is a pythonic, object-oriented HTTP framework. - - -CherryPy consists of not one, but four separate API layers. - -The APPLICATION LAYER is the simplest. CherryPy applications are written as -a tree of classes and methods, where each branch in the tree corresponds to -a branch in the URL path. Each method is a 'page handler', which receives -GET and POST params as keyword arguments, and returns or yields the (HTML) -body of the response. The special method name 'index' is used for paths -that end in a slash, and the special method name 'default' is used to -handle multiple paths via a single handler. This layer also includes: - - * the 'exposed' attribute (and cherrypy.expose) - * cherrypy.quickstart() - * _cp_config attributes - * cherrypy.tools (including cherrypy.session) - * cherrypy.url() - -The ENVIRONMENT LAYER is used by developers at all levels. It provides -information about the current request and response, plus the application -and server environment, via a (default) set of top-level objects: - - * cherrypy.request - * cherrypy.response - * cherrypy.engine - * cherrypy.server - * cherrypy.tree - * cherrypy.config - * cherrypy.thread_data - * cherrypy.log - * cherrypy.HTTPError, NotFound, and HTTPRedirect - * cherrypy.lib - -The EXTENSION LAYER allows advanced users to construct and share their own -plugins. It consists of: - - * Hook API - * Tool API - * Toolbox API - * Dispatch API - * Config Namespace API - -Finally, there is the CORE LAYER, which uses the core API's to construct -the default components which are available at higher layers. You can think -of the default components as the 'reference implementation' for CherryPy. -Megaframeworks (and advanced users) may replace the default components -with customized or extended components. The core API's are: - - * Application API - * Engine API - * Request API - * Server API - * WSGI API - -These API's are described in the `CherryPy specification `_. -""" - -__version__ = "3.6.0" - -from cherrypy._cpcompat import urljoin as _urljoin, urlencode as _urlencode -from cherrypy._cpcompat import basestring, unicodestr, set - -from cherrypy._cperror import HTTPError, HTTPRedirect, InternalRedirect -from cherrypy._cperror import NotFound, CherryPyException, TimeoutError - -from cherrypy import _cpdispatch as dispatch - -from cherrypy import _cptools -tools = _cptools.default_toolbox -Tool = _cptools.Tool - -from cherrypy import _cprequest -from cherrypy.lib import httputil as _httputil - -from cherrypy import _cptree -tree = _cptree.Tree() -from cherrypy._cptree import Application -from cherrypy import _cpwsgi as wsgi - -from cherrypy import process -try: - from cherrypy.process import win32 - engine = win32.Win32Bus() - engine.console_control_handler = win32.ConsoleCtrlHandler(engine) - del win32 -except ImportError: - engine = process.bus - - -# Timeout monitor. We add two channels to the engine -# to which cherrypy.Application will publish. -engine.listeners['before_request'] = set() -engine.listeners['after_request'] = set() - - -class _TimeoutMonitor(process.plugins.Monitor): - - def __init__(self, bus): - self.servings = [] - process.plugins.Monitor.__init__(self, bus, self.run) - - def before_request(self): - self.servings.append((serving.request, serving.response)) - - def after_request(self): - try: - self.servings.remove((serving.request, serving.response)) - except ValueError: - pass - - def run(self): - """Check timeout on all responses. (Internal)""" - for req, resp in self.servings: - resp.check_timeout() -engine.timeout_monitor = _TimeoutMonitor(engine) -engine.timeout_monitor.subscribe() - -engine.autoreload = process.plugins.Autoreloader(engine) -engine.autoreload.subscribe() - -engine.thread_manager = process.plugins.ThreadManager(engine) -engine.thread_manager.subscribe() - -engine.signal_handler = process.plugins.SignalHandler(engine) - - -class _HandleSignalsPlugin(object): - - """Handle signals from other processes based on the configured - platform handlers above.""" - - def __init__(self, bus): - self.bus = bus - - def subscribe(self): - """Add the handlers based on the platform""" - if hasattr(self.bus, "signal_handler"): - self.bus.signal_handler.subscribe() - if hasattr(self.bus, "console_control_handler"): - self.bus.console_control_handler.subscribe() - -engine.signals = _HandleSignalsPlugin(engine) - - -from cherrypy import _cpserver -server = _cpserver.Server() -server.subscribe() - - -def quickstart(root=None, script_name="", config=None): - """Mount the given root, start the builtin server (and engine), then block. - - root: an instance of a "controller class" (a collection of page handler - methods) which represents the root of the application. - script_name: a string containing the "mount point" of the application. - This should start with a slash, and be the path portion of the URL - at which to mount the given root. For example, if root.index() will - handle requests to "http://www.example.com:8080/dept/app1/", then - the script_name argument would be "/dept/app1". - - It MUST NOT end in a slash. If the script_name refers to the root - of the URI, it MUST be an empty string (not "/"). - config: a file or dict containing application config. If this contains - a [global] section, those entries will be used in the global - (site-wide) config. - """ - if config: - _global_conf_alias.update(config) - - tree.mount(root, script_name, config) - - engine.signals.subscribe() - engine.start() - engine.block() - - -from cherrypy._cpcompat import threadlocal as _local - - -class _Serving(_local): - - """An interface for registering request and response objects. - - Rather than have a separate "thread local" object for the request and - the response, this class works as a single threadlocal container for - both objects (and any others which developers wish to define). In this - way, we can easily dump those objects when we stop/start a new HTTP - conversation, yet still refer to them as module-level globals in a - thread-safe way. - """ - - request = _cprequest.Request(_httputil.Host("127.0.0.1", 80), - _httputil.Host("127.0.0.1", 1111)) - """ - The request object for the current thread. In the main thread, - and any threads which are not receiving HTTP requests, this is None.""" - - response = _cprequest.Response() - """ - The response object for the current thread. In the main thread, - and any threads which are not receiving HTTP requests, this is None.""" - - def load(self, request, response): - self.request = request - self.response = response - - def clear(self): - """Remove all attributes of self.""" - self.__dict__.clear() - -serving = _Serving() - - -class _ThreadLocalProxy(object): - - __slots__ = ['__attrname__', '__dict__'] - - def __init__(self, attrname): - self.__attrname__ = attrname - - def __getattr__(self, name): - child = getattr(serving, self.__attrname__) - return getattr(child, name) - - def __setattr__(self, name, value): - if name in ("__attrname__", ): - object.__setattr__(self, name, value) - else: - child = getattr(serving, self.__attrname__) - setattr(child, name, value) - - def __delattr__(self, name): - child = getattr(serving, self.__attrname__) - delattr(child, name) - - def _get_dict(self): - child = getattr(serving, self.__attrname__) - d = child.__class__.__dict__.copy() - d.update(child.__dict__) - return d - __dict__ = property(_get_dict) - - def __getitem__(self, key): - child = getattr(serving, self.__attrname__) - return child[key] - - def __setitem__(self, key, value): - child = getattr(serving, self.__attrname__) - child[key] = value - - def __delitem__(self, key): - child = getattr(serving, self.__attrname__) - del child[key] - - def __contains__(self, key): - child = getattr(serving, self.__attrname__) - return key in child - - def __len__(self): - child = getattr(serving, self.__attrname__) - return len(child) - - def __nonzero__(self): - child = getattr(serving, self.__attrname__) - return bool(child) - # Python 3 - __bool__ = __nonzero__ - -# Create request and response object (the same objects will be used -# throughout the entire life of the webserver, but will redirect -# to the "serving" object) -request = _ThreadLocalProxy('request') -response = _ThreadLocalProxy('response') - -# Create thread_data object as a thread-specific all-purpose storage - - -class _ThreadData(_local): - - """A container for thread-specific data.""" -thread_data = _ThreadData() - - -# Monkeypatch pydoc to allow help() to go through the threadlocal proxy. -# Jan 2007: no Googleable examples of anyone else replacing pydoc.resolve. -# The only other way would be to change what is returned from type(request) -# and that's not possible in pure Python (you'd have to fake ob_type). -def _cherrypy_pydoc_resolve(thing, forceload=0): - """Given an object or a path to an object, get the object and its name.""" - if isinstance(thing, _ThreadLocalProxy): - thing = getattr(serving, thing.__attrname__) - return _pydoc._builtin_resolve(thing, forceload) - -try: - import pydoc as _pydoc - _pydoc._builtin_resolve = _pydoc.resolve - _pydoc.resolve = _cherrypy_pydoc_resolve -except ImportError: - pass - - -from cherrypy import _cplogging - - -class _GlobalLogManager(_cplogging.LogManager): - - """A site-wide LogManager; routes to app.log or global log as appropriate. - - This :class:`LogManager` implements - cherrypy.log() and cherrypy.log.access(). If either - function is called during a request, the message will be sent to the - logger for the current Application. If they are called outside of a - request, the message will be sent to the site-wide logger. - """ - - def __call__(self, *args, **kwargs): - """Log the given message to the app.log or global log as appropriate. - """ - # Do NOT use try/except here. See - # https://bitbucket.org/cherrypy/cherrypy/issue/945 - if hasattr(request, 'app') and hasattr(request.app, 'log'): - log = request.app.log - else: - log = self - return log.error(*args, **kwargs) - - def access(self): - """Log an access message to the app.log or global log as appropriate. - """ - try: - return request.app.log.access() - except AttributeError: - return _cplogging.LogManager.access(self) - - -log = _GlobalLogManager() -# Set a default screen handler on the global log. -log.screen = True -log.error_file = '' -# Using an access file makes CP about 10% slower. Leave off by default. -log.access_file = '' - - -def _buslog(msg, level): - log.error(msg, 'ENGINE', severity=level) -engine.subscribe('log', _buslog) - -# Helper functions for CP apps # - - -def expose(func=None, alias=None): - """Expose the function, optionally providing an alias or set of aliases.""" - def expose_(func): - func.exposed = True - if alias is not None: - if isinstance(alias, basestring): - parents[alias.replace(".", "_")] = func - else: - for a in alias: - parents[a.replace(".", "_")] = func - return func - - import sys - import types - if isinstance(func, (types.FunctionType, types.MethodType)): - if alias is None: - # @expose - func.exposed = True - return func - else: - # func = expose(func, alias) - parents = sys._getframe(1).f_locals - return expose_(func) - elif func is None: - if alias is None: - # @expose() - parents = sys._getframe(1).f_locals - return expose_ - else: - # @expose(alias="alias") or - # @expose(alias=["alias1", "alias2"]) - parents = sys._getframe(1).f_locals - return expose_ - else: - # @expose("alias") or - # @expose(["alias1", "alias2"]) - parents = sys._getframe(1).f_locals - alias = func - return expose_ - - -def popargs(*args, **kwargs): - """A decorator for _cp_dispatch - (cherrypy.dispatch.Dispatcher.dispatch_method_name). - - Optional keyword argument: handler=(Object or Function) - - Provides a _cp_dispatch function that pops off path segments into - cherrypy.request.params under the names specified. The dispatch - is then forwarded on to the next vpath element. - - Note that any existing (and exposed) member function of the class that - popargs is applied to will override that value of the argument. For - instance, if you have a method named "list" on the class decorated with - popargs, then accessing "/list" will call that function instead of popping - it off as the requested parameter. This restriction applies to all - _cp_dispatch functions. The only way around this restriction is to create - a "blank class" whose only function is to provide _cp_dispatch. - - If there are path elements after the arguments, or more arguments - are requested than are available in the vpath, then the 'handler' - keyword argument specifies the next object to handle the parameterized - request. If handler is not specified or is None, then self is used. - If handler is a function rather than an instance, then that function - will be called with the args specified and the return value from that - function used as the next object INSTEAD of adding the parameters to - cherrypy.request.args. - - This decorator may be used in one of two ways: - - As a class decorator: - @cherrypy.popargs('year', 'month', 'day') - class Blog: - def index(self, year=None, month=None, day=None): - #Process the parameters here; any url like - #/, /2009, /2009/12, or /2009/12/31 - #will fill in the appropriate parameters. - - def create(self): - #This link will still be available at /create. Defined functions - #take precedence over arguments. - - Or as a member of a class: - class Blog: - _cp_dispatch = cherrypy.popargs('year', 'month', 'day') - #... - - The handler argument may be used to mix arguments with built in functions. - For instance, the following setup allows different activities at the - day, month, and year level: - - class DayHandler: - def index(self, year, month, day): - #Do something with this day; probably list entries - - def delete(self, year, month, day): - #Delete all entries for this day - - @cherrypy.popargs('day', handler=DayHandler()) - class MonthHandler: - def index(self, year, month): - #Do something with this month; probably list entries - - def delete(self, year, month): - #Delete all entries for this month - - @cherrypy.popargs('month', handler=MonthHandler()) - class YearHandler: - def index(self, year): - #Do something with this year - - #... - - @cherrypy.popargs('year', handler=YearHandler()) - class Root: - def index(self): - #... - - """ - - # Since keyword arg comes after *args, we have to process it ourselves - # for lower versions of python. - - handler = None - handler_call = False - for k, v in kwargs.items(): - if k == 'handler': - handler = v - else: - raise TypeError( - "cherrypy.popargs() got an unexpected keyword argument '{0}'" - .format(k) - ) - - import inspect - - if handler is not None \ - and (hasattr(handler, '__call__') or inspect.isclass(handler)): - handler_call = True - - def decorated(cls_or_self=None, vpath=None): - if inspect.isclass(cls_or_self): - # cherrypy.popargs is a class decorator - cls = cls_or_self - setattr(cls, dispatch.Dispatcher.dispatch_method_name, decorated) - return cls - - # We're in the actual function - self = cls_or_self - parms = {} - for arg in args: - if not vpath: - break - parms[arg] = vpath.pop(0) - - if handler is not None: - if handler_call: - return handler(**parms) - else: - request.params.update(parms) - return handler - - request.params.update(parms) - - # If we are the ultimate handler, then to prevent our _cp_dispatch - # from being called again, we will resolve remaining elements through - # getattr() directly. - if vpath: - return getattr(self, vpath.pop(0), None) - else: - return self - - return decorated - - -def url(path="", qs="", script_name=None, base=None, relative=None): - """Create an absolute URL for the given path. - - If 'path' starts with a slash ('/'), this will return - (base + script_name + path + qs). - If it does not start with a slash, this returns - (base + script_name [+ request.path_info] + path + qs). - - If script_name is None, cherrypy.request will be used - to find a script_name, if available. - - If base is None, cherrypy.request.base will be used (if available). - Note that you can use cherrypy.tools.proxy to change this. - - Finally, note that this function can be used to obtain an absolute URL - for the current request path (minus the querystring) by passing no args. - If you call url(qs=cherrypy.request.query_string), you should get the - original browser URL (assuming no internal redirections). - - If relative is None or not provided, request.app.relative_urls will - be used (if available, else False). If False, the output will be an - absolute URL (including the scheme, host, vhost, and script_name). - If True, the output will instead be a URL that is relative to the - current request path, perhaps including '..' atoms. If relative is - the string 'server', the output will instead be a URL that is - relative to the server root; i.e., it will start with a slash. - """ - if isinstance(qs, (tuple, list, dict)): - qs = _urlencode(qs) - if qs: - qs = '?' + qs - - if request.app: - if not path.startswith("/"): - # Append/remove trailing slash from path_info as needed - # (this is to support mistyped URL's without redirecting; - # if you want to redirect, use tools.trailing_slash). - pi = request.path_info - if request.is_index is True: - if not pi.endswith('/'): - pi = pi + '/' - elif request.is_index is False: - if pi.endswith('/') and pi != '/': - pi = pi[:-1] - - if path == "": - path = pi - else: - path = _urljoin(pi, path) - - if script_name is None: - script_name = request.script_name - if base is None: - base = request.base - - newurl = base + script_name + path + qs - else: - # No request.app (we're being called outside a request). - # We'll have to guess the base from server.* attributes. - # This will produce very different results from the above - # if you're using vhosts or tools.proxy. - if base is None: - base = server.base() - - path = (script_name or "") + path - newurl = base + path + qs - - if './' in newurl: - # Normalize the URL by removing ./ and ../ - atoms = [] - for atom in newurl.split('/'): - if atom == '.': - pass - elif atom == '..': - atoms.pop() - else: - atoms.append(atom) - newurl = '/'.join(atoms) - - # At this point, we should have a fully-qualified absolute URL. - - if relative is None: - relative = getattr(request.app, "relative_urls", False) - - # See http://www.ietf.org/rfc/rfc2396.txt - if relative == 'server': - # "A relative reference beginning with a single slash character is - # termed an absolute-path reference, as defined by ..." - # This is also sometimes called "server-relative". - newurl = '/' + '/'.join(newurl.split('/', 3)[3:]) - elif relative: - # "A relative reference that does not begin with a scheme name - # or a slash character is termed a relative-path reference." - old = url(relative=False).split('/')[:-1] - new = newurl.split('/') - while old and new: - a, b = old[0], new[0] - if a != b: - break - old.pop(0) - new.pop(0) - new = (['..'] * len(old)) + new - newurl = '/'.join(new) - - return newurl - - -# import _cpconfig last so it can reference other top-level objects -from cherrypy import _cpconfig -# Use _global_conf_alias so quickstart can use 'config' as an arg -# without shadowing cherrypy.config. -config = _global_conf_alias = _cpconfig.Config() -config.defaults = { - 'tools.log_tracebacks.on': True, - 'tools.log_headers.on': True, - 'tools.trailing_slash.on': True, - 'tools.encode.on': True -} -config.namespaces["log"] = lambda k, v: setattr(log, k, v) -config.namespaces["checker"] = lambda k, v: setattr(checker, k, v) -# Must reset to get our defaults applied. -config.reset() - -from cherrypy import _cpchecker -checker = _cpchecker.Checker() -engine.subscribe('start', checker) diff --git a/lib/cherrypy/_cpchecker.py b/lib/cherrypy/_cpchecker.py deleted file mode 100644 index 4ef82597..00000000 --- a/lib/cherrypy/_cpchecker.py +++ /dev/null @@ -1,332 +0,0 @@ -import os -import warnings - -import cherrypy -from cherrypy._cpcompat import iteritems, copykeys, builtins - - -class Checker(object): - - """A checker for CherryPy sites and their mounted applications. - - When this object is called at engine startup, it executes each - of its own methods whose names start with ``check_``. If you wish - to disable selected checks, simply add a line in your global - config which sets the appropriate method to False:: - - [global] - checker.check_skipped_app_config = False - - You may also dynamically add or replace ``check_*`` methods in this way. - """ - - on = True - """If True (the default), run all checks; if False, turn off all checks.""" - - def __init__(self): - self._populate_known_types() - - def __call__(self): - """Run all check_* methods.""" - if self.on: - oldformatwarning = warnings.formatwarning - warnings.formatwarning = self.formatwarning - try: - for name in dir(self): - if name.startswith("check_"): - method = getattr(self, name) - if method and hasattr(method, '__call__'): - method() - finally: - warnings.formatwarning = oldformatwarning - - def formatwarning(self, message, category, filename, lineno, line=None): - """Function to format a warning.""" - return "CherryPy Checker:\n%s\n\n" % message - - # This value should be set inside _cpconfig. - global_config_contained_paths = False - - def check_app_config_entries_dont_start_with_script_name(self): - """Check for Application config with sections that repeat script_name. - """ - for sn, app in cherrypy.tree.apps.items(): - if not isinstance(app, cherrypy.Application): - continue - if not app.config: - continue - if sn == '': - continue - sn_atoms = sn.strip("/").split("/") - for key in app.config.keys(): - key_atoms = key.strip("/").split("/") - if key_atoms[:len(sn_atoms)] == sn_atoms: - warnings.warn( - "The application mounted at %r has config " - "entries that start with its script name: %r" % (sn, - key)) - - def check_site_config_entries_in_app_config(self): - """Check for mounted Applications that have site-scoped config.""" - for sn, app in iteritems(cherrypy.tree.apps): - if not isinstance(app, cherrypy.Application): - continue - - msg = [] - for section, entries in iteritems(app.config): - if section.startswith('/'): - for key, value in iteritems(entries): - for n in ("engine.", "server.", "tree.", "checker."): - if key.startswith(n): - msg.append("[%s] %s = %s" % - (section, key, value)) - if msg: - msg.insert(0, - "The application mounted at %r contains the " - "following config entries, which are only allowed " - "in site-wide config. Move them to a [global] " - "section and pass them to cherrypy.config.update() " - "instead of tree.mount()." % sn) - warnings.warn(os.linesep.join(msg)) - - def check_skipped_app_config(self): - """Check for mounted Applications that have no config.""" - for sn, app in cherrypy.tree.apps.items(): - if not isinstance(app, cherrypy.Application): - continue - if not app.config: - msg = "The Application mounted at %r has an empty config." % sn - if self.global_config_contained_paths: - msg += (" It looks like the config you passed to " - "cherrypy.config.update() contains application-" - "specific sections. You must explicitly pass " - "application config via " - "cherrypy.tree.mount(..., config=app_config)") - warnings.warn(msg) - return - - def check_app_config_brackets(self): - """Check for Application config with extraneous brackets in section - names. - """ - for sn, app in cherrypy.tree.apps.items(): - if not isinstance(app, cherrypy.Application): - continue - if not app.config: - continue - for key in app.config.keys(): - if key.startswith("[") or key.endswith("]"): - warnings.warn( - "The application mounted at %r has config " - "section names with extraneous brackets: %r. " - "Config *files* need brackets; config *dicts* " - "(e.g. passed to tree.mount) do not." % (sn, key)) - - def check_static_paths(self): - """Check Application config for incorrect static paths.""" - # Use the dummy Request object in the main thread. - request = cherrypy.request - for sn, app in cherrypy.tree.apps.items(): - if not isinstance(app, cherrypy.Application): - continue - request.app = app - for section in app.config: - # get_resource will populate request.config - request.get_resource(section + "/dummy.html") - conf = request.config.get - - if conf("tools.staticdir.on", False): - msg = "" - root = conf("tools.staticdir.root") - dir = conf("tools.staticdir.dir") - if dir is None: - msg = "tools.staticdir.dir is not set." - else: - fulldir = "" - if os.path.isabs(dir): - fulldir = dir - if root: - msg = ("dir is an absolute path, even " - "though a root is provided.") - testdir = os.path.join(root, dir[1:]) - if os.path.exists(testdir): - msg += ( - "\nIf you meant to serve the " - "filesystem folder at %r, remove the " - "leading slash from dir." % (testdir,)) - else: - if not root: - msg = ( - "dir is a relative path and " - "no root provided.") - else: - fulldir = os.path.join(root, dir) - if not os.path.isabs(fulldir): - msg = ("%r is not an absolute path." % ( - fulldir,)) - - if fulldir and not os.path.exists(fulldir): - if msg: - msg += "\n" - msg += ("%r (root + dir) is not an existing " - "filesystem path." % fulldir) - - if msg: - warnings.warn("%s\nsection: [%s]\nroot: %r\ndir: %r" - % (msg, section, root, dir)) - - # -------------------------- Compatibility -------------------------- # - obsolete = { - 'server.default_content_type': 'tools.response_headers.headers', - 'log_access_file': 'log.access_file', - 'log_config_options': None, - 'log_file': 'log.error_file', - 'log_file_not_found': None, - 'log_request_headers': 'tools.log_headers.on', - 'log_to_screen': 'log.screen', - 'show_tracebacks': 'request.show_tracebacks', - 'throw_errors': 'request.throw_errors', - 'profiler.on': ('cherrypy.tree.mount(profiler.make_app(' - 'cherrypy.Application(Root())))'), - } - - deprecated = {} - - def _compat(self, config): - """Process config and warn on each obsolete or deprecated entry.""" - for section, conf in config.items(): - if isinstance(conf, dict): - for k, v in conf.items(): - if k in self.obsolete: - warnings.warn("%r is obsolete. Use %r instead.\n" - "section: [%s]" % - (k, self.obsolete[k], section)) - elif k in self.deprecated: - warnings.warn("%r is deprecated. Use %r instead.\n" - "section: [%s]" % - (k, self.deprecated[k], section)) - else: - if section in self.obsolete: - warnings.warn("%r is obsolete. Use %r instead." - % (section, self.obsolete[section])) - elif section in self.deprecated: - warnings.warn("%r is deprecated. Use %r instead." - % (section, self.deprecated[section])) - - def check_compatibility(self): - """Process config and warn on each obsolete or deprecated entry.""" - self._compat(cherrypy.config) - for sn, app in cherrypy.tree.apps.items(): - if not isinstance(app, cherrypy.Application): - continue - self._compat(app.config) - - # ------------------------ Known Namespaces ------------------------ # - extra_config_namespaces = [] - - def _known_ns(self, app): - ns = ["wsgi"] - ns.extend(copykeys(app.toolboxes)) - ns.extend(copykeys(app.namespaces)) - ns.extend(copykeys(app.request_class.namespaces)) - ns.extend(copykeys(cherrypy.config.namespaces)) - ns += self.extra_config_namespaces - - for section, conf in app.config.items(): - is_path_section = section.startswith("/") - if is_path_section and isinstance(conf, dict): - for k, v in conf.items(): - atoms = k.split(".") - if len(atoms) > 1: - if atoms[0] not in ns: - # Spit out a special warning if a known - # namespace is preceded by "cherrypy." - if atoms[0] == "cherrypy" and atoms[1] in ns: - msg = ( - "The config entry %r is invalid; " - "try %r instead.\nsection: [%s]" - % (k, ".".join(atoms[1:]), section)) - else: - msg = ( - "The config entry %r is invalid, " - "because the %r config namespace " - "is unknown.\n" - "section: [%s]" % (k, atoms[0], section)) - warnings.warn(msg) - elif atoms[0] == "tools": - if atoms[1] not in dir(cherrypy.tools): - msg = ( - "The config entry %r may be invalid, " - "because the %r tool was not found.\n" - "section: [%s]" % (k, atoms[1], section)) - warnings.warn(msg) - - def check_config_namespaces(self): - """Process config and warn on each unknown config namespace.""" - for sn, app in cherrypy.tree.apps.items(): - if not isinstance(app, cherrypy.Application): - continue - self._known_ns(app) - - # -------------------------- Config Types -------------------------- # - known_config_types = {} - - def _populate_known_types(self): - b = [x for x in vars(builtins).values() - if type(x) is type(str)] - - def traverse(obj, namespace): - for name in dir(obj): - # Hack for 3.2's warning about body_params - if name == 'body_params': - continue - vtype = type(getattr(obj, name, None)) - if vtype in b: - self.known_config_types[namespace + "." + name] = vtype - - traverse(cherrypy.request, "request") - traverse(cherrypy.response, "response") - traverse(cherrypy.server, "server") - traverse(cherrypy.engine, "engine") - traverse(cherrypy.log, "log") - - def _known_types(self, config): - msg = ("The config entry %r in section %r is of type %r, " - "which does not match the expected type %r.") - - for section, conf in config.items(): - if isinstance(conf, dict): - for k, v in conf.items(): - if v is not None: - expected_type = self.known_config_types.get(k, None) - vtype = type(v) - if expected_type and vtype != expected_type: - warnings.warn(msg % (k, section, vtype.__name__, - expected_type.__name__)) - else: - k, v = section, conf - if v is not None: - expected_type = self.known_config_types.get(k, None) - vtype = type(v) - if expected_type and vtype != expected_type: - warnings.warn(msg % (k, section, vtype.__name__, - expected_type.__name__)) - - def check_config_types(self): - """Assert that config values are of the same type as default values.""" - self._known_types(cherrypy.config) - for sn, app in cherrypy.tree.apps.items(): - if not isinstance(app, cherrypy.Application): - continue - self._known_types(app.config) - - # -------------------- Specific config warnings -------------------- # - def check_localhost(self): - """Warn if any socket_host is 'localhost'. See #711.""" - for k, v in cherrypy.config.items(): - if k == 'server.socket_host' and v == 'localhost': - warnings.warn("The use of 'localhost' as a socket host can " - "cause problems on newer systems, since " - "'localhost' can map to either an IPv4 or an " - "IPv6 address. You should use '127.0.0.1' " - "or '[::1]' instead.") diff --git a/lib/cherrypy/_cpcompat.py b/lib/cherrypy/_cpcompat.py deleted file mode 100644 index 8a98b38b..00000000 --- a/lib/cherrypy/_cpcompat.py +++ /dev/null @@ -1,383 +0,0 @@ -"""Compatibility code for using CherryPy with various versions of Python. - -CherryPy 3.2 is compatible with Python versions 2.3+. This module provides a -useful abstraction over the differences between Python versions, sometimes by -preferring a newer idiom, sometimes an older one, and sometimes a custom one. - -In particular, Python 2 uses str and '' for byte strings, while Python 3 -uses str and '' for unicode strings. We will call each of these the 'native -string' type for each version. Because of this major difference, this module -provides new 'bytestr', 'unicodestr', and 'nativestr' attributes, as well as -two functions: 'ntob', which translates native strings (of type 'str') into -byte strings regardless of Python version, and 'ntou', which translates native -strings to unicode strings. This also provides a 'BytesIO' name for dealing -specifically with bytes, and a 'StringIO' name for dealing with native strings. -It also provides a 'base64_decode' function with native strings as input and -output. -""" -import os -import re -import sys -import threading - -if sys.version_info >= (3, 0): - py3k = True - bytestr = bytes - unicodestr = str - nativestr = unicodestr - basestring = (bytes, str) - - def ntob(n, encoding='ISO-8859-1'): - """Return the given native string as a byte string in the given - encoding. - """ - assert_native(n) - # In Python 3, the native string type is unicode - return n.encode(encoding) - - def ntou(n, encoding='ISO-8859-1'): - """Return the given native string as a unicode string with the given - encoding. - """ - assert_native(n) - # In Python 3, the native string type is unicode - return n - - def tonative(n, encoding='ISO-8859-1'): - """Return the given string as a native string in the given encoding.""" - # In Python 3, the native string type is unicode - if isinstance(n, bytes): - return n.decode(encoding) - return n - # type("") - from io import StringIO - # bytes: - from io import BytesIO as BytesIO -else: - # Python 2 - py3k = False - bytestr = str - unicodestr = unicode - nativestr = bytestr - basestring = basestring - - def ntob(n, encoding='ISO-8859-1'): - """Return the given native string as a byte string in the given - encoding. - """ - assert_native(n) - # In Python 2, the native string type is bytes. Assume it's already - # in the given encoding, which for ISO-8859-1 is almost always what - # was intended. - return n - - def ntou(n, encoding='ISO-8859-1'): - """Return the given native string as a unicode string with the given - encoding. - """ - assert_native(n) - # In Python 2, the native string type is bytes. - # First, check for the special encoding 'escape'. The test suite uses - # this to signal that it wants to pass a string with embedded \uXXXX - # escapes, but without having to prefix it with u'' for Python 2, - # but no prefix for Python 3. - if encoding == 'escape': - return unicode( - re.sub(r'\\u([0-9a-zA-Z]{4})', - lambda m: unichr(int(m.group(1), 16)), - n.decode('ISO-8859-1'))) - # Assume it's already in the given encoding, which for ISO-8859-1 - # is almost always what was intended. - return n.decode(encoding) - - def tonative(n, encoding='ISO-8859-1'): - """Return the given string as a native string in the given encoding.""" - # In Python 2, the native string type is bytes. - if isinstance(n, unicode): - return n.encode(encoding) - return n - try: - # type("") - from cStringIO import StringIO - except ImportError: - # type("") - from StringIO import StringIO - # bytes: - BytesIO = StringIO - - -def assert_native(n): - if not isinstance(n, nativestr): - raise TypeError("n must be a native str (got %s)" % type(n).__name__) - -try: - set = set -except NameError: - from sets import Set as set - -try: - # Python 3.1+ - from base64 import decodebytes as _base64_decodebytes -except ImportError: - # Python 3.0- - # since CherryPy claims compability with Python 2.3, we must use - # the legacy API of base64 - from base64 import decodestring as _base64_decodebytes - - -def base64_decode(n, encoding='ISO-8859-1'): - """Return the native string base64-decoded (as a native string).""" - if isinstance(n, unicodestr): - b = n.encode(encoding) - else: - b = n - b = _base64_decodebytes(b) - if nativestr is unicodestr: - return b.decode(encoding) - else: - return b - -try: - # Python 2.5+ - from hashlib import md5 -except ImportError: - from md5 import new as md5 - -try: - # Python 2.5+ - from hashlib import sha1 as sha -except ImportError: - from sha import new as sha - -try: - sorted = sorted -except NameError: - def sorted(i): - i = i[:] - i.sort() - return i - -try: - reversed = reversed -except NameError: - def reversed(x): - i = len(x) - while i > 0: - i -= 1 - yield x[i] - -try: - # Python 3 - from urllib.parse import urljoin, urlencode - from urllib.parse import quote, quote_plus - from urllib.request import unquote, urlopen - from urllib.request import parse_http_list, parse_keqv_list -except ImportError: - # Python 2 - from urlparse import urljoin - from urllib import urlencode, urlopen - from urllib import quote, quote_plus - from urllib import unquote - from urllib2 import parse_http_list, parse_keqv_list - -try: - from threading import local as threadlocal -except ImportError: - from cherrypy._cpthreadinglocal import local as threadlocal - -try: - dict.iteritems - # Python 2 - iteritems = lambda d: d.iteritems() - copyitems = lambda d: d.items() -except AttributeError: - # Python 3 - iteritems = lambda d: d.items() - copyitems = lambda d: list(d.items()) - -try: - dict.iterkeys - # Python 2 - iterkeys = lambda d: d.iterkeys() - copykeys = lambda d: d.keys() -except AttributeError: - # Python 3 - iterkeys = lambda d: d.keys() - copykeys = lambda d: list(d.keys()) - -try: - dict.itervalues - # Python 2 - itervalues = lambda d: d.itervalues() - copyvalues = lambda d: d.values() -except AttributeError: - # Python 3 - itervalues = lambda d: d.values() - copyvalues = lambda d: list(d.values()) - -try: - # Python 3 - import builtins -except ImportError: - # Python 2 - import __builtin__ as builtins - -try: - # Python 2. We try Python 2 first clients on Python 2 - # don't try to import the 'http' module from cherrypy.lib - from Cookie import SimpleCookie, CookieError - from httplib import BadStatusLine, HTTPConnection, IncompleteRead - from httplib import NotConnected - from BaseHTTPServer import BaseHTTPRequestHandler -except ImportError: - # Python 3 - from http.cookies import SimpleCookie, CookieError - from http.client import BadStatusLine, HTTPConnection, IncompleteRead - from http.client import NotConnected - from http.server import BaseHTTPRequestHandler - -# Some platforms don't expose HTTPSConnection, so handle it separately -if py3k: - try: - from http.client import HTTPSConnection - except ImportError: - # Some platforms which don't have SSL don't expose HTTPSConnection - HTTPSConnection = None -else: - try: - from httplib import HTTPSConnection - except ImportError: - HTTPSConnection = None - -try: - # Python 2 - xrange = xrange -except NameError: - # Python 3 - xrange = range - -import threading -if hasattr(threading.Thread, "daemon"): - # Python 2.6+ - def get_daemon(t): - return t.daemon - - def set_daemon(t, val): - t.daemon = val -else: - def get_daemon(t): - return t.isDaemon() - - def set_daemon(t, val): - t.setDaemon(val) - -try: - from email.utils import formatdate - - def HTTPDate(timeval=None): - return formatdate(timeval, usegmt=True) -except ImportError: - from rfc822 import formatdate as HTTPDate - -try: - # Python 3 - from urllib.parse import unquote as parse_unquote - - def unquote_qs(atom, encoding, errors='strict'): - return parse_unquote( - atom.replace('+', ' '), - encoding=encoding, - errors=errors) -except ImportError: - # Python 2 - from urllib import unquote as parse_unquote - - def unquote_qs(atom, encoding, errors='strict'): - return parse_unquote(atom.replace('+', ' ')).decode(encoding, errors) - -try: - # Prefer simplejson, which is usually more advanced than the builtin - # module. - import simplejson as json - json_decode = json.JSONDecoder().decode - _json_encode = json.JSONEncoder().iterencode -except ImportError: - if sys.version_info >= (2, 6): - # Python >=2.6 : json is part of the standard library - import json - json_decode = json.JSONDecoder().decode - _json_encode = json.JSONEncoder().iterencode - else: - json = None - - def json_decode(s): - raise ValueError('No JSON library is available') - - def _json_encode(s): - raise ValueError('No JSON library is available') -finally: - if json and py3k: - # The two Python 3 implementations (simplejson/json) - # outputs str. We need bytes. - def json_encode(value): - for chunk in _json_encode(value): - yield chunk.encode('utf8') - else: - json_encode = _json_encode - - -try: - import cPickle as pickle -except ImportError: - # In Python 2, pickle is a Python version. - # In Python 3, pickle is the sped-up C version. - import pickle - -try: - os.urandom(20) - import binascii - - def random20(): - return binascii.hexlify(os.urandom(20)).decode('ascii') -except (AttributeError, NotImplementedError): - import random - # os.urandom not available until Python 2.4. Fall back to random.random. - - def random20(): - return sha('%s' % random.random()).hexdigest() - -try: - from _thread import get_ident as get_thread_ident -except ImportError: - from thread import get_ident as get_thread_ident - -try: - # Python 3 - next = next -except NameError: - # Python 2 - def next(i): - return i.next() - -if sys.version_info >= (3, 3): - Timer = threading.Timer - Event = threading.Event -else: - # Python 3.2 and earlier - Timer = threading._Timer - Event = threading._Event - -# Prior to Python 2.6, the Thread class did not have a .daemon property. -# This mix-in adds that property. - - -class SetDaemonProperty: - - def __get_daemon(self): - return self.isDaemon() - - def __set_daemon(self, daemon): - self.setDaemon(daemon) - - if sys.version_info < (2, 6): - daemon = property(__get_daemon, __set_daemon) diff --git a/lib/cherrypy/_cpcompat_subprocess.py b/lib/cherrypy/_cpcompat_subprocess.py deleted file mode 100644 index 478f4a74..00000000 --- a/lib/cherrypy/_cpcompat_subprocess.py +++ /dev/null @@ -1,1544 +0,0 @@ -# subprocess - Subprocesses with accessible I/O streams -# -# For more information about this module, see PEP 324. -# -# This module should remain compatible with Python 2.2, see PEP 291. -# -# Copyright (c) 2003-2005 by Peter Astrand -# -# Licensed to PSF under a Contributor Agreement. -# See http://www.python.org/2.4/license for licensing details. - -r"""subprocess - Subprocesses with accessible I/O streams - -This module allows you to spawn processes, connect to their -input/output/error pipes, and obtain their return codes. This module -intends to replace several other, older modules and functions, like: - -os.system -os.spawn* -os.popen* -popen2.* -commands.* - -Information about how the subprocess module can be used to replace these -modules and functions can be found below. - - - -Using the subprocess module -=========================== -This module defines one class called Popen: - -class Popen(args, bufsize=0, executable=None, - stdin=None, stdout=None, stderr=None, - preexec_fn=None, close_fds=False, shell=False, - cwd=None, env=None, universal_newlines=False, - startupinfo=None, creationflags=0): - - -Arguments are: - -args should be a string, or a sequence of program arguments. The -program to execute is normally the first item in the args sequence or -string, but can be explicitly set by using the executable argument. - -On UNIX, with shell=False (default): In this case, the Popen class -uses os.execvp() to execute the child program. args should normally -be a sequence. A string will be treated as a sequence with the string -as the only item (the program to execute). - -On UNIX, with shell=True: If args is a string, it specifies the -command string to execute through the shell. If args is a sequence, -the first item specifies the command string, and any additional items -will be treated as additional shell arguments. - -On Windows: the Popen class uses CreateProcess() to execute the child -program, which operates on strings. If args is a sequence, it will be -converted to a string using the list2cmdline method. Please note that -not all MS Windows applications interpret the command line the same -way: The list2cmdline is designed for applications using the same -rules as the MS C runtime. - -bufsize, if given, has the same meaning as the corresponding argument -to the built-in open() function: 0 means unbuffered, 1 means line -buffered, any other positive value means use a buffer of -(approximately) that size. A negative bufsize means to use the system -default, which usually means fully buffered. The default value for -bufsize is 0 (unbuffered). - -stdin, stdout and stderr specify the executed programs' standard -input, standard output and standard error file handles, respectively. -Valid values are PIPE, an existing file descriptor (a positive -integer), an existing file object, and None. PIPE indicates that a -new pipe to the child should be created. With None, no redirection -will occur; the child's file handles will be inherited from the -parent. Additionally, stderr can be STDOUT, which indicates that the -stderr data from the applications should be captured into the same -file handle as for stdout. - -If preexec_fn is set to a callable object, this object will be called -in the child process just before the child is executed. - -If close_fds is true, all file descriptors except 0, 1 and 2 will be -closed before the child process is executed. - -if shell is true, the specified command will be executed through the -shell. - -If cwd is not None, the current directory will be changed to cwd -before the child is executed. - -If env is not None, it defines the environment variables for the new -process. - -If universal_newlines is true, the file objects stdout and stderr are -opened as a text files, but lines may be terminated by any of '\n', -the Unix end-of-line convention, '\r', the Macintosh convention or -'\r\n', the Windows convention. All of these external representations -are seen as '\n' by the Python program. Note: This feature is only -available if Python is built with universal newline support (the -default). Also, the newlines attribute of the file objects stdout, -stdin and stderr are not updated by the communicate() method. - -The startupinfo and creationflags, if given, will be passed to the -underlying CreateProcess() function. They can specify things such as -appearance of the main window and priority for the new process. -(Windows only) - - -This module also defines some shortcut functions: - -call(*popenargs, **kwargs): - Run command with arguments. Wait for command to complete, then - return the returncode attribute. - - The arguments are the same as for the Popen constructor. Example: - - retcode = call(["ls", "-l"]) - -check_call(*popenargs, **kwargs): - Run command with arguments. Wait for command to complete. If the - exit code was zero then return, otherwise raise - CalledProcessError. The CalledProcessError object will have the - return code in the returncode attribute. - - The arguments are the same as for the Popen constructor. Example: - - check_call(["ls", "-l"]) - -check_output(*popenargs, **kwargs): - Run command with arguments and return its output as a byte string. - - If the exit code was non-zero it raises a CalledProcessError. The - CalledProcessError object will have the return code in the returncode - attribute and output in the output attribute. - - The arguments are the same as for the Popen constructor. Example: - - output = check_output(["ls", "-l", "/dev/null"]) - - -Exceptions ----------- -Exceptions raised in the child process, before the new program has -started to execute, will be re-raised in the parent. Additionally, -the exception object will have one extra attribute called -'child_traceback', which is a string containing traceback information -from the childs point of view. - -The most common exception raised is OSError. This occurs, for -example, when trying to execute a non-existent file. Applications -should prepare for OSErrors. - -A ValueError will be raised if Popen is called with invalid arguments. - -check_call() and check_output() will raise CalledProcessError, if the -called process returns a non-zero return code. - - -Security --------- -Unlike some other popen functions, this implementation will never call -/bin/sh implicitly. This means that all characters, including shell -metacharacters, can safely be passed to child processes. - - -Popen objects -============= -Instances of the Popen class have the following methods: - -poll() - Check if child process has terminated. Returns returncode - attribute. - -wait() - Wait for child process to terminate. Returns returncode attribute. - -communicate(input=None) - Interact with process: Send data to stdin. Read data from stdout - and stderr, until end-of-file is reached. Wait for process to - terminate. The optional input argument should be a string to be - sent to the child process, or None, if no data should be sent to - the child. - - communicate() returns a tuple (stdout, stderr). - - Note: The data read is buffered in memory, so do not use this - method if the data size is large or unlimited. - -The following attributes are also available: - -stdin - If the stdin argument is PIPE, this attribute is a file object - that provides input to the child process. Otherwise, it is None. - -stdout - If the stdout argument is PIPE, this attribute is a file object - that provides output from the child process. Otherwise, it is - None. - -stderr - If the stderr argument is PIPE, this attribute is file object that - provides error output from the child process. Otherwise, it is - None. - -pid - The process ID of the child process. - -returncode - The child return code. A None value indicates that the process - hasn't terminated yet. A negative value -N indicates that the - child was terminated by signal N (UNIX only). - - -Replacing older functions with the subprocess module -==================================================== -In this section, "a ==> b" means that b can be used as a replacement -for a. - -Note: All functions in this section fail (more or less) silently if -the executed program cannot be found; this module raises an OSError -exception. - -In the following examples, we assume that the subprocess module is -imported with "from subprocess import *". - - -Replacing /bin/sh shell backquote ---------------------------------- -output=`mycmd myarg` -==> -output = Popen(["mycmd", "myarg"], stdout=PIPE).communicate()[0] - - -Replacing shell pipe line -------------------------- -output=`dmesg | grep hda` -==> -p1 = Popen(["dmesg"], stdout=PIPE) -p2 = Popen(["grep", "hda"], stdin=p1.stdout, stdout=PIPE) -output = p2.communicate()[0] - - -Replacing os.system() ---------------------- -sts = os.system("mycmd" + " myarg") -==> -p = Popen("mycmd" + " myarg", shell=True) -pid, sts = os.waitpid(p.pid, 0) - -Note: - -* Calling the program through the shell is usually not required. - -* It's easier to look at the returncode attribute than the - exitstatus. - -A more real-world example would look like this: - -try: - retcode = call("mycmd" + " myarg", shell=True) - if retcode < 0: - print >>sys.stderr, "Child was terminated by signal", -retcode - else: - print >>sys.stderr, "Child returned", retcode -except OSError, e: - print >>sys.stderr, "Execution failed:", e - - -Replacing os.spawn* -------------------- -P_NOWAIT example: - -pid = os.spawnlp(os.P_NOWAIT, "/bin/mycmd", "mycmd", "myarg") -==> -pid = Popen(["/bin/mycmd", "myarg"]).pid - - -P_WAIT example: - -retcode = os.spawnlp(os.P_WAIT, "/bin/mycmd", "mycmd", "myarg") -==> -retcode = call(["/bin/mycmd", "myarg"]) - - -Vector example: - -os.spawnvp(os.P_NOWAIT, path, args) -==> -Popen([path] + args[1:]) - - -Environment example: - -os.spawnlpe(os.P_NOWAIT, "/bin/mycmd", "mycmd", "myarg", env) -==> -Popen(["/bin/mycmd", "myarg"], env={"PATH": "/usr/bin"}) - - -Replacing os.popen* -------------------- -pipe = os.popen("cmd", mode='r', bufsize) -==> -pipe = Popen("cmd", shell=True, bufsize=bufsize, stdout=PIPE).stdout - -pipe = os.popen("cmd", mode='w', bufsize) -==> -pipe = Popen("cmd", shell=True, bufsize=bufsize, stdin=PIPE).stdin - - -(child_stdin, child_stdout) = os.popen2("cmd", mode, bufsize) -==> -p = Popen("cmd", shell=True, bufsize=bufsize, - stdin=PIPE, stdout=PIPE, close_fds=True) -(child_stdin, child_stdout) = (p.stdin, p.stdout) - - -(child_stdin, - child_stdout, - child_stderr) = os.popen3("cmd", mode, bufsize) -==> -p = Popen("cmd", shell=True, bufsize=bufsize, - stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=True) -(child_stdin, - child_stdout, - child_stderr) = (p.stdin, p.stdout, p.stderr) - - -(child_stdin, child_stdout_and_stderr) = os.popen4("cmd", mode, - bufsize) -==> -p = Popen("cmd", shell=True, bufsize=bufsize, - stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True) -(child_stdin, child_stdout_and_stderr) = (p.stdin, p.stdout) - -On Unix, os.popen2, os.popen3 and os.popen4 also accept a sequence as -the command to execute, in which case arguments will be passed -directly to the program without shell intervention. This usage can be -replaced as follows: - -(child_stdin, child_stdout) = os.popen2(["/bin/ls", "-l"], mode, - bufsize) -==> -p = Popen(["/bin/ls", "-l"], bufsize=bufsize, stdin=PIPE, stdout=PIPE) -(child_stdin, child_stdout) = (p.stdin, p.stdout) - -Return code handling translates as follows: - -pipe = os.popen("cmd", 'w') -... -rc = pipe.close() -if rc is not None and rc % 256: - print "There were some errors" -==> -process = Popen("cmd", 'w', shell=True, stdin=PIPE) -... -process.stdin.close() -if process.wait() != 0: - print "There were some errors" - - -Replacing popen2.* ------------------- -(child_stdout, child_stdin) = popen2.popen2("somestring", bufsize, mode) -==> -p = Popen(["somestring"], shell=True, bufsize=bufsize - stdin=PIPE, stdout=PIPE, close_fds=True) -(child_stdout, child_stdin) = (p.stdout, p.stdin) - -On Unix, popen2 also accepts a sequence as the command to execute, in -which case arguments will be passed directly to the program without -shell intervention. This usage can be replaced as follows: - -(child_stdout, child_stdin) = popen2.popen2(["mycmd", "myarg"], bufsize, - mode) -==> -p = Popen(["mycmd", "myarg"], bufsize=bufsize, - stdin=PIPE, stdout=PIPE, close_fds=True) -(child_stdout, child_stdin) = (p.stdout, p.stdin) - -The popen2.Popen3 and popen2.Popen4 basically works as subprocess.Popen, -except that: - -* subprocess.Popen raises an exception if the execution fails -* the capturestderr argument is replaced with the stderr argument. -* stdin=PIPE and stdout=PIPE must be specified. -* popen2 closes all filedescriptors by default, but you have to specify - close_fds=True with subprocess.Popen. -""" - -import sys -mswindows = (sys.platform == "win32") - -import os -import types -import traceback -import gc -import signal -import errno - -try: - set -except NameError: - from sets import Set as set - -# Exception classes used by this module. - - -class CalledProcessError(Exception): - - """This exception is raised when a process run by check_call() or - check_output() returns a non-zero exit status. - The exit status will be stored in the returncode attribute; - check_output() will also store the output in the output attribute. - """ - - def __init__(self, returncode, cmd, output=None): - self.returncode = returncode - self.cmd = cmd - self.output = output - - def __str__(self): - return "Command '%s' returned non-zero exit status %d" % ( - self.cmd, self.returncode) - - -if mswindows: - import threading - import msvcrt - import _subprocess - - class STARTUPINFO: - dwFlags = 0 - hStdInput = None - hStdOutput = None - hStdError = None - wShowWindow = 0 - - class pywintypes: - error = IOError -else: - import select - _has_poll = hasattr(select, 'poll') - import fcntl - import pickle - - # When select or poll has indicated that the file is writable, - # we can write up to _PIPE_BUF bytes without risk of blocking. - # POSIX defines PIPE_BUF as >= 512. - _PIPE_BUF = getattr(select, 'PIPE_BUF', 512) - - -__all__ = ["Popen", "PIPE", "STDOUT", "call", "check_call", - "check_output", "CalledProcessError"] - -if mswindows: - from _subprocess import CREATE_NEW_CONSOLE, CREATE_NEW_PROCESS_GROUP, \ - STD_INPUT_HANDLE, STD_OUTPUT_HANDLE, \ - STD_ERROR_HANDLE, SW_HIDE, \ - STARTF_USESTDHANDLES, STARTF_USESHOWWINDOW - - __all__.extend(["CREATE_NEW_CONSOLE", "CREATE_NEW_PROCESS_GROUP", - "STD_INPUT_HANDLE", "STD_OUTPUT_HANDLE", - "STD_ERROR_HANDLE", "SW_HIDE", - "STARTF_USESTDHANDLES", "STARTF_USESHOWWINDOW"]) -try: - MAXFD = os.sysconf("SC_OPEN_MAX") -except: - MAXFD = 256 - -_active = [] - - -def _cleanup(): - for inst in _active[:]: - res = inst._internal_poll(_deadstate=sys.maxint) - if res is not None: - try: - _active.remove(inst) - except ValueError: - # This can happen if two threads create a new Popen instance. - # It's harmless that it was already removed, so ignore. - pass - -PIPE = -1 -STDOUT = -2 - - -def _eintr_retry_call(func, *args): - while True: - try: - return func(*args) - except (OSError, IOError), e: - if e.errno == errno.EINTR: - continue - raise - - -def call(*popenargs, **kwargs): - """Run command with arguments. Wait for command to complete, then - return the returncode attribute. - - The arguments are the same as for the Popen constructor. Example: - - retcode = call(["ls", "-l"]) - """ - return Popen(*popenargs, **kwargs).wait() - - -def check_call(*popenargs, **kwargs): - """Run command with arguments. Wait for command to complete. If - the exit code was zero then return, otherwise raise - CalledProcessError. The CalledProcessError object will have the - return code in the returncode attribute. - - The arguments are the same as for the Popen constructor. Example: - - check_call(["ls", "-l"]) - """ - retcode = call(*popenargs, **kwargs) - if retcode: - cmd = kwargs.get("args") - if cmd is None: - cmd = popenargs[0] - raise CalledProcessError(retcode, cmd) - return 0 - - -def check_output(*popenargs, **kwargs): - r"""Run command with arguments and return its output as a byte string. - - If the exit code was non-zero it raises a CalledProcessError. The - CalledProcessError object will have the return code in the returncode - attribute and output in the output attribute. - - The arguments are the same as for the Popen constructor. Example: - - >>> check_output(["ls", "-l", "/dev/null"]) - 'crw-rw-rw- 1 root root 1, 3 Oct 18 2007 /dev/null\n' - - The stdout argument is not allowed as it is used internally. - To capture standard error in the result, use stderr=STDOUT. - - >>> check_output(["/bin/sh", "-c", - ... "ls -l non_existent_file ; exit 0"], - ... stderr=STDOUT) - 'ls: non_existent_file: No such file or directory\n' - """ - if 'stdout' in kwargs: - raise ValueError('stdout argument not allowed, it will be overridden.') - process = Popen(stdout=PIPE, *popenargs, **kwargs) - output, unused_err = process.communicate() - retcode = process.poll() - if retcode: - cmd = kwargs.get("args") - if cmd is None: - cmd = popenargs[0] - raise CalledProcessError(retcode, cmd, output=output) - return output - - -def list2cmdline(seq): - """ - Translate a sequence of arguments into a command line - string, using the same rules as the MS C runtime: - - 1) Arguments are delimited by white space, which is either a - space or a tab. - - 2) A string surrounded by double quotation marks is - interpreted as a single argument, regardless of white space - contained within. A quoted string can be embedded in an - argument. - - 3) A double quotation mark preceded by a backslash is - interpreted as a literal double quotation mark. - - 4) Backslashes are interpreted literally, unless they - immediately precede a double quotation mark. - - 5) If backslashes immediately precede a double quotation mark, - every pair of backslashes is interpreted as a literal - backslash. If the number of backslashes is odd, the last - backslash escapes the next double quotation mark as - described in rule 3. - """ - - # See - # http://msdn.microsoft.com/en-us/library/17w5ykft.aspx - # or search http://msdn.microsoft.com for - # "Parsing C++ Command-Line Arguments" - result = [] - needquote = False - for arg in seq: - bs_buf = [] - - # Add a space to separate this argument from the others - if result: - result.append(' ') - - needquote = (" " in arg) or ("\t" in arg) or not arg - if needquote: - result.append('"') - - for c in arg: - if c == '\\': - # Don't know if we need to double yet. - bs_buf.append(c) - elif c == '"': - # Double backslashes. - result.append('\\' * len(bs_buf) * 2) - bs_buf = [] - result.append('\\"') - else: - # Normal char - if bs_buf: - result.extend(bs_buf) - bs_buf = [] - result.append(c) - - # Add remaining backslashes, if any. - if bs_buf: - result.extend(bs_buf) - - if needquote: - result.extend(bs_buf) - result.append('"') - - return ''.join(result) - - -class Popen(object): - - def __init__(self, args, bufsize=0, executable=None, - stdin=None, stdout=None, stderr=None, - preexec_fn=None, close_fds=False, shell=False, - cwd=None, env=None, universal_newlines=False, - startupinfo=None, creationflags=0): - """Create new Popen instance.""" - _cleanup() - - self._child_created = False - if not isinstance(bufsize, (int, long)): - raise TypeError("bufsize must be an integer") - - if mswindows: - if preexec_fn is not None: - raise ValueError("preexec_fn is not supported on Windows " - "platforms") - if close_fds and (stdin is not None or stdout is not None or - stderr is not None): - raise ValueError("close_fds is not supported on Windows " - "platforms if you redirect " - "stdin/stdout/stderr") - else: - # POSIX - if startupinfo is not None: - raise ValueError("startupinfo is only supported on Windows " - "platforms") - if creationflags != 0: - raise ValueError("creationflags is only supported on Windows " - "platforms") - - self.stdin = None - self.stdout = None - self.stderr = None - self.pid = None - self.returncode = None - self.universal_newlines = universal_newlines - - # Input and output objects. The general principle is like - # this: - # - # Parent Child - # ------ ----- - # p2cwrite ---stdin---> p2cread - # c2pread <--stdout--- c2pwrite - # errread <--stderr--- errwrite - # - # On POSIX, the child objects are file descriptors. On - # Windows, these are Windows file handles. The parent objects - # are file descriptors on both platforms. The parent objects - # are None when not using PIPEs. The child objects are None - # when not redirecting. - - (p2cread, p2cwrite, - c2pread, c2pwrite, - errread, errwrite) = self._get_handles(stdin, stdout, stderr) - - self._execute_child(args, executable, preexec_fn, close_fds, - cwd, env, universal_newlines, - startupinfo, creationflags, shell, - p2cread, p2cwrite, - c2pread, c2pwrite, - errread, errwrite) - - if mswindows: - if p2cwrite is not None: - p2cwrite = msvcrt.open_osfhandle(p2cwrite.Detach(), 0) - if c2pread is not None: - c2pread = msvcrt.open_osfhandle(c2pread.Detach(), 0) - if errread is not None: - errread = msvcrt.open_osfhandle(errread.Detach(), 0) - - if p2cwrite is not None: - self.stdin = os.fdopen(p2cwrite, 'wb', bufsize) - if c2pread is not None: - if universal_newlines: - self.stdout = os.fdopen(c2pread, 'rU', bufsize) - else: - self.stdout = os.fdopen(c2pread, 'rb', bufsize) - if errread is not None: - if universal_newlines: - self.stderr = os.fdopen(errread, 'rU', bufsize) - else: - self.stderr = os.fdopen(errread, 'rb', bufsize) - - def _translate_newlines(self, data): - data = data.replace("\r\n", "\n") - data = data.replace("\r", "\n") - return data - - def __del__(self, _maxint=sys.maxint, _active=_active): - # If __init__ hasn't had a chance to execute (e.g. if it - # was passed an undeclared keyword argument), we don't - # have a _child_created attribute at all. - if not getattr(self, '_child_created', False): - # We didn't get to successfully create a child process. - return - # In case the child hasn't been waited on, check if it's done. - self._internal_poll(_deadstate=_maxint) - if self.returncode is None and _active is not None: - # Child is still running, keep us alive until we can wait on it. - _active.append(self) - - def communicate(self, input=None): - """Interact with process: Send data to stdin. Read data from - stdout and stderr, until end-of-file is reached. Wait for - process to terminate. The optional input argument should be a - string to be sent to the child process, or None, if no data - should be sent to the child. - - communicate() returns a tuple (stdout, stderr).""" - - # Optimization: If we are only using one pipe, or no pipe at - # all, using select() or threads is unnecessary. - if [self.stdin, self.stdout, self.stderr].count(None) >= 2: - stdout = None - stderr = None - if self.stdin: - if input: - try: - self.stdin.write(input) - except IOError, e: - if e.errno != errno.EPIPE and e.errno != errno.EINVAL: - raise - self.stdin.close() - elif self.stdout: - stdout = _eintr_retry_call(self.stdout.read) - self.stdout.close() - elif self.stderr: - stderr = _eintr_retry_call(self.stderr.read) - self.stderr.close() - self.wait() - return (stdout, stderr) - - return self._communicate(input) - - def poll(self): - return self._internal_poll() - - if mswindows: - # - # Windows methods - # - def _get_handles(self, stdin, stdout, stderr): - """Construct and return tuple with IO objects: - p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite - """ - if stdin is None and stdout is None and stderr is None: - return (None, None, None, None, None, None) - - p2cread, p2cwrite = None, None - c2pread, c2pwrite = None, None - errread, errwrite = None, None - - if stdin is None: - p2cread = _subprocess.GetStdHandle( - _subprocess.STD_INPUT_HANDLE) - if p2cread is None: - p2cread, _ = _subprocess.CreatePipe(None, 0) - elif stdin == PIPE: - p2cread, p2cwrite = _subprocess.CreatePipe(None, 0) - elif isinstance(stdin, int): - p2cread = msvcrt.get_osfhandle(stdin) - else: - # Assuming file-like object - p2cread = msvcrt.get_osfhandle(stdin.fileno()) - p2cread = self._make_inheritable(p2cread) - - if stdout is None: - c2pwrite = _subprocess.GetStdHandle( - _subprocess.STD_OUTPUT_HANDLE) - if c2pwrite is None: - _, c2pwrite = _subprocess.CreatePipe(None, 0) - elif stdout == PIPE: - c2pread, c2pwrite = _subprocess.CreatePipe(None, 0) - elif isinstance(stdout, int): - c2pwrite = msvcrt.get_osfhandle(stdout) - else: - # Assuming file-like object - c2pwrite = msvcrt.get_osfhandle(stdout.fileno()) - c2pwrite = self._make_inheritable(c2pwrite) - - if stderr is None: - errwrite = _subprocess.GetStdHandle( - _subprocess.STD_ERROR_HANDLE) - if errwrite is None: - _, errwrite = _subprocess.CreatePipe(None, 0) - elif stderr == PIPE: - errread, errwrite = _subprocess.CreatePipe(None, 0) - elif stderr == STDOUT: - errwrite = c2pwrite - elif isinstance(stderr, int): - errwrite = msvcrt.get_osfhandle(stderr) - else: - # Assuming file-like object - errwrite = msvcrt.get_osfhandle(stderr.fileno()) - errwrite = self._make_inheritable(errwrite) - - return (p2cread, p2cwrite, - c2pread, c2pwrite, - errread, errwrite) - - def _make_inheritable(self, handle): - """Return a duplicate of handle, which is inheritable""" - return _subprocess.DuplicateHandle( - _subprocess.GetCurrentProcess(), - handle, - _subprocess.GetCurrentProcess(), - 0, - 1, - _subprocess.DUPLICATE_SAME_ACCESS - ) - - def _find_w9xpopen(self): - """Find and return absolut path to w9xpopen.exe""" - w9xpopen = os.path.join( - os.path.dirname(_subprocess.GetModuleFileName(0)), - "w9xpopen.exe") - if not os.path.exists(w9xpopen): - # Eeek - file-not-found - possibly an embedding - # situation - see if we can locate it in sys.exec_prefix - w9xpopen = os.path.join(os.path.dirname(sys.exec_prefix), - "w9xpopen.exe") - if not os.path.exists(w9xpopen): - raise RuntimeError("Cannot locate w9xpopen.exe, which is " - "needed for Popen to work with your " - "shell or platform.") - return w9xpopen - - def _execute_child(self, args, executable, preexec_fn, close_fds, - cwd, env, universal_newlines, - startupinfo, creationflags, shell, - p2cread, p2cwrite, - c2pread, c2pwrite, - errread, errwrite): - """Execute program (MS Windows version)""" - - if not isinstance(args, types.StringTypes): - args = list2cmdline(args) - - # Process startup details - if startupinfo is None: - startupinfo = STARTUPINFO() - if None not in (p2cread, c2pwrite, errwrite): - startupinfo.dwFlags |= _subprocess.STARTF_USESTDHANDLES - startupinfo.hStdInput = p2cread - startupinfo.hStdOutput = c2pwrite - startupinfo.hStdError = errwrite - - if shell: - startupinfo.dwFlags |= _subprocess.STARTF_USESHOWWINDOW - startupinfo.wShowWindow = _subprocess.SW_HIDE - comspec = os.environ.get("COMSPEC", "cmd.exe") - args = '{} /c "{}"'.format(comspec, args) - if (_subprocess.GetVersion() >= 0x80000000 or - os.path.basename(comspec).lower() == "command.com"): - # Win9x, or using command.com on NT. We need to - # use the w9xpopen intermediate program. For more - # information, see KB Q150956 - # (http://web.archive.org/web/20011105084002/http://support.microsoft.com/support/kb/articles/Q150/9/56.asp) - w9xpopen = self._find_w9xpopen() - args = '"%s" %s' % (w9xpopen, args) - # Not passing CREATE_NEW_CONSOLE has been known to - # cause random failures on win9x. Specifically a - # dialog: "Your program accessed mem currently in - # use at xxx" and a hopeful warning about the - # stability of your system. Cost is Ctrl+C wont - # kill children. - creationflags |= _subprocess.CREATE_NEW_CONSOLE - - # Start the process - try: - try: - hp, ht, pid, tid = _subprocess.CreateProcess( - executable, args, - # no special - # security - None, None, - int(not close_fds), - creationflags, - env, - cwd, - startupinfo) - except pywintypes.error, e: - # Translate pywintypes.error to WindowsError, which is - # a subclass of OSError. FIXME: We should really - # translate errno using _sys_errlist (or similar), but - # how can this be done from Python? - raise WindowsError(*e.args) - finally: - # Child is launched. Close the parent's copy of those pipe - # handles that only the child should have open. You need - # to make sure that no handles to the write end of the - # output pipe are maintained in this process or else the - # pipe will not close when the child process exits and the - # ReadFile will hang. - if p2cread is not None: - p2cread.Close() - if c2pwrite is not None: - c2pwrite.Close() - if errwrite is not None: - errwrite.Close() - - # Retain the process handle, but close the thread handle - self._child_created = True - self._handle = hp - self.pid = pid - ht.Close() - - def _internal_poll( - self, _deadstate=None, - _WaitForSingleObject=_subprocess.WaitForSingleObject, - _WAIT_OBJECT_0=_subprocess.WAIT_OBJECT_0, - _GetExitCodeProcess=_subprocess.GetExitCodeProcess - ): - """Check if child process has terminated. Returns returncode - attribute. - - This method is called by __del__, so it can only refer to objects - in its local scope. - - """ - if self.returncode is None: - if _WaitForSingleObject(self._handle, 0) == _WAIT_OBJECT_0: - self.returncode = _GetExitCodeProcess(self._handle) - return self.returncode - - def wait(self): - """Wait for child process to terminate. Returns returncode - attribute.""" - if self.returncode is None: - _subprocess.WaitForSingleObject(self._handle, - _subprocess.INFINITE) - self.returncode = _subprocess.GetExitCodeProcess(self._handle) - return self.returncode - - def _readerthread(self, fh, buffer): - buffer.append(fh.read()) - - def _communicate(self, input): - stdout = None # Return - stderr = None # Return - - if self.stdout: - stdout = [] - stdout_thread = threading.Thread(target=self._readerthread, - args=(self.stdout, stdout)) - stdout_thread.setDaemon(True) - stdout_thread.start() - if self.stderr: - stderr = [] - stderr_thread = threading.Thread(target=self._readerthread, - args=(self.stderr, stderr)) - stderr_thread.setDaemon(True) - stderr_thread.start() - - if self.stdin: - if input is not None: - try: - self.stdin.write(input) - except IOError, e: - if e.errno != errno.EPIPE: - raise - self.stdin.close() - - if self.stdout: - stdout_thread.join() - if self.stderr: - stderr_thread.join() - - # All data exchanged. Translate lists into strings. - if stdout is not None: - stdout = stdout[0] - if stderr is not None: - stderr = stderr[0] - - # Translate newlines, if requested. We cannot let the file - # object do the translation: It is based on stdio, which is - # impossible to combine with select (unless forcing no - # buffering). - if self.universal_newlines and hasattr(file, 'newlines'): - if stdout: - stdout = self._translate_newlines(stdout) - if stderr: - stderr = self._translate_newlines(stderr) - - self.wait() - return (stdout, stderr) - - def send_signal(self, sig): - """Send a signal to the process - """ - if sig == signal.SIGTERM: - self.terminate() - elif sig == signal.CTRL_C_EVENT: - os.kill(self.pid, signal.CTRL_C_EVENT) - elif sig == signal.CTRL_BREAK_EVENT: - os.kill(self.pid, signal.CTRL_BREAK_EVENT) - else: - raise ValueError("Unsupported signal: {}".format(sig)) - - def terminate(self): - """Terminates the process - """ - _subprocess.TerminateProcess(self._handle, 1) - - kill = terminate - - else: - # - # POSIX methods - # - def _get_handles(self, stdin, stdout, stderr): - """Construct and return tuple with IO objects: - p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite - """ - p2cread, p2cwrite = None, None - c2pread, c2pwrite = None, None - errread, errwrite = None, None - - if stdin is None: - pass - elif stdin == PIPE: - p2cread, p2cwrite = self.pipe_cloexec() - elif isinstance(stdin, int): - p2cread = stdin - else: - # Assuming file-like object - p2cread = stdin.fileno() - - if stdout is None: - pass - elif stdout == PIPE: - c2pread, c2pwrite = self.pipe_cloexec() - elif isinstance(stdout, int): - c2pwrite = stdout - else: - # Assuming file-like object - c2pwrite = stdout.fileno() - - if stderr is None: - pass - elif stderr == PIPE: - errread, errwrite = self.pipe_cloexec() - elif stderr == STDOUT: - errwrite = c2pwrite - elif isinstance(stderr, int): - errwrite = stderr - else: - # Assuming file-like object - errwrite = stderr.fileno() - - return (p2cread, p2cwrite, - c2pread, c2pwrite, - errread, errwrite) - - def _set_cloexec_flag(self, fd, cloexec=True): - try: - cloexec_flag = fcntl.FD_CLOEXEC - except AttributeError: - cloexec_flag = 1 - - old = fcntl.fcntl(fd, fcntl.F_GETFD) - if cloexec: - fcntl.fcntl(fd, fcntl.F_SETFD, old | cloexec_flag) - else: - fcntl.fcntl(fd, fcntl.F_SETFD, old & ~cloexec_flag) - - def pipe_cloexec(self): - """Create a pipe with FDs set CLOEXEC.""" - # Pipes' FDs are set CLOEXEC by default because we don't want them - # to be inherited by other subprocesses: the CLOEXEC flag is - # removed from the child's FDs by _dup2(), between fork() and - # exec(). - # This is not atomic: we would need the pipe2() syscall for that. - r, w = os.pipe() - self._set_cloexec_flag(r) - self._set_cloexec_flag(w) - return r, w - - def _close_fds(self, but): - if hasattr(os, 'closerange'): - os.closerange(3, but) - os.closerange(but + 1, MAXFD) - else: - for i in xrange(3, MAXFD): - if i == but: - continue - try: - os.close(i) - except: - pass - - def _execute_child(self, args, executable, preexec_fn, close_fds, - cwd, env, universal_newlines, - startupinfo, creationflags, shell, - p2cread, p2cwrite, - c2pread, c2pwrite, - errread, errwrite): - """Execute program (POSIX version)""" - - if isinstance(args, types.StringTypes): - args = [args] - else: - args = list(args) - - if shell: - args = ["/bin/sh", "-c"] + args - if executable: - args[0] = executable - - if executable is None: - executable = args[0] - - # For transferring possible exec failure from child to parent - # The first char specifies the exception type: 0 means - # OSError, 1 means some other error. - errpipe_read, errpipe_write = self.pipe_cloexec() - try: - try: - gc_was_enabled = gc.isenabled() - # Disable gc to avoid bug where gc -> file_dealloc -> - # write to stderr -> hang. - # http://bugs.python.org/issue1336 - gc.disable() - try: - self.pid = os.fork() - except: - if gc_was_enabled: - gc.enable() - raise - self._child_created = True - if self.pid == 0: - # Child - try: - # Close parent's pipe ends - if p2cwrite is not None: - os.close(p2cwrite) - if c2pread is not None: - os.close(c2pread) - if errread is not None: - os.close(errread) - os.close(errpipe_read) - - # When duping fds, if there arises a situation - # where one of the fds is either 0, 1 or 2, it - # is possible that it is overwritten (#12607). - if c2pwrite == 0: - c2pwrite = os.dup(c2pwrite) - if errwrite == 0 or errwrite == 1: - errwrite = os.dup(errwrite) - - # Dup fds for child - def _dup2(a, b): - # dup2() removes the CLOEXEC flag but - # we must do it ourselves if dup2() - # would be a no-op (issue #10806). - if a == b: - self._set_cloexec_flag(a, False) - elif a is not None: - os.dup2(a, b) - _dup2(p2cread, 0) - _dup2(c2pwrite, 1) - _dup2(errwrite, 2) - - # Close pipe fds. Make sure we don't close the - # same fd more than once, or standard fds. - closed = set([None]) - for fd in [p2cread, c2pwrite, errwrite]: - if fd not in closed and fd > 2: - os.close(fd) - closed.add(fd) - - # Close all other fds, if asked for - if close_fds: - self._close_fds(but=errpipe_write) - - if cwd is not None: - os.chdir(cwd) - - if preexec_fn: - preexec_fn() - - if env is None: - os.execvp(executable, args) - else: - os.execvpe(executable, args, env) - - except: - exc_type, exc_value, tb = sys.exc_info() - # Save the traceback and attach it to the exception - # object - exc_lines = traceback.format_exception(exc_type, - exc_value, - tb) - exc_value.child_traceback = ''.join(exc_lines) - os.write(errpipe_write, pickle.dumps(exc_value)) - - # This exitcode won't be reported to applications, - # so it really doesn't matter what we return. - os._exit(255) - - # Parent - if gc_was_enabled: - gc.enable() - finally: - # be sure the FD is closed no matter what - os.close(errpipe_write) - - if p2cread is not None and p2cwrite is not None: - os.close(p2cread) - if c2pwrite is not None and c2pread is not None: - os.close(c2pwrite) - if errwrite is not None and errread is not None: - os.close(errwrite) - - # Wait for exec to fail or succeed; possibly raising exception - # Exception limited to 1M - data = _eintr_retry_call(os.read, errpipe_read, 1048576) - finally: - # be sure the FD is closed no matter what - os.close(errpipe_read) - - if data != "": - try: - _eintr_retry_call(os.waitpid, self.pid, 0) - except OSError, e: - if e.errno != errno.ECHILD: - raise - child_exception = pickle.loads(data) - for fd in (p2cwrite, c2pread, errread): - if fd is not None: - os.close(fd) - raise child_exception - - def _handle_exitstatus(self, sts, _WIFSIGNALED=os.WIFSIGNALED, - _WTERMSIG=os.WTERMSIG, _WIFEXITED=os.WIFEXITED, - _WEXITSTATUS=os.WEXITSTATUS): - # This method is called (indirectly) by __del__, so it cannot - # refer to anything outside of its local scope.""" - if _WIFSIGNALED(sts): - self.returncode = -_WTERMSIG(sts) - elif _WIFEXITED(sts): - self.returncode = _WEXITSTATUS(sts) - else: - # Should never happen - raise RuntimeError("Unknown child exit status!") - - def _internal_poll(self, _deadstate=None, _waitpid=os.waitpid, - _WNOHANG=os.WNOHANG, _os_error=os.error): - """Check if child process has terminated. Returns returncode - attribute. - - This method is called by __del__, so it cannot reference anything - outside of the local scope (nor can any methods it calls). - - """ - if self.returncode is None: - try: - pid, sts = _waitpid(self.pid, _WNOHANG) - if pid == self.pid: - self._handle_exitstatus(sts) - except _os_error: - if _deadstate is not None: - self.returncode = _deadstate - return self.returncode - - def wait(self): - """Wait for child process to terminate. Returns returncode - attribute.""" - if self.returncode is None: - try: - pid, sts = _eintr_retry_call(os.waitpid, self.pid, 0) - except OSError, e: - if e.errno != errno.ECHILD: - raise - # This happens if SIGCLD is set to be ignored or waiting - # for child processes has otherwise been disabled for our - # process. This child is dead, we can't get the status. - sts = 0 - self._handle_exitstatus(sts) - return self.returncode - - def _communicate(self, input): - if self.stdin: - # Flush stdio buffer. This might block, if the user has - # been writing to .stdin in an uncontrolled fashion. - self.stdin.flush() - if not input: - self.stdin.close() - - if _has_poll: - stdout, stderr = self._communicate_with_poll(input) - else: - stdout, stderr = self._communicate_with_select(input) - - # All data exchanged. Translate lists into strings. - if stdout is not None: - stdout = ''.join(stdout) - if stderr is not None: - stderr = ''.join(stderr) - - # Translate newlines, if requested. We cannot let the file - # object do the translation: It is based on stdio, which is - # impossible to combine with select (unless forcing no - # buffering). - if self.universal_newlines and hasattr(file, 'newlines'): - if stdout: - stdout = self._translate_newlines(stdout) - if stderr: - stderr = self._translate_newlines(stderr) - - self.wait() - return (stdout, stderr) - - def _communicate_with_poll(self, input): - stdout = None # Return - stderr = None # Return - fd2file = {} - fd2output = {} - - poller = select.poll() - - def register_and_append(file_obj, eventmask): - poller.register(file_obj.fileno(), eventmask) - fd2file[file_obj.fileno()] = file_obj - - def close_unregister_and_remove(fd): - poller.unregister(fd) - fd2file[fd].close() - fd2file.pop(fd) - - if self.stdin and input: - register_and_append(self.stdin, select.POLLOUT) - - select_POLLIN_POLLPRI = select.POLLIN | select.POLLPRI - if self.stdout: - register_and_append(self.stdout, select_POLLIN_POLLPRI) - fd2output[self.stdout.fileno()] = stdout = [] - if self.stderr: - register_and_append(self.stderr, select_POLLIN_POLLPRI) - fd2output[self.stderr.fileno()] = stderr = [] - - input_offset = 0 - while fd2file: - try: - ready = poller.poll() - except select.error, e: - if e.args[0] == errno.EINTR: - continue - raise - - for fd, mode in ready: - if mode & select.POLLOUT: - chunk = input[input_offset: input_offset + _PIPE_BUF] - try: - input_offset += os.write(fd, chunk) - except OSError, e: - if e.errno == errno.EPIPE: - close_unregister_and_remove(fd) - else: - raise - else: - if input_offset >= len(input): - close_unregister_and_remove(fd) - elif mode & select_POLLIN_POLLPRI: - data = os.read(fd, 4096) - if not data: - close_unregister_and_remove(fd) - fd2output[fd].append(data) - else: - # Ignore hang up or errors. - close_unregister_and_remove(fd) - - return (stdout, stderr) - - def _communicate_with_select(self, input): - read_set = [] - write_set = [] - stdout = None # Return - stderr = None # Return - - if self.stdin and input: - write_set.append(self.stdin) - if self.stdout: - read_set.append(self.stdout) - stdout = [] - if self.stderr: - read_set.append(self.stderr) - stderr = [] - - input_offset = 0 - while read_set or write_set: - try: - rlist, wlist, xlist = select.select( - read_set, write_set, []) - except select.error, e: - if e.args[0] == errno.EINTR: - continue - raise - - if self.stdin in wlist: - chunk = input[input_offset: input_offset + _PIPE_BUF] - try: - bytes_written = os.write(self.stdin.fileno(), chunk) - except OSError, e: - if e.errno == errno.EPIPE: - self.stdin.close() - write_set.remove(self.stdin) - else: - raise - else: - input_offset += bytes_written - if input_offset >= len(input): - self.stdin.close() - write_set.remove(self.stdin) - - if self.stdout in rlist: - data = os.read(self.stdout.fileno(), 1024) - if data == "": - self.stdout.close() - read_set.remove(self.stdout) - stdout.append(data) - - if self.stderr in rlist: - data = os.read(self.stderr.fileno(), 1024) - if data == "": - self.stderr.close() - read_set.remove(self.stderr) - stderr.append(data) - - return (stdout, stderr) - - def send_signal(self, sig): - """Send a signal to the process - """ - os.kill(self.pid, sig) - - def terminate(self): - """Terminate the process with SIGTERM - """ - self.send_signal(signal.SIGTERM) - - def kill(self): - """Kill the process with SIGKILL - """ - self.send_signal(signal.SIGKILL) - - -def _demo_posix(): - # - # Example 1: Simple redirection: Get process list - # - plist = Popen(["ps"], stdout=PIPE).communicate()[0] - print "Process list:" - print plist - - # - # Example 2: Change uid before executing child - # - if os.getuid() == 0: - p = Popen(["id"], preexec_fn=lambda: os.setuid(100)) - p.wait() - - # - # Example 3: Connecting several subprocesses - # - print "Looking for 'hda'..." - p1 = Popen(["dmesg"], stdout=PIPE) - p2 = Popen(["grep", "hda"], stdin=p1.stdout, stdout=PIPE) - print repr(p2.communicate()[0]) - - # - # Example 4: Catch execution error - # - print - print "Trying a weird file..." - try: - print Popen(["/this/path/does/not/exist"]).communicate() - except OSError, e: - if e.errno == errno.ENOENT: - print "The file didn't exist. I thought so..." - print "Child traceback:" - print e.child_traceback - else: - print "Error", e.errno - else: - print >>sys.stderr, "Gosh. No error." - - -def _demo_windows(): - # - # Example 1: Connecting several subprocesses - # - print "Looking for 'PROMPT' in set output..." - p1 = Popen("set", stdout=PIPE, shell=True) - p2 = Popen('find "PROMPT"', stdin=p1.stdout, stdout=PIPE) - print repr(p2.communicate()[0]) - - # - # Example 2: Simple execution of program - # - print "Executing calc..." - p = Popen("calc") - p.wait() - - -if __name__ == "__main__": - if mswindows: - _demo_windows() - else: - _demo_posix() diff --git a/lib/cherrypy/_cpconfig.py b/lib/cherrypy/_cpconfig.py deleted file mode 100644 index c11bc1d1..00000000 --- a/lib/cherrypy/_cpconfig.py +++ /dev/null @@ -1,317 +0,0 @@ -""" -Configuration system for CherryPy. - -Configuration in CherryPy is implemented via dictionaries. Keys are strings -which name the mapped value, which may be of any type. - - -Architecture ------------- - -CherryPy Requests are part of an Application, which runs in a global context, -and configuration data may apply to any of those three scopes: - -Global - Configuration entries which apply everywhere are stored in - cherrypy.config. - -Application - Entries which apply to each mounted application are stored - on the Application object itself, as 'app.config'. This is a two-level - dict where each key is a path, or "relative URL" (for example, "/" or - "/path/to/my/page"), and each value is a config dict. Usually, this - data is provided in the call to tree.mount(root(), config=conf), - although you may also use app.merge(conf). - -Request - Each Request object possesses a single 'Request.config' dict. - Early in the request process, this dict is populated by merging global - config entries, Application entries (whose path equals or is a parent - of Request.path_info), and any config acquired while looking up the - page handler (see next). - - -Declaration ------------ - -Configuration data may be supplied as a Python dictionary, as a filename, -or as an open file object. When you supply a filename or file, CherryPy -uses Python's builtin ConfigParser; you declare Application config by -writing each path as a section header:: - - [/path/to/my/page] - request.stream = True - -To declare global configuration entries, place them in a [global] section. - -You may also declare config entries directly on the classes and methods -(page handlers) that make up your CherryPy application via the ``_cp_config`` -attribute. For example:: - - class Demo: - _cp_config = {'tools.gzip.on': True} - - def index(self): - return "Hello world" - index.exposed = True - index._cp_config = {'request.show_tracebacks': False} - -.. note:: - - This behavior is only guaranteed for the default dispatcher. - Other dispatchers may have different restrictions on where - you can attach _cp_config attributes. - - -Namespaces ----------- - -Configuration keys are separated into namespaces by the first "." in the key. -Current namespaces: - -engine - Controls the 'application engine', including autoreload. - These can only be declared in the global config. - -tree - Grafts cherrypy.Application objects onto cherrypy.tree. - These can only be declared in the global config. - -hooks - Declares additional request-processing functions. - -log - Configures the logging for each application. - These can only be declared in the global or / config. - -request - Adds attributes to each Request. - -response - Adds attributes to each Response. - -server - Controls the default HTTP server via cherrypy.server. - These can only be declared in the global config. - -tools - Runs and configures additional request-processing packages. - -wsgi - Adds WSGI middleware to an Application's "pipeline". - These can only be declared in the app's root config ("/"). - -checker - Controls the 'checker', which looks for common errors in - app state (including config) when the engine starts. - Global config only. - -The only key that does not exist in a namespace is the "environment" entry. -This special entry 'imports' other config entries from a template stored in -cherrypy._cpconfig.environments[environment]. It only applies to the global -config, and only when you use cherrypy.config.update. - -You can define your own namespaces to be called at the Global, Application, -or Request level, by adding a named handler to cherrypy.config.namespaces, -app.namespaces, or app.request_class.namespaces. The name can -be any string, and the handler must be either a callable or a (Python 2.5 -style) context manager. -""" - -import cherrypy -from cherrypy._cpcompat import set, basestring -from cherrypy.lib import reprconf - -# Deprecated in CherryPy 3.2--remove in 3.3 -NamespaceSet = reprconf.NamespaceSet - - -def merge(base, other): - """Merge one app config (from a dict, file, or filename) into another. - - If the given config is a filename, it will be appended to - the list of files to monitor for "autoreload" changes. - """ - if isinstance(other, basestring): - cherrypy.engine.autoreload.files.add(other) - - # Load other into base - for section, value_map in reprconf.as_dict(other).items(): - if not isinstance(value_map, dict): - raise ValueError( - "Application config must include section headers, but the " - "config you tried to merge doesn't have any sections. " - "Wrap your config in another dict with paths as section " - "headers, for example: {'/': config}.") - base.setdefault(section, {}).update(value_map) - - -class Config(reprconf.Config): - - """The 'global' configuration data for the entire CherryPy process.""" - - def update(self, config): - """Update self from a dict, file or filename.""" - if isinstance(config, basestring): - # Filename - cherrypy.engine.autoreload.files.add(config) - reprconf.Config.update(self, config) - - def _apply(self, config): - """Update self from a dict.""" - if isinstance(config.get("global"), dict): - if len(config) > 1: - cherrypy.checker.global_config_contained_paths = True - config = config["global"] - if 'tools.staticdir.dir' in config: - config['tools.staticdir.section'] = "global" - reprconf.Config._apply(self, config) - - def __call__(self, *args, **kwargs): - """Decorator for page handlers to set _cp_config.""" - if args: - raise TypeError( - "The cherrypy.config decorator does not accept positional " - "arguments; you must use keyword arguments.") - - def tool_decorator(f): - if not hasattr(f, "_cp_config"): - f._cp_config = {} - for k, v in kwargs.items(): - f._cp_config[k] = v - return f - return tool_decorator - - -# Sphinx begin config.environments -Config.environments = environments = { - "staging": { - 'engine.autoreload.on': False, - 'checker.on': False, - 'tools.log_headers.on': False, - 'request.show_tracebacks': False, - 'request.show_mismatched_params': False, - }, - "production": { - 'engine.autoreload.on': False, - 'checker.on': False, - 'tools.log_headers.on': False, - 'request.show_tracebacks': False, - 'request.show_mismatched_params': False, - 'log.screen': False, - }, - "embedded": { - # For use with CherryPy embedded in another deployment stack. - 'engine.autoreload.on': False, - 'checker.on': False, - 'tools.log_headers.on': False, - 'request.show_tracebacks': False, - 'request.show_mismatched_params': False, - 'log.screen': False, - 'engine.SIGHUP': None, - 'engine.SIGTERM': None, - }, - "test_suite": { - 'engine.autoreload.on': False, - 'checker.on': False, - 'tools.log_headers.on': False, - 'request.show_tracebacks': True, - 'request.show_mismatched_params': True, - 'log.screen': False, - }, -} -# Sphinx end config.environments - - -def _server_namespace_handler(k, v): - """Config handler for the "server" namespace.""" - atoms = k.split(".", 1) - if len(atoms) > 1: - # Special-case config keys of the form 'server.servername.socket_port' - # to configure additional HTTP servers. - if not hasattr(cherrypy, "servers"): - cherrypy.servers = {} - - servername, k = atoms - if servername not in cherrypy.servers: - from cherrypy import _cpserver - cherrypy.servers[servername] = _cpserver.Server() - # On by default, but 'on = False' can unsubscribe it (see below). - cherrypy.servers[servername].subscribe() - - if k == 'on': - if v: - cherrypy.servers[servername].subscribe() - else: - cherrypy.servers[servername].unsubscribe() - else: - setattr(cherrypy.servers[servername], k, v) - else: - setattr(cherrypy.server, k, v) -Config.namespaces["server"] = _server_namespace_handler - - -def _engine_namespace_handler(k, v): - """Backward compatibility handler for the "engine" namespace.""" - engine = cherrypy.engine - - deprecated = { - 'autoreload_on': 'autoreload.on', - 'autoreload_frequency': 'autoreload.frequency', - 'autoreload_match': 'autoreload.match', - 'reload_files': 'autoreload.files', - 'deadlock_poll_freq': 'timeout_monitor.frequency' - } - - if k in deprecated: - engine.log( - 'WARNING: Use of engine.%s is deprecated and will be removed in a ' - 'future version. Use engine.%s instead.' % (k, deprecated[k])) - - if k == 'autoreload_on': - if v: - engine.autoreload.subscribe() - else: - engine.autoreload.unsubscribe() - elif k == 'autoreload_frequency': - engine.autoreload.frequency = v - elif k == 'autoreload_match': - engine.autoreload.match = v - elif k == 'reload_files': - engine.autoreload.files = set(v) - elif k == 'deadlock_poll_freq': - engine.timeout_monitor.frequency = v - elif k == 'SIGHUP': - engine.listeners['SIGHUP'] = set([v]) - elif k == 'SIGTERM': - engine.listeners['SIGTERM'] = set([v]) - elif "." in k: - plugin, attrname = k.split(".", 1) - plugin = getattr(engine, plugin) - if attrname == 'on': - if v and hasattr(getattr(plugin, 'subscribe', None), '__call__'): - plugin.subscribe() - return - elif ( - (not v) and - hasattr(getattr(plugin, 'unsubscribe', None), '__call__') - ): - plugin.unsubscribe() - return - setattr(plugin, attrname, v) - else: - setattr(engine, k, v) -Config.namespaces["engine"] = _engine_namespace_handler - - -def _tree_namespace_handler(k, v): - """Namespace handler for the 'tree' config namespace.""" - if isinstance(v, dict): - for script_name, app in v.items(): - cherrypy.tree.graft(app, script_name) - cherrypy.engine.log("Mounted: %s on %s" % - (app, script_name or "/")) - else: - cherrypy.tree.graft(v, v.script_name) - cherrypy.engine.log("Mounted: %s on %s" % (v, v.script_name or "/")) -Config.namespaces["tree"] = _tree_namespace_handler diff --git a/lib/cherrypy/_cpdispatch.py b/lib/cherrypy/_cpdispatch.py deleted file mode 100644 index 1c2d7df8..00000000 --- a/lib/cherrypy/_cpdispatch.py +++ /dev/null @@ -1,686 +0,0 @@ -"""CherryPy dispatchers. - -A 'dispatcher' is the object which looks up the 'page handler' callable -and collects config for the current request based on the path_info, other -request attributes, and the application architecture. The core calls the -dispatcher as early as possible, passing it a 'path_info' argument. - -The default dispatcher discovers the page handler by matching path_info -to a hierarchical arrangement of objects, starting at request.app.root. -""" - -import string -import sys -import types -try: - classtype = (type, types.ClassType) -except AttributeError: - classtype = type - -import cherrypy -from cherrypy._cpcompat import set - - -class PageHandler(object): - - """Callable which sets response.body.""" - - def __init__(self, callable, *args, **kwargs): - self.callable = callable - self.args = args - self.kwargs = kwargs - - def get_args(self): - return cherrypy.serving.request.args - - def set_args(self, args): - cherrypy.serving.request.args = args - return cherrypy.serving.request.args - - args = property( - get_args, - set_args, - doc="The ordered args should be accessible from post dispatch hooks" - ) - - def get_kwargs(self): - return cherrypy.serving.request.kwargs - - def set_kwargs(self, kwargs): - cherrypy.serving.request.kwargs = kwargs - return cherrypy.serving.request.kwargs - - kwargs = property( - get_kwargs, - set_kwargs, - doc="The named kwargs should be accessible from post dispatch hooks" - ) - - def __call__(self): - try: - return self.callable(*self.args, **self.kwargs) - except TypeError: - x = sys.exc_info()[1] - try: - test_callable_spec(self.callable, self.args, self.kwargs) - except cherrypy.HTTPError: - raise sys.exc_info()[1] - except: - raise x - raise - - -def test_callable_spec(callable, callable_args, callable_kwargs): - """ - Inspect callable and test to see if the given args are suitable for it. - - When an error occurs during the handler's invoking stage there are 2 - erroneous cases: - 1. Too many parameters passed to a function which doesn't define - one of *args or **kwargs. - 2. Too little parameters are passed to the function. - - There are 3 sources of parameters to a cherrypy handler. - 1. query string parameters are passed as keyword parameters to the - handler. - 2. body parameters are also passed as keyword parameters. - 3. when partial matching occurs, the final path atoms are passed as - positional args. - Both the query string and path atoms are part of the URI. If they are - incorrect, then a 404 Not Found should be raised. Conversely the body - parameters are part of the request; if they are invalid a 400 Bad Request. - """ - show_mismatched_params = getattr( - cherrypy.serving.request, 'show_mismatched_params', False) - try: - (args, varargs, varkw, defaults) = getargspec(callable) - except TypeError: - if isinstance(callable, object) and hasattr(callable, '__call__'): - (args, varargs, varkw, - defaults) = getargspec(callable.__call__) - else: - # If it wasn't one of our own types, re-raise - # the original error - raise - - if args and args[0] == 'self': - args = args[1:] - - arg_usage = dict([(arg, 0,) for arg in args]) - vararg_usage = 0 - varkw_usage = 0 - extra_kwargs = set() - - for i, value in enumerate(callable_args): - try: - arg_usage[args[i]] += 1 - except IndexError: - vararg_usage += 1 - - for key in callable_kwargs.keys(): - try: - arg_usage[key] += 1 - except KeyError: - varkw_usage += 1 - extra_kwargs.add(key) - - # figure out which args have defaults. - args_with_defaults = args[-len(defaults or []):] - for i, val in enumerate(defaults or []): - # Defaults take effect only when the arg hasn't been used yet. - if arg_usage[args_with_defaults[i]] == 0: - arg_usage[args_with_defaults[i]] += 1 - - missing_args = [] - multiple_args = [] - for key, usage in arg_usage.items(): - if usage == 0: - missing_args.append(key) - elif usage > 1: - multiple_args.append(key) - - if missing_args: - # In the case where the method allows body arguments - # there are 3 potential errors: - # 1. not enough query string parameters -> 404 - # 2. not enough body parameters -> 400 - # 3. not enough path parts (partial matches) -> 404 - # - # We can't actually tell which case it is, - # so I'm raising a 404 because that covers 2/3 of the - # possibilities - # - # In the case where the method does not allow body - # arguments it's definitely a 404. - message = None - if show_mismatched_params: - message = "Missing parameters: %s" % ",".join(missing_args) - raise cherrypy.HTTPError(404, message=message) - - # the extra positional arguments come from the path - 404 Not Found - if not varargs and vararg_usage > 0: - raise cherrypy.HTTPError(404) - - body_params = cherrypy.serving.request.body.params or {} - body_params = set(body_params.keys()) - qs_params = set(callable_kwargs.keys()) - body_params - - if multiple_args: - if qs_params.intersection(set(multiple_args)): - # If any of the multiple parameters came from the query string then - # it's a 404 Not Found - error = 404 - else: - # Otherwise it's a 400 Bad Request - error = 400 - - message = None - if show_mismatched_params: - message = "Multiple values for parameters: "\ - "%s" % ",".join(multiple_args) - raise cherrypy.HTTPError(error, message=message) - - if not varkw and varkw_usage > 0: - - # If there were extra query string parameters, it's a 404 Not Found - extra_qs_params = set(qs_params).intersection(extra_kwargs) - if extra_qs_params: - message = None - if show_mismatched_params: - message = "Unexpected query string "\ - "parameters: %s" % ", ".join(extra_qs_params) - raise cherrypy.HTTPError(404, message=message) - - # If there were any extra body parameters, it's a 400 Not Found - extra_body_params = set(body_params).intersection(extra_kwargs) - if extra_body_params: - message = None - if show_mismatched_params: - message = "Unexpected body parameters: "\ - "%s" % ", ".join(extra_body_params) - raise cherrypy.HTTPError(400, message=message) - - -try: - import inspect -except ImportError: - test_callable_spec = lambda callable, args, kwargs: None -else: - getargspec = inspect.getargspec - # Python 3 requires using getfullargspec if keyword-only arguments are present - if hasattr(inspect, 'getfullargspec'): - def getargspec(callable): - return inspect.getfullargspec(callable)[:4] - - -class LateParamPageHandler(PageHandler): - - """When passing cherrypy.request.params to the page handler, we do not - want to capture that dict too early; we want to give tools like the - decoding tool a chance to modify the params dict in-between the lookup - of the handler and the actual calling of the handler. This subclass - takes that into account, and allows request.params to be 'bound late' - (it's more complicated than that, but that's the effect). - """ - - def _get_kwargs(self): - kwargs = cherrypy.serving.request.params.copy() - if self._kwargs: - kwargs.update(self._kwargs) - return kwargs - - def _set_kwargs(self, kwargs): - cherrypy.serving.request.kwargs = kwargs - self._kwargs = kwargs - - kwargs = property(_get_kwargs, _set_kwargs, - doc='page handler kwargs (with ' - 'cherrypy.request.params copied in)') - - -if sys.version_info < (3, 0): - punctuation_to_underscores = string.maketrans( - string.punctuation, '_' * len(string.punctuation)) - - def validate_translator(t): - if not isinstance(t, str) or len(t) != 256: - raise ValueError( - "The translate argument must be a str of len 256.") -else: - punctuation_to_underscores = str.maketrans( - string.punctuation, '_' * len(string.punctuation)) - - def validate_translator(t): - if not isinstance(t, dict): - raise ValueError("The translate argument must be a dict.") - - -class Dispatcher(object): - - """CherryPy Dispatcher which walks a tree of objects to find a handler. - - The tree is rooted at cherrypy.request.app.root, and each hierarchical - component in the path_info argument is matched to a corresponding nested - attribute of the root object. Matching handlers must have an 'exposed' - attribute which evaluates to True. The special method name "index" - matches a URI which ends in a slash ("/"). The special method name - "default" may match a portion of the path_info (but only when no longer - substring of the path_info matches some other object). - - This is the default, built-in dispatcher for CherryPy. - """ - - dispatch_method_name = '_cp_dispatch' - """ - The name of the dispatch method that nodes may optionally implement - to provide their own dynamic dispatch algorithm. - """ - - def __init__(self, dispatch_method_name=None, - translate=punctuation_to_underscores): - validate_translator(translate) - self.translate = translate - if dispatch_method_name: - self.dispatch_method_name = dispatch_method_name - - def __call__(self, path_info): - """Set handler and config for the current request.""" - request = cherrypy.serving.request - func, vpath = self.find_handler(path_info) - - if func: - # Decode any leftover %2F in the virtual_path atoms. - vpath = [x.replace("%2F", "/") for x in vpath] - request.handler = LateParamPageHandler(func, *vpath) - else: - request.handler = cherrypy.NotFound() - - def find_handler(self, path): - """Return the appropriate page handler, plus any virtual path. - - This will return two objects. The first will be a callable, - which can be used to generate page output. Any parameters from - the query string or request body will be sent to that callable - as keyword arguments. - - The callable is found by traversing the application's tree, - starting from cherrypy.request.app.root, and matching path - components to successive objects in the tree. For example, the - URL "/path/to/handler" might return root.path.to.handler. - - The second object returned will be a list of names which are - 'virtual path' components: parts of the URL which are dynamic, - and were not used when looking up the handler. - These virtual path components are passed to the handler as - positional arguments. - """ - request = cherrypy.serving.request - app = request.app - root = app.root - dispatch_name = self.dispatch_method_name - - # Get config for the root object/path. - fullpath = [x for x in path.strip('/').split('/') if x] + ['index'] - fullpath_len = len(fullpath) - segleft = fullpath_len - nodeconf = {} - if hasattr(root, "_cp_config"): - nodeconf.update(root._cp_config) - if "/" in app.config: - nodeconf.update(app.config["/"]) - object_trail = [['root', root, nodeconf, segleft]] - - node = root - iternames = fullpath[:] - while iternames: - name = iternames[0] - # map to legal Python identifiers (e.g. replace '.' with '_') - objname = name.translate(self.translate) - - nodeconf = {} - subnode = getattr(node, objname, None) - pre_len = len(iternames) - if subnode is None: - dispatch = getattr(node, dispatch_name, None) - if dispatch and hasattr(dispatch, '__call__') and not \ - getattr(dispatch, 'exposed', False) and \ - pre_len > 1: - # Don't expose the hidden 'index' token to _cp_dispatch - # We skip this if pre_len == 1 since it makes no sense - # to call a dispatcher when we have no tokens left. - index_name = iternames.pop() - subnode = dispatch(vpath=iternames) - iternames.append(index_name) - else: - # We didn't find a path, but keep processing in case there - # is a default() handler. - iternames.pop(0) - else: - # We found the path, remove the vpath entry - iternames.pop(0) - segleft = len(iternames) - if segleft > pre_len: - # No path segment was removed. Raise an error. - raise cherrypy.CherryPyException( - "A vpath segment was added. Custom dispatchers may only " - + "remove elements. While trying to process " - + "{0} in {1}".format(name, fullpath) - ) - elif segleft == pre_len: - # Assume that the handler used the current path segment, but - # did not pop it. This allows things like - # return getattr(self, vpath[0], None) - iternames.pop(0) - segleft -= 1 - node = subnode - - if node is not None: - # Get _cp_config attached to this node. - if hasattr(node, "_cp_config"): - nodeconf.update(node._cp_config) - - # Mix in values from app.config for this path. - existing_len = fullpath_len - pre_len - if existing_len != 0: - curpath = '/' + '/'.join(fullpath[0:existing_len]) - else: - curpath = '' - new_segs = fullpath[fullpath_len - pre_len:fullpath_len - segleft] - for seg in new_segs: - curpath += '/' + seg - if curpath in app.config: - nodeconf.update(app.config[curpath]) - - object_trail.append([name, node, nodeconf, segleft]) - - def set_conf(): - """Collapse all object_trail config into cherrypy.request.config. - """ - base = cherrypy.config.copy() - # Note that we merge the config from each node - # even if that node was None. - for name, obj, conf, segleft in object_trail: - base.update(conf) - if 'tools.staticdir.dir' in conf: - base['tools.staticdir.section'] = '/' + \ - '/'.join(fullpath[0:fullpath_len - segleft]) - return base - - # Try successive objects (reverse order) - num_candidates = len(object_trail) - 1 - for i in range(num_candidates, -1, -1): - - name, candidate, nodeconf, segleft = object_trail[i] - if candidate is None: - continue - - # Try a "default" method on the current leaf. - if hasattr(candidate, "default"): - defhandler = candidate.default - if getattr(defhandler, 'exposed', False): - # Insert any extra _cp_config from the default handler. - conf = getattr(defhandler, "_cp_config", {}) - object_trail.insert( - i + 1, ["default", defhandler, conf, segleft]) - request.config = set_conf() - # See https://bitbucket.org/cherrypy/cherrypy/issue/613 - request.is_index = path.endswith("/") - return defhandler, fullpath[fullpath_len - segleft:-1] - - # Uncomment the next line to restrict positional params to - # "default". - # if i < num_candidates - 2: continue - - # Try the current leaf. - if getattr(candidate, 'exposed', False): - request.config = set_conf() - if i == num_candidates: - # We found the extra ".index". Mark request so tools - # can redirect if path_info has no trailing slash. - request.is_index = True - else: - # We're not at an 'index' handler. Mark request so tools - # can redirect if path_info has NO trailing slash. - # Note that this also includes handlers which take - # positional parameters (virtual paths). - request.is_index = False - return candidate, fullpath[fullpath_len - segleft:-1] - - # We didn't find anything - request.config = set_conf() - return None, [] - - -class MethodDispatcher(Dispatcher): - - """Additional dispatch based on cherrypy.request.method.upper(). - - Methods named GET, POST, etc will be called on an exposed class. - The method names must be all caps; the appropriate Allow header - will be output showing all capitalized method names as allowable - HTTP verbs. - - Note that the containing class must be exposed, not the methods. - """ - - def __call__(self, path_info): - """Set handler and config for the current request.""" - request = cherrypy.serving.request - resource, vpath = self.find_handler(path_info) - - if resource: - # Set Allow header - avail = [m for m in dir(resource) if m.isupper()] - if "GET" in avail and "HEAD" not in avail: - avail.append("HEAD") - avail.sort() - cherrypy.serving.response.headers['Allow'] = ", ".join(avail) - - # Find the subhandler - meth = request.method.upper() - func = getattr(resource, meth, None) - if func is None and meth == "HEAD": - func = getattr(resource, "GET", None) - if func: - # Grab any _cp_config on the subhandler. - if hasattr(func, "_cp_config"): - request.config.update(func._cp_config) - - # Decode any leftover %2F in the virtual_path atoms. - vpath = [x.replace("%2F", "/") for x in vpath] - request.handler = LateParamPageHandler(func, *vpath) - else: - request.handler = cherrypy.HTTPError(405) - else: - request.handler = cherrypy.NotFound() - - -class RoutesDispatcher(object): - - """A Routes based dispatcher for CherryPy.""" - - def __init__(self, full_result=False, **mapper_options): - """ - Routes dispatcher - - Set full_result to True if you wish the controller - and the action to be passed on to the page handler - parameters. By default they won't be. - """ - import routes - self.full_result = full_result - self.controllers = {} - self.mapper = routes.Mapper(**mapper_options) - self.mapper.controller_scan = self.controllers.keys - - def connect(self, name, route, controller, **kwargs): - self.controllers[name] = controller - self.mapper.connect(name, route, controller=name, **kwargs) - - def redirect(self, url): - raise cherrypy.HTTPRedirect(url) - - def __call__(self, path_info): - """Set handler and config for the current request.""" - func = self.find_handler(path_info) - if func: - cherrypy.serving.request.handler = LateParamPageHandler(func) - else: - cherrypy.serving.request.handler = cherrypy.NotFound() - - def find_handler(self, path_info): - """Find the right page handler, and set request.config.""" - import routes - - request = cherrypy.serving.request - - config = routes.request_config() - config.mapper = self.mapper - if hasattr(request, 'wsgi_environ'): - config.environ = request.wsgi_environ - config.host = request.headers.get('Host', None) - config.protocol = request.scheme - config.redirect = self.redirect - - result = self.mapper.match(path_info) - - config.mapper_dict = result - params = {} - if result: - params = result.copy() - if not self.full_result: - params.pop('controller', None) - params.pop('action', None) - request.params.update(params) - - # Get config for the root object/path. - request.config = base = cherrypy.config.copy() - curpath = "" - - def merge(nodeconf): - if 'tools.staticdir.dir' in nodeconf: - nodeconf['tools.staticdir.section'] = curpath or "/" - base.update(nodeconf) - - app = request.app - root = app.root - if hasattr(root, "_cp_config"): - merge(root._cp_config) - if "/" in app.config: - merge(app.config["/"]) - - # Mix in values from app.config. - atoms = [x for x in path_info.split("/") if x] - if atoms: - last = atoms.pop() - else: - last = None - for atom in atoms: - curpath = "/".join((curpath, atom)) - if curpath in app.config: - merge(app.config[curpath]) - - handler = None - if result: - controller = result.get('controller') - controller = self.controllers.get(controller, controller) - if controller: - if isinstance(controller, classtype): - controller = controller() - # Get config from the controller. - if hasattr(controller, "_cp_config"): - merge(controller._cp_config) - - action = result.get('action') - if action is not None: - handler = getattr(controller, action, None) - # Get config from the handler - if hasattr(handler, "_cp_config"): - merge(handler._cp_config) - else: - handler = controller - - # Do the last path atom here so it can - # override the controller's _cp_config. - if last: - curpath = "/".join((curpath, last)) - if curpath in app.config: - merge(app.config[curpath]) - - return handler - - -def XMLRPCDispatcher(next_dispatcher=Dispatcher()): - from cherrypy.lib import xmlrpcutil - - def xmlrpc_dispatch(path_info): - path_info = xmlrpcutil.patched_path(path_info) - return next_dispatcher(path_info) - return xmlrpc_dispatch - - -def VirtualHost(next_dispatcher=Dispatcher(), use_x_forwarded_host=True, - **domains): - """ - Select a different handler based on the Host header. - - This can be useful when running multiple sites within one CP server. - It allows several domains to point to different parts of a single - website structure. For example:: - - http://www.domain.example -> root - http://www.domain2.example -> root/domain2/ - http://www.domain2.example:443 -> root/secure - - can be accomplished via the following config:: - - [/] - request.dispatch = cherrypy.dispatch.VirtualHost( - **{'www.domain2.example': '/domain2', - 'www.domain2.example:443': '/secure', - }) - - next_dispatcher - The next dispatcher object in the dispatch chain. - The VirtualHost dispatcher adds a prefix to the URL and calls - another dispatcher. Defaults to cherrypy.dispatch.Dispatcher(). - - use_x_forwarded_host - If True (the default), any "X-Forwarded-Host" - request header will be used instead of the "Host" header. This - is commonly added by HTTP servers (such as Apache) when proxying. - - ``**domains`` - A dict of {host header value: virtual prefix} pairs. - The incoming "Host" request header is looked up in this dict, - and, if a match is found, the corresponding "virtual prefix" - value will be prepended to the URL path before calling the - next dispatcher. Note that you often need separate entries - for "example.com" and "www.example.com". In addition, "Host" - headers may contain the port number. - """ - from cherrypy.lib import httputil - - def vhost_dispatch(path_info): - request = cherrypy.serving.request - header = request.headers.get - - domain = header('Host', '') - if use_x_forwarded_host: - domain = header("X-Forwarded-Host", domain) - - prefix = domains.get(domain, "") - if prefix: - path_info = httputil.urljoin(prefix, path_info) - - result = next_dispatcher(path_info) - - # Touch up staticdir config. See - # https://bitbucket.org/cherrypy/cherrypy/issue/614. - section = request.config.get('tools.staticdir.section') - if section: - section = section[len(prefix):] - request.config['tools.staticdir.section'] = section - - return result - return vhost_dispatch diff --git a/lib/cherrypy/_cperror.py b/lib/cherrypy/_cperror.py deleted file mode 100644 index 6256595b..00000000 --- a/lib/cherrypy/_cperror.py +++ /dev/null @@ -1,609 +0,0 @@ -"""Exception classes for CherryPy. - -CherryPy provides (and uses) exceptions for declaring that the HTTP response -should be a status other than the default "200 OK". You can ``raise`` them like -normal Python exceptions. You can also call them and they will raise -themselves; this means you can set an -:class:`HTTPError` -or :class:`HTTPRedirect` as the -:attr:`request.handler`. - -.. _redirectingpost: - -Redirecting POST -================ - -When you GET a resource and are redirected by the server to another Location, -there's generally no problem since GET is both a "safe method" (there should -be no side-effects) and an "idempotent method" (multiple calls are no different -than a single call). - -POST, however, is neither safe nor idempotent--if you -charge a credit card, you don't want to be charged twice by a redirect! - -For this reason, *none* of the 3xx responses permit a user-agent (browser) to -resubmit a POST on redirection without first confirming the action with the -user: - -===== ================================= =========== -300 Multiple Choices Confirm with the user -301 Moved Permanently Confirm with the user -302 Found (Object moved temporarily) Confirm with the user -303 See Other GET the new URI--no confirmation -304 Not modified (for conditional GET only--POST should not raise this error) -305 Use Proxy Confirm with the user -307 Temporary Redirect Confirm with the user -===== ================================= =========== - -However, browsers have historically implemented these restrictions poorly; -in particular, many browsers do not force the user to confirm 301, 302 -or 307 when redirecting POST. For this reason, CherryPy defaults to 303, -which most user-agents appear to have implemented correctly. Therefore, if -you raise HTTPRedirect for a POST request, the user-agent will most likely -attempt to GET the new URI (without asking for confirmation from the user). -We realize this is confusing for developers, but it's the safest thing we -could do. You are of course free to raise ``HTTPRedirect(uri, status=302)`` -or any other 3xx status if you know what you're doing, but given the -environment, we couldn't let any of those be the default. - -Custom Error Handling -===================== - -.. image:: /refman/cperrors.gif - -Anticipated HTTP responses --------------------------- - -The 'error_page' config namespace can be used to provide custom HTML output for -expected responses (like 404 Not Found). Supply a filename from which the -output will be read. The contents will be interpolated with the values -%(status)s, %(message)s, %(traceback)s, and %(version)s using plain old Python -`string formatting `_. - -:: - - _cp_config = { - 'error_page.404': os.path.join(localDir, "static/index.html") - } - - -Beginning in version 3.1, you may also provide a function or other callable as -an error_page entry. It will be passed the same status, message, traceback and -version arguments that are interpolated into templates:: - - def error_page_402(status, message, traceback, version): - return "Error %s - Well, I'm very sorry but you haven't paid!" % status - cherrypy.config.update({'error_page.402': error_page_402}) - -Also in 3.1, in addition to the numbered error codes, you may also supply -"error_page.default" to handle all codes which do not have their own error_page -entry. - - - -Unanticipated errors --------------------- - -CherryPy also has a generic error handling mechanism: whenever an unanticipated -error occurs in your code, it will call -:func:`Request.error_response` to -set the response status, headers, and body. By default, this is the same -output as -:class:`HTTPError(500) `. If you want to provide -some other behavior, you generally replace "request.error_response". - -Here is some sample code that shows how to display a custom error message and -send an e-mail containing the error:: - - from cherrypy import _cperror - - def handle_error(): - cherrypy.response.status = 500 - cherrypy.response.body = [ - "Sorry, an error occured" - ] - sendMail('error@domain.com', - 'Error in your web app', - _cperror.format_exc()) - - class Root: - _cp_config = {'request.error_response': handle_error} - - -Note that you have to explicitly set -:attr:`response.body ` -and not simply return an error message as a result. -""" - -from cgi import escape as _escape -from sys import exc_info as _exc_info -from traceback import format_exception as _format_exception -from cherrypy._cpcompat import basestring, bytestr, iteritems, ntob -from cherrypy._cpcompat import tonative, urljoin as _urljoin -from cherrypy.lib import httputil as _httputil - - -class CherryPyException(Exception): - - """A base class for CherryPy exceptions.""" - pass - - -class TimeoutError(CherryPyException): - - """Exception raised when Response.timed_out is detected.""" - pass - - -class InternalRedirect(CherryPyException): - - """Exception raised to switch to the handler for a different URL. - - This exception will redirect processing to another path within the site - (without informing the client). Provide the new path as an argument when - raising the exception. Provide any params in the querystring for the new - URL. - """ - - def __init__(self, path, query_string=""): - import cherrypy - self.request = cherrypy.serving.request - - self.query_string = query_string - if "?" in path: - # Separate any params included in the path - path, self.query_string = path.split("?", 1) - - # Note that urljoin will "do the right thing" whether url is: - # 1. a URL relative to root (e.g. "/dummy") - # 2. a URL relative to the current path - # Note that any query string will be discarded. - path = _urljoin(self.request.path_info, path) - - # Set a 'path' member attribute so that code which traps this - # error can have access to it. - self.path = path - - CherryPyException.__init__(self, path, self.query_string) - - -class HTTPRedirect(CherryPyException): - - """Exception raised when the request should be redirected. - - This exception will force a HTTP redirect to the URL or URL's you give it. - The new URL must be passed as the first argument to the Exception, - e.g., HTTPRedirect(newUrl). Multiple URLs are allowed in a list. - If a URL is absolute, it will be used as-is. If it is relative, it is - assumed to be relative to the current cherrypy.request.path_info. - - If one of the provided URL is a unicode object, it will be encoded - using the default encoding or the one passed in parameter. - - There are multiple types of redirect, from which you can select via the - ``status`` argument. If you do not provide a ``status`` arg, it defaults to - 303 (or 302 if responding with HTTP/1.0). - - Examples:: - - raise cherrypy.HTTPRedirect("") - raise cherrypy.HTTPRedirect("/abs/path", 307) - raise cherrypy.HTTPRedirect(["path1", "path2?a=1&b=2"], 301) - - See :ref:`redirectingpost` for additional caveats. - """ - - status = None - """The integer HTTP status code to emit.""" - - urls = None - """The list of URL's to emit.""" - - encoding = 'utf-8' - """The encoding when passed urls are not native strings""" - - def __init__(self, urls, status=None, encoding=None): - import cherrypy - request = cherrypy.serving.request - - if isinstance(urls, basestring): - urls = [urls] - - abs_urls = [] - for url in urls: - url = tonative(url, encoding or self.encoding) - - # Note that urljoin will "do the right thing" whether url is: - # 1. a complete URL with host (e.g. "http://www.example.com/test") - # 2. a URL relative to root (e.g. "/dummy") - # 3. a URL relative to the current path - # Note that any query string in cherrypy.request is discarded. - url = _urljoin(cherrypy.url(), url) - abs_urls.append(url) - self.urls = abs_urls - - # RFC 2616 indicates a 301 response code fits our goal; however, - # browser support for 301 is quite messy. Do 302/303 instead. See - # http://www.alanflavell.org.uk/www/post-redirect.html - if status is None: - if request.protocol >= (1, 1): - status = 303 - else: - status = 302 - else: - status = int(status) - if status < 300 or status > 399: - raise ValueError("status must be between 300 and 399.") - - self.status = status - CherryPyException.__init__(self, abs_urls, status) - - def set_response(self): - """Modify cherrypy.response status, headers, and body to represent - self. - - CherryPy uses this internally, but you can also use it to create an - HTTPRedirect object and set its output without *raising* the exception. - """ - import cherrypy - response = cherrypy.serving.response - response.status = status = self.status - - if status in (300, 301, 302, 303, 307): - response.headers['Content-Type'] = "text/html;charset=utf-8" - # "The ... URI SHOULD be given by the Location field - # in the response." - response.headers['Location'] = self.urls[0] - - # "Unless the request method was HEAD, the entity of the response - # SHOULD contain a short hypertext note with a hyperlink to the - # new URI(s)." - msg = { - 300: "This resource can be found at ", - 301: "This resource has permanently moved to ", - 302: "This resource resides temporarily at ", - 303: "This resource can be found at ", - 307: "This resource has moved temporarily to ", - }[status] - msg += '%s.' - from xml.sax import saxutils - msgs = [msg % (saxutils.quoteattr(u), u) for u in self.urls] - response.body = ntob("
\n".join(msgs), 'utf-8') - # Previous code may have set C-L, so we have to reset it - # (allow finalize to set it). - response.headers.pop('Content-Length', None) - elif status == 304: - # Not Modified. - # "The response MUST include the following header fields: - # Date, unless its omission is required by section 14.18.1" - # The "Date" header should have been set in Response.__init__ - - # "...the response SHOULD NOT include other entity-headers." - for key in ('Allow', 'Content-Encoding', 'Content-Language', - 'Content-Length', 'Content-Location', 'Content-MD5', - 'Content-Range', 'Content-Type', 'Expires', - 'Last-Modified'): - if key in response.headers: - del response.headers[key] - - # "The 304 response MUST NOT contain a message-body." - response.body = None - # Previous code may have set C-L, so we have to reset it. - response.headers.pop('Content-Length', None) - elif status == 305: - # Use Proxy. - # self.urls[0] should be the URI of the proxy. - response.headers['Location'] = self.urls[0] - response.body = None - # Previous code may have set C-L, so we have to reset it. - response.headers.pop('Content-Length', None) - else: - raise ValueError("The %s status code is unknown." % status) - - def __call__(self): - """Use this exception as a request.handler (raise self).""" - raise self - - -def clean_headers(status): - """Remove any headers which should not apply to an error response.""" - import cherrypy - - response = cherrypy.serving.response - - # Remove headers which applied to the original content, - # but do not apply to the error page. - respheaders = response.headers - for key in ["Accept-Ranges", "Age", "ETag", "Location", "Retry-After", - "Vary", "Content-Encoding", "Content-Length", "Expires", - "Content-Location", "Content-MD5", "Last-Modified"]: - if key in respheaders: - del respheaders[key] - - if status != 416: - # A server sending a response with status code 416 (Requested - # range not satisfiable) SHOULD include a Content-Range field - # with a byte-range-resp-spec of "*". The instance-length - # specifies the current length of the selected resource. - # A response with status code 206 (Partial Content) MUST NOT - # include a Content-Range field with a byte-range- resp-spec of "*". - if "Content-Range" in respheaders: - del respheaders["Content-Range"] - - -class HTTPError(CherryPyException): - - """Exception used to return an HTTP error code (4xx-5xx) to the client. - - This exception can be used to automatically send a response using a - http status code, with an appropriate error page. It takes an optional - ``status`` argument (which must be between 400 and 599); it defaults to 500 - ("Internal Server Error"). It also takes an optional ``message`` argument, - which will be returned in the response body. See - `RFC2616 `_ - for a complete list of available error codes and when to use them. - - Examples:: - - raise cherrypy.HTTPError(403) - raise cherrypy.HTTPError( - "403 Forbidden", "You are not allowed to access this resource.") - """ - - status = None - """The HTTP status code. May be of type int or str (with a Reason-Phrase). - """ - - code = None - """The integer HTTP status code.""" - - reason = None - """The HTTP Reason-Phrase string.""" - - def __init__(self, status=500, message=None): - self.status = status - try: - self.code, self.reason, defaultmsg = _httputil.valid_status(status) - except ValueError: - raise self.__class__(500, _exc_info()[1].args[0]) - - if self.code < 400 or self.code > 599: - raise ValueError("status must be between 400 and 599.") - - # See http://www.python.org/dev/peps/pep-0352/ - # self.message = message - self._message = message or defaultmsg - CherryPyException.__init__(self, status, message) - - def set_response(self): - """Modify cherrypy.response status, headers, and body to represent - self. - - CherryPy uses this internally, but you can also use it to create an - HTTPError object and set its output without *raising* the exception. - """ - import cherrypy - - response = cherrypy.serving.response - - clean_headers(self.code) - - # In all cases, finalize will be called after this method, - # so don't bother cleaning up response values here. - response.status = self.status - tb = None - if cherrypy.serving.request.show_tracebacks: - tb = format_exc() - - response.headers.pop('Content-Length', None) - - content = self.get_error_page(self.status, traceback=tb, - message=self._message) - response.body = content - - _be_ie_unfriendly(self.code) - - def get_error_page(self, *args, **kwargs): - return get_error_page(*args, **kwargs) - - def __call__(self): - """Use this exception as a request.handler (raise self).""" - raise self - - -class NotFound(HTTPError): - - """Exception raised when a URL could not be mapped to any handler (404). - - This is equivalent to raising - :class:`HTTPError("404 Not Found") `. - """ - - def __init__(self, path=None): - if path is None: - import cherrypy - request = cherrypy.serving.request - path = request.script_name + request.path_info - self.args = (path,) - HTTPError.__init__(self, 404, "The path '%s' was not found." % path) - - -_HTTPErrorTemplate = ''' - - - - %(status)s - - - -

%(status)s

-

%(message)s

-
%(traceback)s
-
- - Powered by CherryPy %(version)s - -
- - -''' - - -def get_error_page(status, **kwargs): - """Return an HTML page, containing a pretty error response. - - status should be an int or a str. - kwargs will be interpolated into the page template. - """ - import cherrypy - - try: - code, reason, message = _httputil.valid_status(status) - except ValueError: - raise cherrypy.HTTPError(500, _exc_info()[1].args[0]) - - # We can't use setdefault here, because some - # callers send None for kwarg values. - if kwargs.get('status') is None: - kwargs['status'] = "%s %s" % (code, reason) - if kwargs.get('message') is None: - kwargs['message'] = message - if kwargs.get('traceback') is None: - kwargs['traceback'] = '' - if kwargs.get('version') is None: - kwargs['version'] = cherrypy.__version__ - - for k, v in iteritems(kwargs): - if v is None: - kwargs[k] = "" - else: - kwargs[k] = _escape(kwargs[k]) - - # Use a custom template or callable for the error page? - pages = cherrypy.serving.request.error_page - error_page = pages.get(code) or pages.get('default') - - # Default template, can be overridden below. - template = _HTTPErrorTemplate - if error_page: - try: - if hasattr(error_page, '__call__'): - # The caller function may be setting headers manually, - # so we delegate to it completely. We may be returning - # an iterator as well as a string here. - # - # We *must* make sure any content is not unicode. - result = error_page(**kwargs) - if cherrypy.lib.is_iterator(result): - from cherrypy.lib.encoding import UTF8StreamEncoder - return UTF8StreamEncoder(result) - elif isinstance(result, cherrypy._cpcompat.unicodestr): - return result.encode('utf-8') - else: - if not isinstance(result, cherrypy._cpcompat.bytestr): - raise ValueError('error page function did not ' - 'return a bytestring, unicodestring or an ' - 'iterator - returned object of type %s.' - % (type(result).__name__)) - return result - else: - # Load the template from this path. - template = tonative(open(error_page, 'rb').read()) - except: - e = _format_exception(*_exc_info())[-1] - m = kwargs['message'] - if m: - m += "
" - m += "In addition, the custom error page failed:\n
%s" % e - kwargs['message'] = m - - response = cherrypy.serving.response - response.headers['Content-Type'] = "text/html;charset=utf-8" - result = template % kwargs - return result.encode('utf-8') - - - -_ie_friendly_error_sizes = { - 400: 512, 403: 256, 404: 512, 405: 256, - 406: 512, 408: 512, 409: 512, 410: 256, - 500: 512, 501: 512, 505: 512, -} - - -def _be_ie_unfriendly(status): - import cherrypy - response = cherrypy.serving.response - - # For some statuses, Internet Explorer 5+ shows "friendly error - # messages" instead of our response.body if the body is smaller - # than a given size. Fix this by returning a body over that size - # (by adding whitespace). - # See http://support.microsoft.com/kb/q218155/ - s = _ie_friendly_error_sizes.get(status, 0) - if s: - s += 1 - # Since we are issuing an HTTP error status, we assume that - # the entity is short, and we should just collapse it. - content = response.collapse_body() - l = len(content) - if l and l < s: - # IN ADDITION: the response must be written to IE - # in one chunk or it will still get replaced! Bah. - content = content + (ntob(" ") * (s - l)) - response.body = content - response.headers['Content-Length'] = str(len(content)) - - -def format_exc(exc=None): - """Return exc (or sys.exc_info if None), formatted.""" - try: - if exc is None: - exc = _exc_info() - if exc == (None, None, None): - return "" - import traceback - return "".join(traceback.format_exception(*exc)) - finally: - del exc - - -def bare_error(extrabody=None): - """Produce status, headers, body for a critical error. - - Returns a triple without calling any other questionable functions, - so it should be as error-free as possible. Call it from an HTTP server - if you get errors outside of the request. - - If extrabody is None, a friendly but rather unhelpful error message - is set in the body. If extrabody is a string, it will be appended - as-is to the body. - """ - - # The whole point of this function is to be a last line-of-defense - # in handling errors. That is, it must not raise any errors itself; - # it cannot be allowed to fail. Therefore, don't add to it! - # In particular, don't call any other CP functions. - - body = ntob("Unrecoverable error in the server.") - if extrabody is not None: - if not isinstance(extrabody, bytestr): - extrabody = extrabody.encode('utf-8') - body += ntob("\n") + extrabody - - return (ntob("500 Internal Server Error"), - [(ntob('Content-Type'), ntob('text/plain')), - (ntob('Content-Length'), ntob(str(len(body)), 'ISO-8859-1'))], - [body]) diff --git a/lib/cherrypy/_cplogging.py b/lib/cherrypy/_cplogging.py deleted file mode 100644 index 554fd7ef..00000000 --- a/lib/cherrypy/_cplogging.py +++ /dev/null @@ -1,459 +0,0 @@ -""" -Simple config -============= - -Although CherryPy uses the :mod:`Python logging module `, it does so -behind the scenes so that simple logging is simple, but complicated logging -is still possible. "Simple" logging means that you can log to the screen -(i.e. console/stdout) or to a file, and that you can easily have separate -error and access log files. - -Here are the simplified logging settings. You use these by adding lines to -your config file or dict. You should set these at either the global level or -per application (see next), but generally not both. - - * ``log.screen``: Set this to True to have both "error" and "access" messages - printed to stdout. - * ``log.access_file``: Set this to an absolute filename where you want - "access" messages written. - * ``log.error_file``: Set this to an absolute filename where you want "error" - messages written. - -Many events are automatically logged; to log your own application events, call -:func:`cherrypy.log`. - -Architecture -============ - -Separate scopes ---------------- - -CherryPy provides log managers at both the global and application layers. -This means you can have one set of logging rules for your entire site, -and another set of rules specific to each application. The global log -manager is found at :func:`cherrypy.log`, and the log manager for each -application is found at :attr:`app.log`. -If you're inside a request, the latter is reachable from -``cherrypy.request.app.log``; if you're outside a request, you'll have to -obtain a reference to the ``app``: either the return value of -:func:`tree.mount()` or, if you used -:func:`quickstart()` instead, via -``cherrypy.tree.apps['/']``. - -By default, the global logs are named "cherrypy.error" and "cherrypy.access", -and the application logs are named "cherrypy.error.2378745" and -"cherrypy.access.2378745" (the number is the id of the Application object). -This means that the application logs "bubble up" to the site logs, so if your -application has no log handlers, the site-level handlers will still log the -messages. - -Errors vs. Access ------------------ - -Each log manager handles both "access" messages (one per HTTP request) and -"error" messages (everything else). Note that the "error" log is not just for -errors! The format of access messages is highly formalized, but the error log -isn't--it receives messages from a variety of sources (including full error -tracebacks, if enabled). - -If you are logging the access log and error log to the same source, then there -is a possibility that a specially crafted error message may replicate an access -log message as described in CWE-117. In this case it is the application -developer's responsibility to manually escape data before using CherryPy's log() -functionality, or they may create an application that is vulnerable to CWE-117. -This would be achieved by using a custom handler escape any special characters, -and attached as described below. - -Custom Handlers -=============== - -The simple settings above work by manipulating Python's standard :mod:`logging` -module. So when you need something more complex, the full power of the standard -module is yours to exploit. You can borrow or create custom handlers, formats, -filters, and much more. Here's an example that skips the standard FileHandler -and uses a RotatingFileHandler instead: - -:: - - #python - log = app.log - - # Remove the default FileHandlers if present. - log.error_file = "" - log.access_file = "" - - maxBytes = getattr(log, "rot_maxBytes", 10000000) - backupCount = getattr(log, "rot_backupCount", 1000) - - # Make a new RotatingFileHandler for the error log. - fname = getattr(log, "rot_error_file", "error.log") - h = handlers.RotatingFileHandler(fname, 'a', maxBytes, backupCount) - h.setLevel(DEBUG) - h.setFormatter(_cplogging.logfmt) - log.error_log.addHandler(h) - - # Make a new RotatingFileHandler for the access log. - fname = getattr(log, "rot_access_file", "access.log") - h = handlers.RotatingFileHandler(fname, 'a', maxBytes, backupCount) - h.setLevel(DEBUG) - h.setFormatter(_cplogging.logfmt) - log.access_log.addHandler(h) - - -The ``rot_*`` attributes are pulled straight from the application log object. -Since "log.*" config entries simply set attributes on the log object, you can -add custom attributes to your heart's content. Note that these handlers are -used ''instead'' of the default, simple handlers outlined above (so don't set -the "log.error_file" config entry, for example). -""" - -import datetime -import logging -# Silence the no-handlers "warning" (stderr write!) in stdlib logging -logging.Logger.manager.emittedNoHandlerWarning = 1 -logfmt = logging.Formatter("%(message)s") -import os -import sys - -import cherrypy -from cherrypy import _cperror -from cherrypy._cpcompat import ntob, py3k - - -class NullHandler(logging.Handler): - - """A no-op logging handler to silence the logging.lastResort handler.""" - - def handle(self, record): - pass - - def emit(self, record): - pass - - def createLock(self): - self.lock = None - - -class LogManager(object): - - """An object to assist both simple and advanced logging. - - ``cherrypy.log`` is an instance of this class. - """ - - appid = None - """The id() of the Application object which owns this log manager. If this - is a global log manager, appid is None.""" - - error_log = None - """The actual :class:`logging.Logger` instance for error messages.""" - - access_log = None - """The actual :class:`logging.Logger` instance for access messages.""" - - if py3k: - access_log_format = \ - '{h} {l} {u} {t} "{r}" {s} {b} "{f}" "{a}"' - else: - access_log_format = \ - '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"' - - logger_root = None - """The "top-level" logger name. - - This string will be used as the first segment in the Logger names. - The default is "cherrypy", for example, in which case the Logger names - will be of the form:: - - cherrypy.error. - cherrypy.access. - """ - - def __init__(self, appid=None, logger_root="cherrypy"): - self.logger_root = logger_root - self.appid = appid - if appid is None: - self.error_log = logging.getLogger("%s.error" % logger_root) - self.access_log = logging.getLogger("%s.access" % logger_root) - else: - self.error_log = logging.getLogger( - "%s.error.%s" % (logger_root, appid)) - self.access_log = logging.getLogger( - "%s.access.%s" % (logger_root, appid)) - self.error_log.setLevel(logging.INFO) - self.access_log.setLevel(logging.INFO) - - # Silence the no-handlers "warning" (stderr write!) in stdlib logging - self.error_log.addHandler(NullHandler()) - self.access_log.addHandler(NullHandler()) - - cherrypy.engine.subscribe('graceful', self.reopen_files) - - def reopen_files(self): - """Close and reopen all file handlers.""" - for log in (self.error_log, self.access_log): - for h in log.handlers: - if isinstance(h, logging.FileHandler): - h.acquire() - h.stream.close() - h.stream = open(h.baseFilename, h.mode) - h.release() - - def error(self, msg='', context='', severity=logging.INFO, - traceback=False): - """Write the given ``msg`` to the error log. - - This is not just for errors! Applications may call this at any time - to log application-specific information. - - If ``traceback`` is True, the traceback of the current exception - (if any) will be appended to ``msg``. - """ - if traceback: - msg += _cperror.format_exc() - self.error_log.log(severity, ' '.join((self.time(), context, msg))) - - def __call__(self, *args, **kwargs): - """An alias for ``error``.""" - return self.error(*args, **kwargs) - - def access(self): - """Write to the access log (in Apache/NCSA Combined Log format). - - See the - `apache documentation `_ - for format details. - - CherryPy calls this automatically for you. Note there are no arguments; - it collects the data itself from - :class:`cherrypy.request`. - - Like Apache started doing in 2.0.46, non-printable and other special - characters in %r (and we expand that to all parts) are escaped using - \\xhh sequences, where hh stands for the hexadecimal representation - of the raw byte. Exceptions from this rule are " and \\, which are - escaped by prepending a backslash, and all whitespace characters, - which are written in their C-style notation (\\n, \\t, etc). - """ - request = cherrypy.serving.request - remote = request.remote - response = cherrypy.serving.response - outheaders = response.headers - inheaders = request.headers - if response.output_status is None: - status = "-" - else: - status = response.output_status.split(ntob(" "), 1)[0] - if py3k: - status = status.decode('ISO-8859-1') - - atoms = {'h': remote.name or remote.ip, - 'l': '-', - 'u': getattr(request, "login", None) or "-", - 't': self.time(), - 'r': request.request_line, - 's': status, - 'b': dict.get(outheaders, 'Content-Length', '') or "-", - 'f': dict.get(inheaders, 'Referer', ''), - 'a': dict.get(inheaders, 'User-Agent', ''), - 'o': dict.get(inheaders, 'Host', '-'), - } - if py3k: - for k, v in atoms.items(): - if not isinstance(v, str): - v = str(v) - v = v.replace('"', '\\"').encode('utf8') - # Fortunately, repr(str) escapes unprintable chars, \n, \t, etc - # and backslash for us. All we have to do is strip the quotes. - v = repr(v)[2:-1] - - # in python 3.0 the repr of bytes (as returned by encode) - # uses double \'s. But then the logger escapes them yet, again - # resulting in quadruple slashes. Remove the extra one here. - v = v.replace('\\\\', '\\') - - # Escape double-quote. - atoms[k] = v - - try: - self.access_log.log( - logging.INFO, self.access_log_format.format(**atoms)) - except: - self(traceback=True) - else: - for k, v in atoms.items(): - if isinstance(v, unicode): - v = v.encode('utf8') - elif not isinstance(v, str): - v = str(v) - # Fortunately, repr(str) escapes unprintable chars, \n, \t, etc - # and backslash for us. All we have to do is strip the quotes. - v = repr(v)[1:-1] - # Escape double-quote. - atoms[k] = v.replace('"', '\\"') - - try: - self.access_log.log( - logging.INFO, self.access_log_format % atoms) - except: - self(traceback=True) - - def time(self): - """Return now() in Apache Common Log Format (no timezone).""" - now = datetime.datetime.now() - monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', - 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'] - month = monthnames[now.month - 1].capitalize() - return ('[%02d/%s/%04d:%02d:%02d:%02d]' % - (now.day, month, now.year, now.hour, now.minute, now.second)) - - def _get_builtin_handler(self, log, key): - for h in log.handlers: - if getattr(h, "_cpbuiltin", None) == key: - return h - - # ------------------------- Screen handlers ------------------------- # - def _set_screen_handler(self, log, enable, stream=None): - h = self._get_builtin_handler(log, "screen") - if enable: - if not h: - if stream is None: - stream = sys.stderr - h = logging.StreamHandler(stream) - h.setFormatter(logfmt) - h._cpbuiltin = "screen" - log.addHandler(h) - elif h: - log.handlers.remove(h) - - def _get_screen(self): - h = self._get_builtin_handler - has_h = h(self.error_log, "screen") or h(self.access_log, "screen") - return bool(has_h) - - def _set_screen(self, newvalue): - self._set_screen_handler(self.error_log, newvalue, stream=sys.stderr) - self._set_screen_handler(self.access_log, newvalue, stream=sys.stdout) - screen = property(_get_screen, _set_screen, - doc="""Turn stderr/stdout logging on or off. - - If you set this to True, it'll add the appropriate StreamHandler for - you. If you set it to False, it will remove the handler. - """) - - # -------------------------- File handlers -------------------------- # - - def _add_builtin_file_handler(self, log, fname): - h = logging.FileHandler(fname) - h.setFormatter(logfmt) - h._cpbuiltin = "file" - log.addHandler(h) - - def _set_file_handler(self, log, filename): - h = self._get_builtin_handler(log, "file") - if filename: - if h: - if h.baseFilename != os.path.abspath(filename): - h.close() - log.handlers.remove(h) - self._add_builtin_file_handler(log, filename) - else: - self._add_builtin_file_handler(log, filename) - else: - if h: - h.close() - log.handlers.remove(h) - - def _get_error_file(self): - h = self._get_builtin_handler(self.error_log, "file") - if h: - return h.baseFilename - return '' - - def _set_error_file(self, newvalue): - self._set_file_handler(self.error_log, newvalue) - error_file = property(_get_error_file, _set_error_file, - doc="""The filename for self.error_log. - - If you set this to a string, it'll add the appropriate FileHandler for - you. If you set it to ``None`` or ``''``, it will remove the handler. - """) - - def _get_access_file(self): - h = self._get_builtin_handler(self.access_log, "file") - if h: - return h.baseFilename - return '' - - def _set_access_file(self, newvalue): - self._set_file_handler(self.access_log, newvalue) - access_file = property(_get_access_file, _set_access_file, - doc="""The filename for self.access_log. - - If you set this to a string, it'll add the appropriate FileHandler for - you. If you set it to ``None`` or ``''``, it will remove the handler. - """) - - # ------------------------- WSGI handlers ------------------------- # - - def _set_wsgi_handler(self, log, enable): - h = self._get_builtin_handler(log, "wsgi") - if enable: - if not h: - h = WSGIErrorHandler() - h.setFormatter(logfmt) - h._cpbuiltin = "wsgi" - log.addHandler(h) - elif h: - log.handlers.remove(h) - - def _get_wsgi(self): - return bool(self._get_builtin_handler(self.error_log, "wsgi")) - - def _set_wsgi(self, newvalue): - self._set_wsgi_handler(self.error_log, newvalue) - wsgi = property(_get_wsgi, _set_wsgi, - doc="""Write errors to wsgi.errors. - - If you set this to True, it'll add the appropriate - :class:`WSGIErrorHandler` for you - (which writes errors to ``wsgi.errors``). - If you set it to False, it will remove the handler. - """) - - -class WSGIErrorHandler(logging.Handler): - - "A handler class which writes logging records to environ['wsgi.errors']." - - def flush(self): - """Flushes the stream.""" - try: - stream = cherrypy.serving.request.wsgi_environ.get('wsgi.errors') - except (AttributeError, KeyError): - pass - else: - stream.flush() - - def emit(self, record): - """Emit a record.""" - try: - stream = cherrypy.serving.request.wsgi_environ.get('wsgi.errors') - except (AttributeError, KeyError): - pass - else: - try: - msg = self.format(record) - fs = "%s\n" - import types - # if no unicode support... - if not hasattr(types, "UnicodeType"): - stream.write(fs % msg) - else: - try: - stream.write(fs % msg) - except UnicodeError: - stream.write(fs % msg.encode("UTF-8")) - self.flush() - except: - self.handleError(record) diff --git a/lib/cherrypy/_cpmodpy.py b/lib/cherrypy/_cpmodpy.py deleted file mode 100644 index 02154d69..00000000 --- a/lib/cherrypy/_cpmodpy.py +++ /dev/null @@ -1,353 +0,0 @@ -"""Native adapter for serving CherryPy via mod_python - -Basic usage: - -########################################## -# Application in a module called myapp.py -########################################## - -import cherrypy - -class Root: - @cherrypy.expose - def index(self): - return 'Hi there, Ho there, Hey there' - - -# We will use this method from the mod_python configuration -# as the entry point to our application -def setup_server(): - cherrypy.tree.mount(Root()) - cherrypy.config.update({'environment': 'production', - 'log.screen': False, - 'show_tracebacks': False}) - -########################################## -# mod_python settings for apache2 -# This should reside in your httpd.conf -# or a file that will be loaded at -# apache startup -########################################## - -# Start -DocumentRoot "/" -Listen 8080 -LoadModule python_module /usr/lib/apache2/modules/mod_python.so - - - PythonPath "sys.path+['/path/to/my/application']" - SetHandler python-program - PythonHandler cherrypy._cpmodpy::handler - PythonOption cherrypy.setup myapp::setup_server - PythonDebug On - -# End - -The actual path to your mod_python.so is dependent on your -environment. In this case we suppose a global mod_python -installation on a Linux distribution such as Ubuntu. - -We do set the PythonPath configuration setting so that -your application can be found by from the user running -the apache2 instance. Of course if your application -resides in the global site-package this won't be needed. - -Then restart apache2 and access http://127.0.0.1:8080 -""" - -import logging -import sys - -import cherrypy -from cherrypy._cpcompat import BytesIO, copyitems, ntob -from cherrypy._cperror import format_exc, bare_error -from cherrypy.lib import httputil - - -# ------------------------------ Request-handling - - -def setup(req): - from mod_python import apache - - # Run any setup functions defined by a "PythonOption cherrypy.setup" - # directive. - options = req.get_options() - if 'cherrypy.setup' in options: - for function in options['cherrypy.setup'].split(): - atoms = function.split('::', 1) - if len(atoms) == 1: - mod = __import__(atoms[0], globals(), locals()) - else: - modname, fname = atoms - mod = __import__(modname, globals(), locals(), [fname]) - func = getattr(mod, fname) - func() - - cherrypy.config.update({'log.screen': False, - "tools.ignore_headers.on": True, - "tools.ignore_headers.headers": ['Range'], - }) - - engine = cherrypy.engine - if hasattr(engine, "signal_handler"): - engine.signal_handler.unsubscribe() - if hasattr(engine, "console_control_handler"): - engine.console_control_handler.unsubscribe() - engine.autoreload.unsubscribe() - cherrypy.server.unsubscribe() - - def _log(msg, level): - newlevel = apache.APLOG_ERR - if logging.DEBUG >= level: - newlevel = apache.APLOG_DEBUG - elif logging.INFO >= level: - newlevel = apache.APLOG_INFO - elif logging.WARNING >= level: - newlevel = apache.APLOG_WARNING - # On Windows, req.server is required or the msg will vanish. See - # http://www.modpython.org/pipermail/mod_python/2003-October/014291.html - # Also, "When server is not specified...LogLevel does not apply..." - apache.log_error(msg, newlevel, req.server) - engine.subscribe('log', _log) - - engine.start() - - def cherrypy_cleanup(data): - engine.exit() - try: - # apache.register_cleanup wasn't available until 3.1.4. - apache.register_cleanup(cherrypy_cleanup) - except AttributeError: - req.server.register_cleanup(req, cherrypy_cleanup) - - -class _ReadOnlyRequest: - expose = ('read', 'readline', 'readlines') - - def __init__(self, req): - for method in self.expose: - self.__dict__[method] = getattr(req, method) - - -recursive = False - -_isSetUp = False - - -def handler(req): - from mod_python import apache - try: - global _isSetUp - if not _isSetUp: - setup(req) - _isSetUp = True - - # Obtain a Request object from CherryPy - local = req.connection.local_addr - local = httputil.Host( - local[0], local[1], req.connection.local_host or "") - remote = req.connection.remote_addr - remote = httputil.Host( - remote[0], remote[1], req.connection.remote_host or "") - - scheme = req.parsed_uri[0] or 'http' - req.get_basic_auth_pw() - - try: - # apache.mpm_query only became available in mod_python 3.1 - q = apache.mpm_query - threaded = q(apache.AP_MPMQ_IS_THREADED) - forked = q(apache.AP_MPMQ_IS_FORKED) - except AttributeError: - bad_value = ("You must provide a PythonOption '%s', " - "either 'on' or 'off', when running a version " - "of mod_python < 3.1") - - threaded = options.get('multithread', '').lower() - if threaded == 'on': - threaded = True - elif threaded == 'off': - threaded = False - else: - raise ValueError(bad_value % "multithread") - - forked = options.get('multiprocess', '').lower() - if forked == 'on': - forked = True - elif forked == 'off': - forked = False - else: - raise ValueError(bad_value % "multiprocess") - - sn = cherrypy.tree.script_name(req.uri or "/") - if sn is None: - send_response(req, '404 Not Found', [], '') - else: - app = cherrypy.tree.apps[sn] - method = req.method - path = req.uri - qs = req.args or "" - reqproto = req.protocol - headers = copyitems(req.headers_in) - rfile = _ReadOnlyRequest(req) - prev = None - - try: - redirections = [] - while True: - request, response = app.get_serving(local, remote, scheme, - "HTTP/1.1") - request.login = req.user - request.multithread = bool(threaded) - request.multiprocess = bool(forked) - request.app = app - request.prev = prev - - # Run the CherryPy Request object and obtain the response - try: - request.run(method, path, qs, reqproto, headers, rfile) - break - except cherrypy.InternalRedirect: - ir = sys.exc_info()[1] - app.release_serving() - prev = request - - if not recursive: - if ir.path in redirections: - raise RuntimeError( - "InternalRedirector visited the same URL " - "twice: %r" % ir.path) - else: - # Add the *previous* path_info + qs to - # redirections. - if qs: - qs = "?" + qs - redirections.append(sn + path + qs) - - # Munge environment and try again. - method = "GET" - path = ir.path - qs = ir.query_string - rfile = BytesIO() - - send_response( - req, response.output_status, response.header_list, - response.body, response.stream) - finally: - app.release_serving() - except: - tb = format_exc() - cherrypy.log(tb, 'MOD_PYTHON', severity=logging.ERROR) - s, h, b = bare_error() - send_response(req, s, h, b) - return apache.OK - - -def send_response(req, status, headers, body, stream=False): - # Set response status - req.status = int(status[:3]) - - # Set response headers - req.content_type = "text/plain" - for header, value in headers: - if header.lower() == 'content-type': - req.content_type = value - continue - req.headers_out.add(header, value) - - if stream: - # Flush now so the status and headers are sent immediately. - req.flush() - - # Set response body - if isinstance(body, basestring): - req.write(body) - else: - for seg in body: - req.write(seg) - - -# --------------- Startup tools for CherryPy + mod_python --------------- # -import os -import re -try: - import subprocess - - def popen(fullcmd): - p = subprocess.Popen(fullcmd, shell=True, - stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - close_fds=True) - return p.stdout -except ImportError: - def popen(fullcmd): - pipein, pipeout = os.popen4(fullcmd) - return pipeout - - -def read_process(cmd, args=""): - fullcmd = "%s %s" % (cmd, args) - pipeout = popen(fullcmd) - try: - firstline = pipeout.readline() - cmd_not_found = re.search( - ntob("(not recognized|No such file|not found)"), - firstline, - re.IGNORECASE - ) - if cmd_not_found: - raise IOError('%s must be on your system path.' % cmd) - output = firstline + pipeout.read() - finally: - pipeout.close() - return output - - -class ModPythonServer(object): - - template = """ -# Apache2 server configuration file for running CherryPy with mod_python. - -DocumentRoot "/" -Listen %(port)s -LoadModule python_module modules/mod_python.so - - - SetHandler python-program - PythonHandler %(handler)s - PythonDebug On -%(opts)s - -""" - - def __init__(self, loc="/", port=80, opts=None, apache_path="apache", - handler="cherrypy._cpmodpy::handler"): - self.loc = loc - self.port = port - self.opts = opts - self.apache_path = apache_path - self.handler = handler - - def start(self): - opts = "".join([" PythonOption %s %s\n" % (k, v) - for k, v in self.opts]) - conf_data = self.template % {"port": self.port, - "loc": self.loc, - "opts": opts, - "handler": self.handler, - } - - mpconf = os.path.join(os.path.dirname(__file__), "cpmodpy.conf") - f = open(mpconf, 'wb') - try: - f.write(conf_data) - finally: - f.close() - - response = read_process(self.apache_path, "-k start -f %s" % mpconf) - self.ready = True - return response - - def stop(self): - os.popen("apache -k stop") - self.ready = False diff --git a/lib/cherrypy/_cpnative_server.py b/lib/cherrypy/_cpnative_server.py deleted file mode 100644 index e303573d..00000000 --- a/lib/cherrypy/_cpnative_server.py +++ /dev/null @@ -1,154 +0,0 @@ -"""Native adapter for serving CherryPy via its builtin server.""" - -import logging -import sys - -import cherrypy -from cherrypy._cpcompat import BytesIO -from cherrypy._cperror import format_exc, bare_error -from cherrypy.lib import httputil -from cherrypy import wsgiserver - - -class NativeGateway(wsgiserver.Gateway): - - recursive = False - - def respond(self): - req = self.req - try: - # Obtain a Request object from CherryPy - local = req.server.bind_addr - local = httputil.Host(local[0], local[1], "") - remote = req.conn.remote_addr, req.conn.remote_port - remote = httputil.Host(remote[0], remote[1], "") - - scheme = req.scheme - sn = cherrypy.tree.script_name(req.uri or "/") - if sn is None: - self.send_response('404 Not Found', [], ['']) - else: - app = cherrypy.tree.apps[sn] - method = req.method - path = req.path - qs = req.qs or "" - headers = req.inheaders.items() - rfile = req.rfile - prev = None - - try: - redirections = [] - while True: - request, response = app.get_serving( - local, remote, scheme, "HTTP/1.1") - request.multithread = True - request.multiprocess = False - request.app = app - request.prev = prev - - # Run the CherryPy Request object and obtain the - # response - try: - request.run(method, path, qs, - req.request_protocol, headers, rfile) - break - except cherrypy.InternalRedirect: - ir = sys.exc_info()[1] - app.release_serving() - prev = request - - if not self.recursive: - if ir.path in redirections: - raise RuntimeError( - "InternalRedirector visited the same " - "URL twice: %r" % ir.path) - else: - # Add the *previous* path_info + qs to - # redirections. - if qs: - qs = "?" + qs - redirections.append(sn + path + qs) - - # Munge environment and try again. - method = "GET" - path = ir.path - qs = ir.query_string - rfile = BytesIO() - - self.send_response( - response.output_status, response.header_list, - response.body) - finally: - app.release_serving() - except: - tb = format_exc() - # print tb - cherrypy.log(tb, 'NATIVE_ADAPTER', severity=logging.ERROR) - s, h, b = bare_error() - self.send_response(s, h, b) - - def send_response(self, status, headers, body): - req = self.req - - # Set response status - req.status = str(status or "500 Server Error") - - # Set response headers - for header, value in headers: - req.outheaders.append((header, value)) - if (req.ready and not req.sent_headers): - req.sent_headers = True - req.send_headers() - - # Set response body - for seg in body: - req.write(seg) - - -class CPHTTPServer(wsgiserver.HTTPServer): - - """Wrapper for wsgiserver.HTTPServer. - - wsgiserver has been designed to not reference CherryPy in any way, - so that it can be used in other frameworks and applications. - Therefore, we wrap it here, so we can apply some attributes - from config -> cherrypy.server -> HTTPServer. - """ - - def __init__(self, server_adapter=cherrypy.server): - self.server_adapter = server_adapter - - server_name = (self.server_adapter.socket_host or - self.server_adapter.socket_file or - None) - - wsgiserver.HTTPServer.__init__( - self, server_adapter.bind_addr, NativeGateway, - minthreads=server_adapter.thread_pool, - maxthreads=server_adapter.thread_pool_max, - server_name=server_name) - - self.max_request_header_size = ( - self.server_adapter.max_request_header_size or 0) - self.max_request_body_size = ( - self.server_adapter.max_request_body_size or 0) - self.request_queue_size = self.server_adapter.socket_queue_size - self.timeout = self.server_adapter.socket_timeout - self.shutdown_timeout = self.server_adapter.shutdown_timeout - self.protocol = self.server_adapter.protocol_version - self.nodelay = self.server_adapter.nodelay - - ssl_module = self.server_adapter.ssl_module or 'pyopenssl' - if self.server_adapter.ssl_context: - adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module) - self.ssl_adapter = adapter_class( - self.server_adapter.ssl_certificate, - self.server_adapter.ssl_private_key, - self.server_adapter.ssl_certificate_chain) - self.ssl_adapter.context = self.server_adapter.ssl_context - elif self.server_adapter.ssl_certificate: - adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module) - self.ssl_adapter = adapter_class( - self.server_adapter.ssl_certificate, - self.server_adapter.ssl_private_key, - self.server_adapter.ssl_certificate_chain) diff --git a/lib/cherrypy/_cpreqbody.py b/lib/cherrypy/_cpreqbody.py deleted file mode 100644 index d2dbbc92..00000000 --- a/lib/cherrypy/_cpreqbody.py +++ /dev/null @@ -1,1013 +0,0 @@ -"""Request body processing for CherryPy. - -.. versionadded:: 3.2 - -Application authors have complete control over the parsing of HTTP request -entities. In short, -:attr:`cherrypy.request.body` -is now always set to an instance of -:class:`RequestBody`, -and *that* class is a subclass of :class:`Entity`. - -When an HTTP request includes an entity body, it is often desirable to -provide that information to applications in a form other than the raw bytes. -Different content types demand different approaches. Examples: - - * For a GIF file, we want the raw bytes in a stream. - * An HTML form is better parsed into its component fields, and each text field - decoded from bytes to unicode. - * A JSON body should be deserialized into a Python dict or list. - -When the request contains a Content-Type header, the media type is used as a -key to look up a value in the -:attr:`request.body.processors` dict. -If the full media -type is not found, then the major type is tried; for example, if no processor -is found for the 'image/jpeg' type, then we look for a processor for the -'image' types altogether. If neither the full type nor the major type has a -matching processor, then a default processor is used -(:func:`default_proc`). For most -types, this means no processing is done, and the body is left unread as a -raw byte stream. Processors are configurable in an 'on_start_resource' hook. - -Some processors, especially those for the 'text' types, attempt to decode bytes -to unicode. If the Content-Type request header includes a 'charset' parameter, -this is used to decode the entity. Otherwise, one or more default charsets may -be attempted, although this decision is up to each processor. If a processor -successfully decodes an Entity or Part, it should set the -:attr:`charset` attribute -on the Entity or Part to the name of the successful charset, so that -applications can easily re-encode or transcode the value if they wish. - -If the Content-Type of the request entity is of major type 'multipart', then -the above parsing process, and possibly a decoding process, is performed for -each part. - -For both the full entity and multipart parts, a Content-Disposition header may -be used to fill :attr:`name` and -:attr:`filename` attributes on the -request.body or the Part. - -.. _custombodyprocessors: - -Custom Processors -================= - -You can add your own processors for any specific or major MIME type. Simply add -it to the :attr:`processors` dict in a -hook/tool that runs at ``on_start_resource`` or ``before_request_body``. -Here's the built-in JSON tool for an example:: - - def json_in(force=True, debug=False): - request = cherrypy.serving.request - def json_processor(entity): - \"""Read application/json data into request.json.\""" - if not entity.headers.get("Content-Length", ""): - raise cherrypy.HTTPError(411) - - body = entity.fp.read() - try: - request.json = json_decode(body) - except ValueError: - raise cherrypy.HTTPError(400, 'Invalid JSON document') - if force: - request.body.processors.clear() - request.body.default_proc = cherrypy.HTTPError( - 415, 'Expected an application/json content type') - request.body.processors['application/json'] = json_processor - -We begin by defining a new ``json_processor`` function to stick in the -``processors`` dictionary. All processor functions take a single argument, -the ``Entity`` instance they are to process. It will be called whenever a -request is received (for those URI's where the tool is turned on) which -has a ``Content-Type`` of "application/json". - -First, it checks for a valid ``Content-Length`` (raising 411 if not valid), -then reads the remaining bytes on the socket. The ``fp`` object knows its -own length, so it won't hang waiting for data that never arrives. It will -return when all data has been read. Then, we decode those bytes using -Python's built-in ``json`` module, and stick the decoded result onto -``request.json`` . If it cannot be decoded, we raise 400. - -If the "force" argument is True (the default), the ``Tool`` clears the -``processors`` dict so that request entities of other ``Content-Types`` -aren't parsed at all. Since there's no entry for those invalid MIME -types, the ``default_proc`` method of ``cherrypy.request.body`` is -called. But this does nothing by default (usually to provide the page -handler an opportunity to handle it.) -But in our case, we want to raise 415, so we replace -``request.body.default_proc`` -with the error (``HTTPError`` instances, when called, raise themselves). - -If we were defining a custom processor, we can do so without making a ``Tool``. -Just add the config entry:: - - request.body.processors = {'application/json': json_processor} - -Note that you can only replace the ``processors`` dict wholesale this way, -not update the existing one. -""" - -try: - from io import DEFAULT_BUFFER_SIZE -except ImportError: - DEFAULT_BUFFER_SIZE = 8192 -import re -import sys -import tempfile -try: - from urllib import unquote_plus -except ImportError: - def unquote_plus(bs): - """Bytes version of urllib.parse.unquote_plus.""" - bs = bs.replace(ntob('+'), ntob(' ')) - atoms = bs.split(ntob('%')) - for i in range(1, len(atoms)): - item = atoms[i] - try: - pct = int(item[:2], 16) - atoms[i] = bytes([pct]) + item[2:] - except ValueError: - pass - return ntob('').join(atoms) - -import cherrypy -from cherrypy._cpcompat import basestring, ntob, ntou -from cherrypy.lib import httputil - - -# ------------------------------- Processors -------------------------------- # - -def process_urlencoded(entity): - """Read application/x-www-form-urlencoded data into entity.params.""" - qs = entity.fp.read() - for charset in entity.attempt_charsets: - try: - params = {} - for aparam in qs.split(ntob('&')): - for pair in aparam.split(ntob(';')): - if not pair: - continue - - atoms = pair.split(ntob('='), 1) - if len(atoms) == 1: - atoms.append(ntob('')) - - key = unquote_plus(atoms[0]).decode(charset) - value = unquote_plus(atoms[1]).decode(charset) - - if key in params: - if not isinstance(params[key], list): - params[key] = [params[key]] - params[key].append(value) - else: - params[key] = value - except UnicodeDecodeError: - pass - else: - entity.charset = charset - break - else: - raise cherrypy.HTTPError( - 400, "The request entity could not be decoded. The following " - "charsets were attempted: %s" % repr(entity.attempt_charsets)) - - # Now that all values have been successfully parsed and decoded, - # apply them to the entity.params dict. - for key, value in params.items(): - if key in entity.params: - if not isinstance(entity.params[key], list): - entity.params[key] = [entity.params[key]] - entity.params[key].append(value) - else: - entity.params[key] = value - - -def process_multipart(entity): - """Read all multipart parts into entity.parts.""" - ib = "" - if 'boundary' in entity.content_type.params: - # http://tools.ietf.org/html/rfc2046#section-5.1.1 - # "The grammar for parameters on the Content-type field is such that it - # is often necessary to enclose the boundary parameter values in quotes - # on the Content-type line" - ib = entity.content_type.params['boundary'].strip('"') - - if not re.match("^[ -~]{0,200}[!-~]$", ib): - raise ValueError('Invalid boundary in multipart form: %r' % (ib,)) - - ib = ('--' + ib).encode('ascii') - - # Find the first marker - while True: - b = entity.readline() - if not b: - return - - b = b.strip() - if b == ib: - break - - # Read all parts - while True: - part = entity.part_class.from_fp(entity.fp, ib) - entity.parts.append(part) - part.process() - if part.fp.done: - break - - -def process_multipart_form_data(entity): - """Read all multipart/form-data parts into entity.parts or entity.params. - """ - process_multipart(entity) - - kept_parts = [] - for part in entity.parts: - if part.name is None: - kept_parts.append(part) - else: - if part.filename is None: - # It's a regular field - value = part.fullvalue() - else: - # It's a file upload. Retain the whole part so consumer code - # has access to its .file and .filename attributes. - value = part - - if part.name in entity.params: - if not isinstance(entity.params[part.name], list): - entity.params[part.name] = [entity.params[part.name]] - entity.params[part.name].append(value) - else: - entity.params[part.name] = value - - entity.parts = kept_parts - - -def _old_process_multipart(entity): - """The behavior of 3.2 and lower. Deprecated and will be changed in 3.3.""" - process_multipart(entity) - - params = entity.params - - for part in entity.parts: - if part.name is None: - key = ntou('parts') - else: - key = part.name - - if part.filename is None: - # It's a regular field - value = part.fullvalue() - else: - # It's a file upload. Retain the whole part so consumer code - # has access to its .file and .filename attributes. - value = part - - if key in params: - if not isinstance(params[key], list): - params[key] = [params[key]] - params[key].append(value) - else: - params[key] = value - - -# -------------------------------- Entities --------------------------------- # -class Entity(object): - - """An HTTP request body, or MIME multipart body. - - This class collects information about the HTTP request entity. When a - given entity is of MIME type "multipart", each part is parsed into its own - Entity instance, and the set of parts stored in - :attr:`entity.parts`. - - Between the ``before_request_body`` and ``before_handler`` tools, CherryPy - tries to process the request body (if any) by calling - :func:`request.body.process`. - This uses the ``content_type`` of the Entity to look up a suitable - processor in - :attr:`Entity.processors`, - a dict. - If a matching processor cannot be found for the complete Content-Type, - it tries again using the major type. For example, if a request with an - entity of type "image/jpeg" arrives, but no processor can be found for - that complete type, then one is sought for the major type "image". If a - processor is still not found, then the - :func:`default_proc` method - of the Entity is called (which does nothing by default; you can - override this too). - - CherryPy includes processors for the "application/x-www-form-urlencoded" - type, the "multipart/form-data" type, and the "multipart" major type. - CherryPy 3.2 processes these types almost exactly as older versions. - Parts are passed as arguments to the page handler using their - ``Content-Disposition.name`` if given, otherwise in a generic "parts" - argument. Each such part is either a string, or the - :class:`Part` itself if it's a file. (In this - case it will have ``file`` and ``filename`` attributes, or possibly a - ``value`` attribute). Each Part is itself a subclass of - Entity, and has its own ``process`` method and ``processors`` dict. - - There is a separate processor for the "multipart" major type which is more - flexible, and simply stores all multipart parts in - :attr:`request.body.parts`. You can - enable it with:: - - cherrypy.request.body.processors['multipart'] = _cpreqbody.process_multipart - - in an ``on_start_resource`` tool. - """ - - # http://tools.ietf.org/html/rfc2046#section-4.1.2: - # "The default character set, which must be assumed in the - # absence of a charset parameter, is US-ASCII." - # However, many browsers send data in utf-8 with no charset. - attempt_charsets = ['utf-8'] - """A list of strings, each of which should be a known encoding. - - When the Content-Type of the request body warrants it, each of the given - encodings will be tried in order. The first one to successfully decode the - entity without raising an error is stored as - :attr:`entity.charset`. This defaults - to ``['utf-8']`` (plus 'ISO-8859-1' for "text/\*" types, as required by - `HTTP/1.1 `_), - but ``['us-ascii', 'utf-8']`` for multipart parts. - """ - - charset = None - """The successful decoding; see "attempt_charsets" above.""" - - content_type = None - """The value of the Content-Type request header. - - If the Entity is part of a multipart payload, this will be the Content-Type - given in the MIME headers for this part. - """ - - default_content_type = 'application/x-www-form-urlencoded' - """This defines a default ``Content-Type`` to use if no Content-Type header - is given. The empty string is used for RequestBody, which results in the - request body not being read or parsed at all. This is by design; a missing - ``Content-Type`` header in the HTTP request entity is an error at best, - and a security hole at worst. For multipart parts, however, the MIME spec - declares that a part with no Content-Type defaults to "text/plain" - (see :class:`Part`). - """ - - filename = None - """The ``Content-Disposition.filename`` header, if available.""" - - fp = None - """The readable socket file object.""" - - headers = None - """A dict of request/multipart header names and values. - - This is a copy of the ``request.headers`` for the ``request.body``; - for multipart parts, it is the set of headers for that part. - """ - - length = None - """The value of the ``Content-Length`` header, if provided.""" - - name = None - """The "name" parameter of the ``Content-Disposition`` header, if any.""" - - params = None - """ - If the request Content-Type is 'application/x-www-form-urlencoded' or - multipart, this will be a dict of the params pulled from the entity - body; that is, it will be the portion of request.params that come - from the message body (sometimes called "POST params", although they - can be sent with various HTTP method verbs). This value is set between - the 'before_request_body' and 'before_handler' hooks (assuming that - process_request_body is True).""" - - processors = {'application/x-www-form-urlencoded': process_urlencoded, - 'multipart/form-data': process_multipart_form_data, - 'multipart': process_multipart, - } - """A dict of Content-Type names to processor methods.""" - - parts = None - """A list of Part instances if ``Content-Type`` is of major type - "multipart".""" - - part_class = None - """The class used for multipart parts. - - You can replace this with custom subclasses to alter the processing of - multipart parts. - """ - - def __init__(self, fp, headers, params=None, parts=None): - # Make an instance-specific copy of the class processors - # so Tools, etc. can replace them per-request. - self.processors = self.processors.copy() - - self.fp = fp - self.headers = headers - - if params is None: - params = {} - self.params = params - - if parts is None: - parts = [] - self.parts = parts - - # Content-Type - self.content_type = headers.elements('Content-Type') - if self.content_type: - self.content_type = self.content_type[0] - else: - self.content_type = httputil.HeaderElement.from_str( - self.default_content_type) - - # Copy the class 'attempt_charsets', prepending any Content-Type - # charset - dec = self.content_type.params.get("charset", None) - if dec: - self.attempt_charsets = [dec] + [c for c in self.attempt_charsets - if c != dec] - else: - self.attempt_charsets = self.attempt_charsets[:] - - # Length - self.length = None - clen = headers.get('Content-Length', None) - # If Transfer-Encoding is 'chunked', ignore any Content-Length. - if ( - clen is not None and - 'chunked' not in headers.get('Transfer-Encoding', '') - ): - try: - self.length = int(clen) - except ValueError: - pass - - # Content-Disposition - self.name = None - self.filename = None - disp = headers.elements('Content-Disposition') - if disp: - disp = disp[0] - if 'name' in disp.params: - self.name = disp.params['name'] - if self.name.startswith('"') and self.name.endswith('"'): - self.name = self.name[1:-1] - if 'filename' in disp.params: - self.filename = disp.params['filename'] - if ( - self.filename.startswith('"') and - self.filename.endswith('"') - ): - self.filename = self.filename[1:-1] - - # The 'type' attribute is deprecated in 3.2; remove it in 3.3. - type = property( - lambda self: self.content_type, - doc="A deprecated alias for " - ":attr:`content_type`." - ) - - def read(self, size=None, fp_out=None): - return self.fp.read(size, fp_out) - - def readline(self, size=None): - return self.fp.readline(size) - - def readlines(self, sizehint=None): - return self.fp.readlines(sizehint) - - def __iter__(self): - return self - - def __next__(self): - line = self.readline() - if not line: - raise StopIteration - return line - - def next(self): - return self.__next__() - - def read_into_file(self, fp_out=None): - """Read the request body into fp_out (or make_file() if None). - - Return fp_out. - """ - if fp_out is None: - fp_out = self.make_file() - self.read(fp_out=fp_out) - return fp_out - - def make_file(self): - """Return a file-like object into which the request body will be read. - - By default, this will return a TemporaryFile. Override as needed. - See also :attr:`cherrypy._cpreqbody.Part.maxrambytes`.""" - return tempfile.TemporaryFile() - - def fullvalue(self): - """Return this entity as a string, whether stored in a file or not.""" - if self.file: - # It was stored in a tempfile. Read it. - self.file.seek(0) - value = self.file.read() - self.file.seek(0) - else: - value = self.value - return value - - def process(self): - """Execute the best-match processor for the given media type.""" - proc = None - ct = self.content_type.value - try: - proc = self.processors[ct] - except KeyError: - toptype = ct.split('/', 1)[0] - try: - proc = self.processors[toptype] - except KeyError: - pass - if proc is None: - self.default_proc() - else: - proc(self) - - def default_proc(self): - """Called if a more-specific processor is not found for the - ``Content-Type``. - """ - # Leave the fp alone for someone else to read. This works fine - # for request.body, but the Part subclasses need to override this - # so they can move on to the next part. - pass - - -class Part(Entity): - - """A MIME part entity, part of a multipart entity.""" - - # "The default character set, which must be assumed in the absence of a - # charset parameter, is US-ASCII." - attempt_charsets = ['us-ascii', 'utf-8'] - """A list of strings, each of which should be a known encoding. - - When the Content-Type of the request body warrants it, each of the given - encodings will be tried in order. The first one to successfully decode the - entity without raising an error is stored as - :attr:`entity.charset`. This defaults - to ``['utf-8']`` (plus 'ISO-8859-1' for "text/\*" types, as required by - `HTTP/1.1 `_), - but ``['us-ascii', 'utf-8']`` for multipart parts. - """ - - boundary = None - """The MIME multipart boundary.""" - - default_content_type = 'text/plain' - """This defines a default ``Content-Type`` to use if no Content-Type header - is given. The empty string is used for RequestBody, which results in the - request body not being read or parsed at all. This is by design; a missing - ``Content-Type`` header in the HTTP request entity is an error at best, - and a security hole at worst. For multipart parts, however (this class), - the MIME spec declares that a part with no Content-Type defaults to - "text/plain". - """ - - # This is the default in stdlib cgi. We may want to increase it. - maxrambytes = 1000 - """The threshold of bytes after which point the ``Part`` will store - its data in a file (generated by - :func:`make_file`) - instead of a string. Defaults to 1000, just like the :mod:`cgi` - module in Python's standard library. - """ - - def __init__(self, fp, headers, boundary): - Entity.__init__(self, fp, headers) - self.boundary = boundary - self.file = None - self.value = None - - def from_fp(cls, fp, boundary): - headers = cls.read_headers(fp) - return cls(fp, headers, boundary) - from_fp = classmethod(from_fp) - - def read_headers(cls, fp): - headers = httputil.HeaderMap() - while True: - line = fp.readline() - if not line: - # No more data--illegal end of headers - raise EOFError("Illegal end of headers.") - - if line == ntob('\r\n'): - # Normal end of headers - break - if not line.endswith(ntob('\r\n')): - raise ValueError("MIME requires CRLF terminators: %r" % line) - - if line[0] in ntob(' \t'): - # It's a continuation line. - v = line.strip().decode('ISO-8859-1') - else: - k, v = line.split(ntob(":"), 1) - k = k.strip().decode('ISO-8859-1') - v = v.strip().decode('ISO-8859-1') - - existing = headers.get(k) - if existing: - v = ", ".join((existing, v)) - headers[k] = v - - return headers - read_headers = classmethod(read_headers) - - def read_lines_to_boundary(self, fp_out=None): - """Read bytes from self.fp and return or write them to a file. - - If the 'fp_out' argument is None (the default), all bytes read are - returned in a single byte string. - - If the 'fp_out' argument is not None, it must be a file-like - object that supports the 'write' method; all bytes read will be - written to the fp, and that fp is returned. - """ - endmarker = self.boundary + ntob("--") - delim = ntob("") - prev_lf = True - lines = [] - seen = 0 - while True: - line = self.fp.readline(1 << 16) - if not line: - raise EOFError("Illegal end of multipart body.") - if line.startswith(ntob("--")) and prev_lf: - strippedline = line.strip() - if strippedline == self.boundary: - break - if strippedline == endmarker: - self.fp.finish() - break - - line = delim + line - - if line.endswith(ntob("\r\n")): - delim = ntob("\r\n") - line = line[:-2] - prev_lf = True - elif line.endswith(ntob("\n")): - delim = ntob("\n") - line = line[:-1] - prev_lf = True - else: - delim = ntob("") - prev_lf = False - - if fp_out is None: - lines.append(line) - seen += len(line) - if seen > self.maxrambytes: - fp_out = self.make_file() - for line in lines: - fp_out.write(line) - else: - fp_out.write(line) - - if fp_out is None: - result = ntob('').join(lines) - for charset in self.attempt_charsets: - try: - result = result.decode(charset) - except UnicodeDecodeError: - pass - else: - self.charset = charset - return result - else: - raise cherrypy.HTTPError( - 400, - "The request entity could not be decoded. The following " - "charsets were attempted: %s" % repr(self.attempt_charsets) - ) - else: - fp_out.seek(0) - return fp_out - - def default_proc(self): - """Called if a more-specific processor is not found for the - ``Content-Type``. - """ - if self.filename: - # Always read into a file if a .filename was given. - self.file = self.read_into_file() - else: - result = self.read_lines_to_boundary() - if isinstance(result, basestring): - self.value = result - else: - self.file = result - - def read_into_file(self, fp_out=None): - """Read the request body into fp_out (or make_file() if None). - - Return fp_out. - """ - if fp_out is None: - fp_out = self.make_file() - self.read_lines_to_boundary(fp_out=fp_out) - return fp_out - -Entity.part_class = Part - -try: - inf = float('inf') -except ValueError: - # Python 2.4 and lower - class Infinity(object): - - def __cmp__(self, other): - return 1 - - def __sub__(self, other): - return self - inf = Infinity() - - -comma_separated_headers = [ - 'Accept', 'Accept-Charset', 'Accept-Encoding', - 'Accept-Language', 'Accept-Ranges', 'Allow', - 'Cache-Control', 'Connection', 'Content-Encoding', - 'Content-Language', 'Expect', 'If-Match', - 'If-None-Match', 'Pragma', 'Proxy-Authenticate', - 'Te', 'Trailer', 'Transfer-Encoding', 'Upgrade', - 'Vary', 'Via', 'Warning', 'Www-Authenticate' -] - - -class SizedReader: - - def __init__(self, fp, length, maxbytes, bufsize=DEFAULT_BUFFER_SIZE, - has_trailers=False): - # Wrap our fp in a buffer so peek() works - self.fp = fp - self.length = length - self.maxbytes = maxbytes - self.buffer = ntob('') - self.bufsize = bufsize - self.bytes_read = 0 - self.done = False - self.has_trailers = has_trailers - - def read(self, size=None, fp_out=None): - """Read bytes from the request body and return or write them to a file. - - A number of bytes less than or equal to the 'size' argument are read - off the socket. The actual number of bytes read are tracked in - self.bytes_read. The number may be smaller than 'size' when 1) the - client sends fewer bytes, 2) the 'Content-Length' request header - specifies fewer bytes than requested, or 3) the number of bytes read - exceeds self.maxbytes (in which case, 413 is raised). - - If the 'fp_out' argument is None (the default), all bytes read are - returned in a single byte string. - - If the 'fp_out' argument is not None, it must be a file-like - object that supports the 'write' method; all bytes read will be - written to the fp, and None is returned. - """ - - if self.length is None: - if size is None: - remaining = inf - else: - remaining = size - else: - remaining = self.length - self.bytes_read - if size and size < remaining: - remaining = size - if remaining == 0: - self.finish() - if fp_out is None: - return ntob('') - else: - return None - - chunks = [] - - # Read bytes from the buffer. - if self.buffer: - if remaining is inf: - data = self.buffer - self.buffer = ntob('') - else: - data = self.buffer[:remaining] - self.buffer = self.buffer[remaining:] - datalen = len(data) - remaining -= datalen - - # Check lengths. - self.bytes_read += datalen - if self.maxbytes and self.bytes_read > self.maxbytes: - raise cherrypy.HTTPError(413) - - # Store the data. - if fp_out is None: - chunks.append(data) - else: - fp_out.write(data) - - # Read bytes from the socket. - while remaining > 0: - chunksize = min(remaining, self.bufsize) - try: - data = self.fp.read(chunksize) - except Exception: - e = sys.exc_info()[1] - if e.__class__.__name__ == 'MaxSizeExceeded': - # Post data is too big - raise cherrypy.HTTPError( - 413, "Maximum request length: %r" % e.args[1]) - else: - raise - if not data: - self.finish() - break - datalen = len(data) - remaining -= datalen - - # Check lengths. - self.bytes_read += datalen - if self.maxbytes and self.bytes_read > self.maxbytes: - raise cherrypy.HTTPError(413) - - # Store the data. - if fp_out is None: - chunks.append(data) - else: - fp_out.write(data) - - if fp_out is None: - return ntob('').join(chunks) - - def readline(self, size=None): - """Read a line from the request body and return it.""" - chunks = [] - while size is None or size > 0: - chunksize = self.bufsize - if size is not None and size < self.bufsize: - chunksize = size - data = self.read(chunksize) - if not data: - break - pos = data.find(ntob('\n')) + 1 - if pos: - chunks.append(data[:pos]) - remainder = data[pos:] - self.buffer += remainder - self.bytes_read -= len(remainder) - break - else: - chunks.append(data) - return ntob('').join(chunks) - - def readlines(self, sizehint=None): - """Read lines from the request body and return them.""" - if self.length is not None: - if sizehint is None: - sizehint = self.length - self.bytes_read - else: - sizehint = min(sizehint, self.length - self.bytes_read) - - lines = [] - seen = 0 - while True: - line = self.readline() - if not line: - break - lines.append(line) - seen += len(line) - if seen >= sizehint: - break - return lines - - def finish(self): - self.done = True - if self.has_trailers and hasattr(self.fp, 'read_trailer_lines'): - self.trailers = {} - - try: - for line in self.fp.read_trailer_lines(): - if line[0] in ntob(' \t'): - # It's a continuation line. - v = line.strip() - else: - try: - k, v = line.split(ntob(":"), 1) - except ValueError: - raise ValueError("Illegal header line.") - k = k.strip().title() - v = v.strip() - - if k in comma_separated_headers: - existing = self.trailers.get(envname) - if existing: - v = ntob(", ").join((existing, v)) - self.trailers[k] = v - except Exception: - e = sys.exc_info()[1] - if e.__class__.__name__ == 'MaxSizeExceeded': - # Post data is too big - raise cherrypy.HTTPError( - 413, "Maximum request length: %r" % e.args[1]) - else: - raise - - -class RequestBody(Entity): - - """The entity of the HTTP request.""" - - bufsize = 8 * 1024 - """The buffer size used when reading the socket.""" - - # Don't parse the request body at all if the client didn't provide - # a Content-Type header. See - # https://bitbucket.org/cherrypy/cherrypy/issue/790 - default_content_type = '' - """This defines a default ``Content-Type`` to use if no Content-Type header - is given. The empty string is used for RequestBody, which results in the - request body not being read or parsed at all. This is by design; a missing - ``Content-Type`` header in the HTTP request entity is an error at best, - and a security hole at worst. For multipart parts, however, the MIME spec - declares that a part with no Content-Type defaults to "text/plain" - (see :class:`Part`). - """ - - maxbytes = None - """Raise ``MaxSizeExceeded`` if more bytes than this are read from - the socket. - """ - - def __init__(self, fp, headers, params=None, request_params=None): - Entity.__init__(self, fp, headers, params) - - # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 - # When no explicit charset parameter is provided by the - # sender, media subtypes of the "text" type are defined - # to have a default charset value of "ISO-8859-1" when - # received via HTTP. - if self.content_type.value.startswith('text/'): - for c in ('ISO-8859-1', 'iso-8859-1', 'Latin-1', 'latin-1'): - if c in self.attempt_charsets: - break - else: - self.attempt_charsets.append('ISO-8859-1') - - # Temporary fix while deprecating passing .parts as .params. - self.processors['multipart'] = _old_process_multipart - - if request_params is None: - request_params = {} - self.request_params = request_params - - def process(self): - """Process the request entity based on its Content-Type.""" - # "The presence of a message-body in a request is signaled by the - # inclusion of a Content-Length or Transfer-Encoding header field in - # the request's message-headers." - # It is possible to send a POST request with no body, for example; - # however, app developers are responsible in that case to set - # cherrypy.request.process_body to False so this method isn't called. - h = cherrypy.serving.request.headers - if 'Content-Length' not in h and 'Transfer-Encoding' not in h: - raise cherrypy.HTTPError(411) - - self.fp = SizedReader(self.fp, self.length, - self.maxbytes, bufsize=self.bufsize, - has_trailers='Trailer' in h) - super(RequestBody, self).process() - - # Body params should also be a part of the request_params - # add them in here. - request_params = self.request_params - for key, value in self.params.items(): - # Python 2 only: keyword arguments must be byte strings (type - # 'str'). - if sys.version_info < (3, 0): - if isinstance(key, unicode): - key = key.encode('ISO-8859-1') - - if key in request_params: - if not isinstance(request_params[key], list): - request_params[key] = [request_params[key]] - request_params[key].append(value) - else: - request_params[key] = value diff --git a/lib/cherrypy/_cprequest.py b/lib/cherrypy/_cprequest.py deleted file mode 100644 index 290bd2eb..00000000 --- a/lib/cherrypy/_cprequest.py +++ /dev/null @@ -1,973 +0,0 @@ - -import os -import sys -import time -import warnings - -import cherrypy -from cherrypy._cpcompat import basestring, copykeys, ntob, unicodestr -from cherrypy._cpcompat import SimpleCookie, CookieError, py3k -from cherrypy import _cpreqbody, _cpconfig -from cherrypy._cperror import format_exc, bare_error -from cherrypy.lib import httputil, file_generator - - -class Hook(object): - - """A callback and its metadata: failsafe, priority, and kwargs.""" - - callback = None - """ - The bare callable that this Hook object is wrapping, which will - be called when the Hook is called.""" - - failsafe = False - """ - If True, the callback is guaranteed to run even if other callbacks - from the same call point raise exceptions.""" - - priority = 50 - """ - Defines the order of execution for a list of Hooks. Priority numbers - should be limited to the closed interval [0, 100], but values outside - this range are acceptable, as are fractional values.""" - - kwargs = {} - """ - A set of keyword arguments that will be passed to the - callable on each call.""" - - def __init__(self, callback, failsafe=None, priority=None, **kwargs): - self.callback = callback - - if failsafe is None: - failsafe = getattr(callback, "failsafe", False) - self.failsafe = failsafe - - if priority is None: - priority = getattr(callback, "priority", 50) - self.priority = priority - - self.kwargs = kwargs - - def __lt__(self, other): - # Python 3 - return self.priority < other.priority - - def __cmp__(self, other): - # Python 2 - return cmp(self.priority, other.priority) - - def __call__(self): - """Run self.callback(**self.kwargs).""" - return self.callback(**self.kwargs) - - def __repr__(self): - cls = self.__class__ - return ("%s.%s(callback=%r, failsafe=%r, priority=%r, %s)" - % (cls.__module__, cls.__name__, self.callback, - self.failsafe, self.priority, - ", ".join(['%s=%r' % (k, v) - for k, v in self.kwargs.items()]))) - - -class HookMap(dict): - - """A map of call points to lists of callbacks (Hook objects).""" - - def __new__(cls, points=None): - d = dict.__new__(cls) - for p in points or []: - d[p] = [] - return d - - def __init__(self, *a, **kw): - pass - - def attach(self, point, callback, failsafe=None, priority=None, **kwargs): - """Append a new Hook made from the supplied arguments.""" - self[point].append(Hook(callback, failsafe, priority, **kwargs)) - - def run(self, point): - """Execute all registered Hooks (callbacks) for the given point.""" - exc = None - hooks = self[point] - hooks.sort() - for hook in hooks: - # Some hooks are guaranteed to run even if others at - # the same hookpoint fail. We will still log the failure, - # but proceed on to the next hook. The only way - # to stop all processing from one of these hooks is - # to raise SystemExit and stop the whole server. - if exc is None or hook.failsafe: - try: - hook() - except (KeyboardInterrupt, SystemExit): - raise - except (cherrypy.HTTPError, cherrypy.HTTPRedirect, - cherrypy.InternalRedirect): - exc = sys.exc_info()[1] - except: - exc = sys.exc_info()[1] - cherrypy.log(traceback=True, severity=40) - if exc: - raise exc - - def __copy__(self): - newmap = self.__class__() - # We can't just use 'update' because we want copies of the - # mutable values (each is a list) as well. - for k, v in self.items(): - newmap[k] = v[:] - return newmap - copy = __copy__ - - def __repr__(self): - cls = self.__class__ - return "%s.%s(points=%r)" % ( - cls.__module__, - cls.__name__, - copykeys(self) - ) - - -# Config namespace handlers - -def hooks_namespace(k, v): - """Attach bare hooks declared in config.""" - # Use split again to allow multiple hooks for a single - # hookpoint per path (e.g. "hooks.before_handler.1"). - # Little-known fact you only get from reading source ;) - hookpoint = k.split(".", 1)[0] - if isinstance(v, basestring): - v = cherrypy.lib.attributes(v) - if not isinstance(v, Hook): - v = Hook(v) - cherrypy.serving.request.hooks[hookpoint].append(v) - - -def request_namespace(k, v): - """Attach request attributes declared in config.""" - # Provides config entries to set request.body attrs (like - # attempt_charsets). - if k[:5] == 'body.': - setattr(cherrypy.serving.request.body, k[5:], v) - else: - setattr(cherrypy.serving.request, k, v) - - -def response_namespace(k, v): - """Attach response attributes declared in config.""" - # Provides config entries to set default response headers - # http://cherrypy.org/ticket/889 - if k[:8] == 'headers.': - cherrypy.serving.response.headers[k.split('.', 1)[1]] = v - else: - setattr(cherrypy.serving.response, k, v) - - -def error_page_namespace(k, v): - """Attach error pages declared in config.""" - if k != 'default': - k = int(k) - cherrypy.serving.request.error_page[k] = v - - -hookpoints = ['on_start_resource', 'before_request_body', - 'before_handler', 'before_finalize', - 'on_end_resource', 'on_end_request', - 'before_error_response', 'after_error_response'] - - -class Request(object): - - """An HTTP request. - - This object represents the metadata of an HTTP request message; - that is, it contains attributes which describe the environment - in which the request URL, headers, and body were sent (if you - want tools to interpret the headers and body, those are elsewhere, - mostly in Tools). This 'metadata' consists of socket data, - transport characteristics, and the Request-Line. This object - also contains data regarding the configuration in effect for - the given URL, and the execution plan for generating a response. - """ - - prev = None - """ - The previous Request object (if any). This should be None - unless we are processing an InternalRedirect.""" - - # Conversation/connection attributes - local = httputil.Host("127.0.0.1", 80) - "An httputil.Host(ip, port, hostname) object for the server socket." - - remote = httputil.Host("127.0.0.1", 1111) - "An httputil.Host(ip, port, hostname) object for the client socket." - - scheme = "http" - """ - The protocol used between client and server. In most cases, - this will be either 'http' or 'https'.""" - - server_protocol = "HTTP/1.1" - """ - The HTTP version for which the HTTP server is at least - conditionally compliant.""" - - base = "" - """The (scheme://host) portion of the requested URL. - In some cases (e.g. when proxying via mod_rewrite), this may contain - path segments which cherrypy.url uses when constructing url's, but - which otherwise are ignored by CherryPy. Regardless, this value - MUST NOT end in a slash.""" - - # Request-Line attributes - request_line = "" - """ - The complete Request-Line received from the client. This is a - single string consisting of the request method, URI, and protocol - version (joined by spaces). Any final CRLF is removed.""" - - method = "GET" - """ - Indicates the HTTP method to be performed on the resource identified - by the Request-URI. Common methods include GET, HEAD, POST, PUT, and - DELETE. CherryPy allows any extension method; however, various HTTP - servers and gateways may restrict the set of allowable methods. - CherryPy applications SHOULD restrict the set (on a per-URI basis).""" - - query_string = "" - """ - The query component of the Request-URI, a string of information to be - interpreted by the resource. The query portion of a URI follows the - path component, and is separated by a '?'. For example, the URI - 'http://www.cherrypy.org/wiki?a=3&b=4' has the query component, - 'a=3&b=4'.""" - - query_string_encoding = 'utf8' - """ - The encoding expected for query string arguments after % HEX HEX decoding). - If a query string is provided that cannot be decoded with this encoding, - 404 is raised (since technically it's a different URI). If you want - arbitrary encodings to not error, set this to 'Latin-1'; you can then - encode back to bytes and re-decode to whatever encoding you like later. - """ - - protocol = (1, 1) - """The HTTP protocol version corresponding to the set - of features which should be allowed in the response. If BOTH - the client's request message AND the server's level of HTTP - compliance is HTTP/1.1, this attribute will be the tuple (1, 1). - If either is 1.0, this attribute will be the tuple (1, 0). - Lower HTTP protocol versions are not explicitly supported.""" - - params = {} - """ - A dict which combines query string (GET) and request entity (POST) - variables. This is populated in two stages: GET params are added - before the 'on_start_resource' hook, and POST params are added - between the 'before_request_body' and 'before_handler' hooks.""" - - # Message attributes - header_list = [] - """ - A list of the HTTP request headers as (name, value) tuples. - In general, you should use request.headers (a dict) instead.""" - - headers = httputil.HeaderMap() - """ - A dict-like object containing the request headers. Keys are header - names (in Title-Case format); however, you may get and set them in - a case-insensitive manner. That is, headers['Content-Type'] and - headers['content-type'] refer to the same value. Values are header - values (decoded according to :rfc:`2047` if necessary). See also: - httputil.HeaderMap, httputil.HeaderElement.""" - - cookie = SimpleCookie() - """See help(Cookie).""" - - rfile = None - """ - If the request included an entity (body), it will be available - as a stream in this attribute. However, the rfile will normally - be read for you between the 'before_request_body' hook and the - 'before_handler' hook, and the resulting string is placed into - either request.params or the request.body attribute. - - You may disable the automatic consumption of the rfile by setting - request.process_request_body to False, either in config for the desired - path, or in an 'on_start_resource' or 'before_request_body' hook. - - WARNING: In almost every case, you should not attempt to read from the - rfile stream after CherryPy's automatic mechanism has read it. If you - turn off the automatic parsing of rfile, you should read exactly the - number of bytes specified in request.headers['Content-Length']. - Ignoring either of these warnings may result in a hung request thread - or in corruption of the next (pipelined) request. - """ - - process_request_body = True - """ - If True, the rfile (if any) is automatically read and parsed, - and the result placed into request.params or request.body.""" - - methods_with_bodies = ("POST", "PUT") - """ - A sequence of HTTP methods for which CherryPy will automatically - attempt to read a body from the rfile. If you are going to change - this property, modify it on the configuration (recommended) - or on the "hook point" `on_start_resource`. - """ - - body = None - """ - If the request Content-Type is 'application/x-www-form-urlencoded' - or multipart, this will be None. Otherwise, this will be an instance - of :class:`RequestBody` (which you - can .read()); this value is set between the 'before_request_body' and - 'before_handler' hooks (assuming that process_request_body is True).""" - - # Dispatch attributes - dispatch = cherrypy.dispatch.Dispatcher() - """ - The object which looks up the 'page handler' callable and collects - config for the current request based on the path_info, other - request attributes, and the application architecture. The core - calls the dispatcher as early as possible, passing it a 'path_info' - argument. - - The default dispatcher discovers the page handler by matching path_info - to a hierarchical arrangement of objects, starting at request.app.root. - See help(cherrypy.dispatch) for more information.""" - - script_name = "" - """ - The 'mount point' of the application which is handling this request. - - This attribute MUST NOT end in a slash. If the script_name refers to - the root of the URI, it MUST be an empty string (not "/"). - """ - - path_info = "/" - """ - The 'relative path' portion of the Request-URI. This is relative - to the script_name ('mount point') of the application which is - handling this request.""" - - login = None - """ - When authentication is used during the request processing this is - set to 'False' if it failed and to the 'username' value if it succeeded. - The default 'None' implies that no authentication happened.""" - - # Note that cherrypy.url uses "if request.app:" to determine whether - # the call is during a real HTTP request or not. So leave this None. - app = None - """The cherrypy.Application object which is handling this request.""" - - handler = None - """ - The function, method, or other callable which CherryPy will call to - produce the response. The discovery of the handler and the arguments - it will receive are determined by the request.dispatch object. - By default, the handler is discovered by walking a tree of objects - starting at request.app.root, and is then passed all HTTP params - (from the query string and POST body) as keyword arguments.""" - - toolmaps = {} - """ - A nested dict of all Toolboxes and Tools in effect for this request, - of the form: {Toolbox.namespace: {Tool.name: config dict}}.""" - - config = None - """ - A flat dict of all configuration entries which apply to the - current request. These entries are collected from global config, - application config (based on request.path_info), and from handler - config (exactly how is governed by the request.dispatch object in - effect for this request; by default, handler config can be attached - anywhere in the tree between request.app.root and the final handler, - and inherits downward).""" - - is_index = None - """ - This will be True if the current request is mapped to an 'index' - resource handler (also, a 'default' handler if path_info ends with - a slash). The value may be used to automatically redirect the - user-agent to a 'more canonical' URL which either adds or removes - the trailing slash. See cherrypy.tools.trailing_slash.""" - - hooks = HookMap(hookpoints) - """ - A HookMap (dict-like object) of the form: {hookpoint: [hook, ...]}. - Each key is a str naming the hook point, and each value is a list - of hooks which will be called at that hook point during this request. - The list of hooks is generally populated as early as possible (mostly - from Tools specified in config), but may be extended at any time. - See also: _cprequest.Hook, _cprequest.HookMap, and cherrypy.tools.""" - - error_response = cherrypy.HTTPError(500).set_response - """ - The no-arg callable which will handle unexpected, untrapped errors - during request processing. This is not used for expected exceptions - (like NotFound, HTTPError, or HTTPRedirect) which are raised in - response to expected conditions (those should be customized either - via request.error_page or by overriding HTTPError.set_response). - By default, error_response uses HTTPError(500) to return a generic - error response to the user-agent.""" - - error_page = {} - """ - A dict of {error code: response filename or callable} pairs. - - The error code must be an int representing a given HTTP error code, - or the string 'default', which will be used if no matching entry - is found for a given numeric code. - - If a filename is provided, the file should contain a Python string- - formatting template, and can expect by default to receive format - values with the mapping keys %(status)s, %(message)s, %(traceback)s, - and %(version)s. The set of format mappings can be extended by - overriding HTTPError.set_response. - - If a callable is provided, it will be called by default with keyword - arguments 'status', 'message', 'traceback', and 'version', as for a - string-formatting template. The callable must return a string or - iterable of strings which will be set to response.body. It may also - override headers or perform any other processing. - - If no entry is given for an error code, and no 'default' entry exists, - a default template will be used. - """ - - show_tracebacks = True - """ - If True, unexpected errors encountered during request processing will - include a traceback in the response body.""" - - show_mismatched_params = True - """ - If True, mismatched parameters encountered during PageHandler invocation - processing will be included in the response body.""" - - throws = (KeyboardInterrupt, SystemExit, cherrypy.InternalRedirect) - """The sequence of exceptions which Request.run does not trap.""" - - throw_errors = False - """ - If True, Request.run will not trap any errors (except HTTPRedirect and - HTTPError, which are more properly called 'exceptions', not errors).""" - - closed = False - """True once the close method has been called, False otherwise.""" - - stage = None - """ - A string containing the stage reached in the request-handling process. - This is useful when debugging a live server with hung requests.""" - - namespaces = _cpconfig.NamespaceSet( - **{"hooks": hooks_namespace, - "request": request_namespace, - "response": response_namespace, - "error_page": error_page_namespace, - "tools": cherrypy.tools, - }) - - def __init__(self, local_host, remote_host, scheme="http", - server_protocol="HTTP/1.1"): - """Populate a new Request object. - - local_host should be an httputil.Host object with the server info. - remote_host should be an httputil.Host object with the client info. - scheme should be a string, either "http" or "https". - """ - self.local = local_host - self.remote = remote_host - self.scheme = scheme - self.server_protocol = server_protocol - - self.closed = False - - # Put a *copy* of the class error_page into self. - self.error_page = self.error_page.copy() - - # Put a *copy* of the class namespaces into self. - self.namespaces = self.namespaces.copy() - - self.stage = None - - def close(self): - """Run cleanup code. (Core)""" - if not self.closed: - self.closed = True - self.stage = 'on_end_request' - self.hooks.run('on_end_request') - self.stage = 'close' - - def run(self, method, path, query_string, req_protocol, headers, rfile): - r"""Process the Request. (Core) - - method, path, query_string, and req_protocol should be pulled directly - from the Request-Line (e.g. "GET /path?key=val HTTP/1.0"). - - path - This should be %XX-unquoted, but query_string should not be. - - When using Python 2, they both MUST be byte strings, - not unicode strings. - - When using Python 3, they both MUST be unicode strings, - not byte strings, and preferably not bytes \x00-\xFF - disguised as unicode. - - headers - A list of (name, value) tuples. - - rfile - A file-like object containing the HTTP request entity. - - When run() is done, the returned object should have 3 attributes: - - * status, e.g. "200 OK" - * header_list, a list of (name, value) tuples - * body, an iterable yielding strings - - Consumer code (HTTP servers) should then access these response - attributes to build the outbound stream. - - """ - response = cherrypy.serving.response - self.stage = 'run' - try: - self.error_response = cherrypy.HTTPError(500).set_response - - self.method = method - path = path or "/" - self.query_string = query_string or '' - self.params = {} - - # Compare request and server HTTP protocol versions, in case our - # server does not support the requested protocol. Limit our output - # to min(req, server). We want the following output: - # request server actual written supported response - # protocol protocol response protocol feature set - # a 1.0 1.0 1.0 1.0 - # b 1.0 1.1 1.1 1.0 - # c 1.1 1.0 1.0 1.0 - # d 1.1 1.1 1.1 1.1 - # Notice that, in (b), the response will be "HTTP/1.1" even though - # the client only understands 1.0. RFC 2616 10.5.6 says we should - # only return 505 if the _major_ version is different. - rp = int(req_protocol[5]), int(req_protocol[7]) - sp = int(self.server_protocol[5]), int(self.server_protocol[7]) - self.protocol = min(rp, sp) - response.headers.protocol = self.protocol - - # Rebuild first line of the request (e.g. "GET /path HTTP/1.0"). - url = path - if query_string: - url += '?' + query_string - self.request_line = '%s %s %s' % (method, url, req_protocol) - - self.header_list = list(headers) - self.headers = httputil.HeaderMap() - - self.rfile = rfile - self.body = None - - self.cookie = SimpleCookie() - self.handler = None - - # path_info should be the path from the - # app root (script_name) to the handler. - self.script_name = self.app.script_name - self.path_info = pi = path[len(self.script_name):] - - self.stage = 'respond' - self.respond(pi) - - except self.throws: - raise - except: - if self.throw_errors: - raise - else: - # Failure in setup, error handler or finalize. Bypass them. - # Can't use handle_error because we may not have hooks yet. - cherrypy.log(traceback=True, severity=40) - if self.show_tracebacks: - body = format_exc() - else: - body = "" - r = bare_error(body) - response.output_status, response.header_list, response.body = r - - if self.method == "HEAD": - # HEAD requests MUST NOT return a message-body in the response. - response.body = [] - - try: - cherrypy.log.access() - except: - cherrypy.log.error(traceback=True) - - if response.timed_out: - raise cherrypy.TimeoutError() - - return response - - # Uncomment for stage debugging - # stage = property(lambda self: self._stage, lambda self, v: print(v)) - - def respond(self, path_info): - """Generate a response for the resource at self.path_info. (Core)""" - response = cherrypy.serving.response - try: - try: - try: - if self.app is None: - raise cherrypy.NotFound() - - # Get the 'Host' header, so we can HTTPRedirect properly. - self.stage = 'process_headers' - self.process_headers() - - # Make a copy of the class hooks - self.hooks = self.__class__.hooks.copy() - self.toolmaps = {} - - self.stage = 'get_resource' - self.get_resource(path_info) - - self.body = _cpreqbody.RequestBody( - self.rfile, self.headers, request_params=self.params) - - self.namespaces(self.config) - - self.stage = 'on_start_resource' - self.hooks.run('on_start_resource') - - # Parse the querystring - self.stage = 'process_query_string' - self.process_query_string() - - # Process the body - if self.process_request_body: - if self.method not in self.methods_with_bodies: - self.process_request_body = False - self.stage = 'before_request_body' - self.hooks.run('before_request_body') - if self.process_request_body: - self.body.process() - - # Run the handler - self.stage = 'before_handler' - self.hooks.run('before_handler') - if self.handler: - self.stage = 'handler' - response.body = self.handler() - - # Finalize - self.stage = 'before_finalize' - self.hooks.run('before_finalize') - response.finalize() - except (cherrypy.HTTPRedirect, cherrypy.HTTPError): - inst = sys.exc_info()[1] - inst.set_response() - self.stage = 'before_finalize (HTTPError)' - self.hooks.run('before_finalize') - response.finalize() - finally: - self.stage = 'on_end_resource' - self.hooks.run('on_end_resource') - except self.throws: - raise - except: - if self.throw_errors: - raise - self.handle_error() - - def process_query_string(self): - """Parse the query string into Python structures. (Core)""" - try: - p = httputil.parse_query_string( - self.query_string, encoding=self.query_string_encoding) - except UnicodeDecodeError: - raise cherrypy.HTTPError( - 404, "The given query string could not be processed. Query " - "strings for this resource must be encoded with %r." % - self.query_string_encoding) - - # Python 2 only: keyword arguments must be byte strings (type 'str'). - if not py3k: - for key, value in p.items(): - if isinstance(key, unicode): - del p[key] - p[key.encode(self.query_string_encoding)] = value - self.params.update(p) - - def process_headers(self): - """Parse HTTP header data into Python structures. (Core)""" - # Process the headers into self.headers - headers = self.headers - for name, value in self.header_list: - # Call title() now (and use dict.__method__(headers)) - # so title doesn't have to be called twice. - name = name.title() - value = value.strip() - - # Warning: if there is more than one header entry for cookies - # (AFAIK, only Konqueror does that), only the last one will - # remain in headers (but they will be correctly stored in - # request.cookie). - if "=?" in value: - dict.__setitem__(headers, name, httputil.decode_TEXT(value)) - else: - dict.__setitem__(headers, name, value) - - # Handle cookies differently because on Konqueror, multiple - # cookies come on different lines with the same key - if name == 'Cookie': - try: - self.cookie.load(value) - except CookieError: - msg = "Illegal cookie name %s" % value.split('=')[0] - raise cherrypy.HTTPError(400, msg) - - if not dict.__contains__(headers, 'Host'): - # All Internet-based HTTP/1.1 servers MUST respond with a 400 - # (Bad Request) status code to any HTTP/1.1 request message - # which lacks a Host header field. - if self.protocol >= (1, 1): - msg = "HTTP/1.1 requires a 'Host' request header." - raise cherrypy.HTTPError(400, msg) - host = dict.get(headers, 'Host') - if not host: - host = self.local.name or self.local.ip - self.base = "%s://%s" % (self.scheme, host) - - def get_resource(self, path): - """Call a dispatcher (which sets self.handler and .config). (Core)""" - # First, see if there is a custom dispatch at this URI. Custom - # dispatchers can only be specified in app.config, not in _cp_config - # (since custom dispatchers may not even have an app.root). - dispatch = self.app.find_config( - path, "request.dispatch", self.dispatch) - - # dispatch() should set self.handler and self.config - dispatch(path) - - def handle_error(self): - """Handle the last unanticipated exception. (Core)""" - try: - self.hooks.run("before_error_response") - if self.error_response: - self.error_response() - self.hooks.run("after_error_response") - cherrypy.serving.response.finalize() - except cherrypy.HTTPRedirect: - inst = sys.exc_info()[1] - inst.set_response() - cherrypy.serving.response.finalize() - - # ------------------------- Properties ------------------------- # - - def _get_body_params(self): - warnings.warn( - "body_params is deprecated in CherryPy 3.2, will be removed in " - "CherryPy 3.3.", - DeprecationWarning - ) - return self.body.params - body_params = property(_get_body_params, - doc=""" - If the request Content-Type is 'application/x-www-form-urlencoded' or - multipart, this will be a dict of the params pulled from the entity - body; that is, it will be the portion of request.params that come - from the message body (sometimes called "POST params", although they - can be sent with various HTTP method verbs). This value is set between - the 'before_request_body' and 'before_handler' hooks (assuming that - process_request_body is True). - - Deprecated in 3.2, will be removed for 3.3 in favor of - :attr:`request.body.params`.""") - - -class ResponseBody(object): - - """The body of the HTTP response (the response entity).""" - - if py3k: - unicode_err = ("Page handlers MUST return bytes. Use tools.encode " - "if you wish to return unicode.") - - def __get__(self, obj, objclass=None): - if obj is None: - # When calling on the class instead of an instance... - return self - else: - return obj._body - - def __set__(self, obj, value): - # Convert the given value to an iterable object. - if py3k and isinstance(value, str): - raise ValueError(self.unicode_err) - - if isinstance(value, basestring): - # strings get wrapped in a list because iterating over a single - # item list is much faster than iterating over every character - # in a long string. - if value: - value = [value] - else: - # [''] doesn't evaluate to False, so replace it with []. - value = [] - elif py3k and isinstance(value, list): - # every item in a list must be bytes... - for i, item in enumerate(value): - if isinstance(item, str): - raise ValueError(self.unicode_err) - # Don't use isinstance here; io.IOBase which has an ABC takes - # 1000 times as long as, say, isinstance(value, str) - elif hasattr(value, 'read'): - value = file_generator(value) - elif value is None: - value = [] - obj._body = value - - -class Response(object): - - """An HTTP Response, including status, headers, and body.""" - - status = "" - """The HTTP Status-Code and Reason-Phrase.""" - - header_list = [] - """ - A list of the HTTP response headers as (name, value) tuples. - In general, you should use response.headers (a dict) instead. This - attribute is generated from response.headers and is not valid until - after the finalize phase.""" - - headers = httputil.HeaderMap() - """ - A dict-like object containing the response headers. Keys are header - names (in Title-Case format); however, you may get and set them in - a case-insensitive manner. That is, headers['Content-Type'] and - headers['content-type'] refer to the same value. Values are header - values (decoded according to :rfc:`2047` if necessary). - - .. seealso:: classes :class:`HeaderMap`, :class:`HeaderElement` - """ - - cookie = SimpleCookie() - """See help(Cookie).""" - - body = ResponseBody() - """The body (entity) of the HTTP response.""" - - time = None - """The value of time.time() when created. Use in HTTP dates.""" - - timeout = 300 - """Seconds after which the response will be aborted.""" - - timed_out = False - """ - Flag to indicate the response should be aborted, because it has - exceeded its timeout.""" - - stream = False - """If False, buffer the response body.""" - - def __init__(self): - self.status = None - self.header_list = None - self._body = [] - self.time = time.time() - - self.headers = httputil.HeaderMap() - # Since we know all our keys are titled strings, we can - # bypass HeaderMap.update and get a big speed boost. - dict.update(self.headers, { - "Content-Type": 'text/html', - "Server": "CherryPy/" + cherrypy.__version__, - "Date": httputil.HTTPDate(self.time), - }) - self.cookie = SimpleCookie() - - def collapse_body(self): - """Collapse self.body to a single string; replace it and return it.""" - if isinstance(self.body, basestring): - return self.body - - newbody = [] - for chunk in self.body: - if py3k and not isinstance(chunk, bytes): - raise TypeError("Chunk %s is not of type 'bytes'." % - repr(chunk)) - newbody.append(chunk) - newbody = ntob('').join(newbody) - - self.body = newbody - return newbody - - def finalize(self): - """Transform headers (and cookies) into self.header_list. (Core)""" - try: - code, reason, _ = httputil.valid_status(self.status) - except ValueError: - raise cherrypy.HTTPError(500, sys.exc_info()[1].args[0]) - - headers = self.headers - - self.status = "%s %s" % (code, reason) - self.output_status = ntob(str(code), 'ascii') + \ - ntob(" ") + headers.encode(reason) - - if self.stream: - # The upshot: wsgiserver will chunk the response if - # you pop Content-Length (or set it explicitly to None). - # Note that lib.static sets C-L to the file's st_size. - if dict.get(headers, 'Content-Length') is None: - dict.pop(headers, 'Content-Length', None) - elif code < 200 or code in (204, 205, 304): - # "All 1xx (informational), 204 (no content), - # and 304 (not modified) responses MUST NOT - # include a message-body." - dict.pop(headers, 'Content-Length', None) - self.body = ntob("") - else: - # Responses which are not streamed should have a Content-Length, - # but allow user code to set Content-Length if desired. - if dict.get(headers, 'Content-Length') is None: - content = self.collapse_body() - dict.__setitem__(headers, 'Content-Length', len(content)) - - # Transform our header dict into a list of tuples. - self.header_list = h = headers.output() - - cookie = self.cookie.output() - if cookie: - for line in cookie.split("\n"): - if line.endswith("\r"): - # Python 2.4 emits cookies joined by LF but 2.5+ by CRLF. - line = line[:-1] - name, value = line.split(": ", 1) - if isinstance(name, unicodestr): - name = name.encode("ISO-8859-1") - if isinstance(value, unicodestr): - value = headers.encode(value) - h.append((name, value)) - - def check_timeout(self): - """If now > self.time + self.timeout, set self.timed_out. - - This purposefully sets a flag, rather than raising an error, - so that a monitor thread can interrupt the Response thread. - """ - if time.time() > self.time + self.timeout: - self.timed_out = True diff --git a/lib/cherrypy/_cpserver.py b/lib/cherrypy/_cpserver.py deleted file mode 100644 index a31e7428..00000000 --- a/lib/cherrypy/_cpserver.py +++ /dev/null @@ -1,226 +0,0 @@ -"""Manage HTTP servers with CherryPy.""" - -import warnings - -import cherrypy -from cherrypy.lib import attributes -from cherrypy._cpcompat import basestring, py3k - -# We import * because we want to export check_port -# et al as attributes of this module. -from cherrypy.process.servers import * - - -class Server(ServerAdapter): - - """An adapter for an HTTP server. - - You can set attributes (like socket_host and socket_port) - on *this* object (which is probably cherrypy.server), and call - quickstart. For example:: - - cherrypy.server.socket_port = 80 - cherrypy.quickstart() - """ - - socket_port = 8080 - """The TCP port on which to listen for connections.""" - - _socket_host = '127.0.0.1' - - def _get_socket_host(self): - return self._socket_host - - def _set_socket_host(self, value): - if value == '': - raise ValueError("The empty string ('') is not an allowed value. " - "Use '0.0.0.0' instead to listen on all active " - "interfaces (INADDR_ANY).") - self._socket_host = value - socket_host = property( - _get_socket_host, - _set_socket_host, - doc="""The hostname or IP address on which to listen for connections. - - Host values may be any IPv4 or IPv6 address, or any valid hostname. - The string 'localhost' is a synonym for '127.0.0.1' (or '::1', if - your hosts file prefers IPv6). The string '0.0.0.0' is a special - IPv4 entry meaning "any active interface" (INADDR_ANY), and '::' - is the similar IN6ADDR_ANY for IPv6. The empty string or None are - not allowed.""") - - socket_file = None - """If given, the name of the UNIX socket to use instead of TCP/IP. - - When this option is not None, the `socket_host` and `socket_port` options - are ignored.""" - - socket_queue_size = 5 - """The 'backlog' argument to socket.listen(); specifies the maximum number - of queued connections (default 5).""" - - socket_timeout = 10 - """The timeout in seconds for accepted connections (default 10).""" - - accepted_queue_size = -1 - """The maximum number of requests which will be queued up before - the server refuses to accept it (default -1, meaning no limit).""" - - accepted_queue_timeout = 10 - """The timeout in seconds for attempting to add a request to the - queue when the queue is full (default 10).""" - - shutdown_timeout = 5 - """The time to wait for HTTP worker threads to clean up.""" - - protocol_version = 'HTTP/1.1' - """The version string to write in the Status-Line of all HTTP responses, - for example, "HTTP/1.1" (the default). Depending on the HTTP server used, - this should also limit the supported features used in the response.""" - - thread_pool = 10 - """The number of worker threads to start up in the pool.""" - - thread_pool_max = -1 - """The maximum size of the worker-thread pool. Use -1 to indicate no limit. - """ - - max_request_header_size = 500 * 1024 - """The maximum number of bytes allowable in the request headers. - If exceeded, the HTTP server should return "413 Request Entity Too Large". - """ - - max_request_body_size = 100 * 1024 * 1024 - """The maximum number of bytes allowable in the request body. If exceeded, - the HTTP server should return "413 Request Entity Too Large".""" - - instance = None - """If not None, this should be an HTTP server instance (such as - CPWSGIServer) which cherrypy.server will control. Use this when you need - more control over object instantiation than is available in the various - configuration options.""" - - ssl_context = None - """When using PyOpenSSL, an instance of SSL.Context.""" - - ssl_certificate = None - """The filename of the SSL certificate to use.""" - - ssl_certificate_chain = None - """When using PyOpenSSL, the certificate chain to pass to - Context.load_verify_locations.""" - - ssl_private_key = None - """The filename of the private key to use with SSL.""" - - if py3k: - ssl_module = 'builtin' - """The name of a registered SSL adaptation module to use with - the builtin WSGI server. Builtin options are: 'builtin' (to - use the SSL library built into recent versions of Python). - You may also register your own classes in the - wsgiserver.ssl_adapters dict.""" - else: - ssl_module = 'pyopenssl' - """The name of a registered SSL adaptation module to use with the - builtin WSGI server. Builtin options are 'builtin' (to use the SSL - library built into recent versions of Python) and 'pyopenssl' (to - use the PyOpenSSL project, which you must install separately). You - may also register your own classes in the wsgiserver.ssl_adapters - dict.""" - - statistics = False - """Turns statistics-gathering on or off for aware HTTP servers.""" - - nodelay = True - """If True (the default since 3.1), sets the TCP_NODELAY socket option.""" - - wsgi_version = (1, 0) - """The WSGI version tuple to use with the builtin WSGI server. - The provided options are (1, 0) [which includes support for PEP 3333, - which declares it covers WSGI version 1.0.1 but still mandates the - wsgi.version (1, 0)] and ('u', 0), an experimental unicode version. - You may create and register your own experimental versions of the WSGI - protocol by adding custom classes to the wsgiserver.wsgi_gateways dict.""" - - def __init__(self): - self.bus = cherrypy.engine - self.httpserver = None - self.interrupt = None - self.running = False - - def httpserver_from_self(self, httpserver=None): - """Return a (httpserver, bind_addr) pair based on self attributes.""" - if httpserver is None: - httpserver = self.instance - if httpserver is None: - from cherrypy import _cpwsgi_server - httpserver = _cpwsgi_server.CPWSGIServer(self) - if isinstance(httpserver, basestring): - # Is anyone using this? Can I add an arg? - httpserver = attributes(httpserver)(self) - return httpserver, self.bind_addr - - def start(self): - """Start the HTTP server.""" - if not self.httpserver: - self.httpserver, self.bind_addr = self.httpserver_from_self() - ServerAdapter.start(self) - start.priority = 75 - - def _get_bind_addr(self): - if self.socket_file: - return self.socket_file - if self.socket_host is None and self.socket_port is None: - return None - return (self.socket_host, self.socket_port) - - def _set_bind_addr(self, value): - if value is None: - self.socket_file = None - self.socket_host = None - self.socket_port = None - elif isinstance(value, basestring): - self.socket_file = value - self.socket_host = None - self.socket_port = None - else: - try: - self.socket_host, self.socket_port = value - self.socket_file = None - except ValueError: - raise ValueError("bind_addr must be a (host, port) tuple " - "(for TCP sockets) or a string (for Unix " - "domain sockets), not %r" % value) - bind_addr = property( - _get_bind_addr, - _set_bind_addr, - doc='A (host, port) tuple for TCP sockets or ' - 'a str for Unix domain sockets.') - - def base(self): - """Return the base (scheme://host[:port] or sock file) for this server. - """ - if self.socket_file: - return self.socket_file - - host = self.socket_host - if host in ('0.0.0.0', '::'): - # 0.0.0.0 is INADDR_ANY and :: is IN6ADDR_ANY. - # Look up the host name, which should be the - # safest thing to spit out in a URL. - import socket - host = socket.gethostname() - - port = self.socket_port - - if self.ssl_certificate: - scheme = "https" - if port != 443: - host += ":%s" % port - else: - scheme = "http" - if port != 80: - host += ":%s" % port - - return "%s://%s" % (scheme, host) diff --git a/lib/cherrypy/_cpthreadinglocal.py b/lib/cherrypy/_cpthreadinglocal.py deleted file mode 100644 index 238c3224..00000000 --- a/lib/cherrypy/_cpthreadinglocal.py +++ /dev/null @@ -1,241 +0,0 @@ -# This is a backport of Python-2.4's threading.local() implementation - -"""Thread-local objects - -(Note that this module provides a Python version of thread - threading.local class. Depending on the version of Python you're - using, there may be a faster one available. You should always import - the local class from threading.) - -Thread-local objects support the management of thread-local data. -If you have data that you want to be local to a thread, simply create -a thread-local object and use its attributes: - - >>> mydata = local() - >>> mydata.number = 42 - >>> mydata.number - 42 - -You can also access the local-object's dictionary: - - >>> mydata.__dict__ - {'number': 42} - >>> mydata.__dict__.setdefault('widgets', []) - [] - >>> mydata.widgets - [] - -What's important about thread-local objects is that their data are -local to a thread. If we access the data in a different thread: - - >>> log = [] - >>> def f(): - ... items = mydata.__dict__.items() - ... items.sort() - ... log.append(items) - ... mydata.number = 11 - ... log.append(mydata.number) - - >>> import threading - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - >>> log - [[], 11] - -we get different data. Furthermore, changes made in the other thread -don't affect data seen in this thread: - - >>> mydata.number - 42 - -Of course, values you get from a local object, including a __dict__ -attribute, are for whatever thread was current at the time the -attribute was read. For that reason, you generally don't want to save -these values across threads, as they apply only to the thread they -came from. - -You can create custom local objects by subclassing the local class: - - >>> class MyLocal(local): - ... number = 2 - ... initialized = False - ... def __init__(self, **kw): - ... if self.initialized: - ... raise SystemError('__init__ called too many times') - ... self.initialized = True - ... self.__dict__.update(kw) - ... def squared(self): - ... return self.number ** 2 - -This can be useful to support default values, methods and -initialization. Note that if you define an __init__ method, it will be -called each time the local object is used in a separate thread. This -is necessary to initialize each thread's dictionary. - -Now if we create a local object: - - >>> mydata = MyLocal(color='red') - -Now we have a default number: - - >>> mydata.number - 2 - -an initial color: - - >>> mydata.color - 'red' - >>> del mydata.color - -And a method that operates on the data: - - >>> mydata.squared() - 4 - -As before, we can access the data in a separate thread: - - >>> log = [] - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - >>> log - [[('color', 'red'), ('initialized', True)], 11] - -without affecting this thread's data: - - >>> mydata.number - 2 - >>> mydata.color - Traceback (most recent call last): - ... - AttributeError: 'MyLocal' object has no attribute 'color' - -Note that subclasses can define slots, but they are not thread -local. They are shared across threads: - - >>> class MyLocal(local): - ... __slots__ = 'number' - - >>> mydata = MyLocal() - >>> mydata.number = 42 - >>> mydata.color = 'red' - -So, the separate thread: - - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - -affects what we see: - - >>> mydata.number - 11 - ->>> del mydata -""" - -# Threading import is at end - - -class _localbase(object): - __slots__ = '_local__key', '_local__args', '_local__lock' - - def __new__(cls, *args, **kw): - self = object.__new__(cls) - key = 'thread.local.' + str(id(self)) - object.__setattr__(self, '_local__key', key) - object.__setattr__(self, '_local__args', (args, kw)) - object.__setattr__(self, '_local__lock', RLock()) - - if args or kw and (cls.__init__ is object.__init__): - raise TypeError("Initialization arguments are not supported") - - # We need to create the thread dict in anticipation of - # __init__ being called, to make sure we don't call it - # again ourselves. - dict = object.__getattribute__(self, '__dict__') - currentThread().__dict__[key] = dict - - return self - - -def _patch(self): - key = object.__getattribute__(self, '_local__key') - d = currentThread().__dict__.get(key) - if d is None: - d = {} - currentThread().__dict__[key] = d - object.__setattr__(self, '__dict__', d) - - # we have a new instance dict, so call out __init__ if we have - # one - cls = type(self) - if cls.__init__ is not object.__init__: - args, kw = object.__getattribute__(self, '_local__args') - cls.__init__(self, *args, **kw) - else: - object.__setattr__(self, '__dict__', d) - - -class local(_localbase): - - def __getattribute__(self, name): - lock = object.__getattribute__(self, '_local__lock') - lock.acquire() - try: - _patch(self) - return object.__getattribute__(self, name) - finally: - lock.release() - - def __setattr__(self, name, value): - lock = object.__getattribute__(self, '_local__lock') - lock.acquire() - try: - _patch(self) - return object.__setattr__(self, name, value) - finally: - lock.release() - - def __delattr__(self, name): - lock = object.__getattribute__(self, '_local__lock') - lock.acquire() - try: - _patch(self) - return object.__delattr__(self, name) - finally: - lock.release() - - def __del__(): - threading_enumerate = enumerate - __getattribute__ = object.__getattribute__ - - def __del__(self): - key = __getattribute__(self, '_local__key') - - try: - threads = list(threading_enumerate()) - except: - # if enumerate fails, as it seems to do during - # shutdown, we'll skip cleanup under the assumption - # that there is nothing to clean up - return - - for thread in threads: - try: - __dict__ = thread.__dict__ - except AttributeError: - # Thread is dying, rest in peace - continue - - if key in __dict__: - try: - del __dict__[key] - except KeyError: - pass # didn't have anything in this thread - - return __del__ - __del__ = __del__() - -from threading import currentThread, enumerate, RLock diff --git a/lib/cherrypy/_cptools.py b/lib/cherrypy/_cptools.py deleted file mode 100644 index 06a56e87..00000000 --- a/lib/cherrypy/_cptools.py +++ /dev/null @@ -1,529 +0,0 @@ -"""CherryPy tools. A "tool" is any helper, adapted to CP. - -Tools are usually designed to be used in a variety of ways (although some -may only offer one if they choose): - - Library calls - All tools are callables that can be used wherever needed. - The arguments are straightforward and should be detailed within the - docstring. - - Function decorators - All tools, when called, may be used as decorators which configure - individual CherryPy page handlers (methods on the CherryPy tree). - That is, "@tools.anytool()" should "turn on" the tool via the - decorated function's _cp_config attribute. - - CherryPy config - If a tool exposes a "_setup" callable, it will be called - once per Request (if the feature is "turned on" via config). - -Tools may be implemented as any object with a namespace. The builtins -are generally either modules or instances of the tools.Tool class. -""" - -import sys -import warnings - -import cherrypy - - -def _getargs(func): - """Return the names of all static arguments to the given function.""" - # Use this instead of importing inspect for less mem overhead. - import types - if sys.version_info >= (3, 0): - if isinstance(func, types.MethodType): - func = func.__func__ - co = func.__code__ - else: - if isinstance(func, types.MethodType): - func = func.im_func - co = func.func_code - return co.co_varnames[:co.co_argcount] - - -_attr_error = ( - "CherryPy Tools cannot be turned on directly. Instead, turn them " - "on via config, or use them as decorators on your page handlers." -) - - -class Tool(object): - - """A registered function for use with CherryPy request-processing hooks. - - help(tool.callable) should give you more information about this Tool. - """ - - namespace = "tools" - - def __init__(self, point, callable, name=None, priority=50): - self._point = point - self.callable = callable - self._name = name - self._priority = priority - self.__doc__ = self.callable.__doc__ - self._setargs() - - def _get_on(self): - raise AttributeError(_attr_error) - - def _set_on(self, value): - raise AttributeError(_attr_error) - on = property(_get_on, _set_on) - - def _setargs(self): - """Copy func parameter names to obj attributes.""" - try: - for arg in _getargs(self.callable): - setattr(self, arg, None) - except (TypeError, AttributeError): - if hasattr(self.callable, "__call__"): - for arg in _getargs(self.callable.__call__): - setattr(self, arg, None) - # IronPython 1.0 raises NotImplementedError because - # inspect.getargspec tries to access Python bytecode - # in co_code attribute. - except NotImplementedError: - pass - # IronPython 1B1 may raise IndexError in some cases, - # but if we trap it here it doesn't prevent CP from working. - except IndexError: - pass - - def _merged_args(self, d=None): - """Return a dict of configuration entries for this Tool.""" - if d: - conf = d.copy() - else: - conf = {} - - tm = cherrypy.serving.request.toolmaps[self.namespace] - if self._name in tm: - conf.update(tm[self._name]) - - if "on" in conf: - del conf["on"] - - return conf - - def __call__(self, *args, **kwargs): - """Compile-time decorator (turn on the tool in config). - - For example:: - - @tools.proxy() - def whats_my_base(self): - return cherrypy.request.base - whats_my_base.exposed = True - """ - if args: - raise TypeError("The %r Tool does not accept positional " - "arguments; you must use keyword arguments." - % self._name) - - def tool_decorator(f): - if not hasattr(f, "_cp_config"): - f._cp_config = {} - subspace = self.namespace + "." + self._name + "." - f._cp_config[subspace + "on"] = True - for k, v in kwargs.items(): - f._cp_config[subspace + k] = v - return f - return tool_decorator - - def _setup(self): - """Hook this tool into cherrypy.request. - - The standard CherryPy request object will automatically call this - method when the tool is "turned on" in config. - """ - conf = self._merged_args() - p = conf.pop("priority", None) - if p is None: - p = getattr(self.callable, "priority", self._priority) - cherrypy.serving.request.hooks.attach(self._point, self.callable, - priority=p, **conf) - - -class HandlerTool(Tool): - - """Tool which is called 'before main', that may skip normal handlers. - - If the tool successfully handles the request (by setting response.body), - if should return True. This will cause CherryPy to skip any 'normal' page - handler. If the tool did not handle the request, it should return False - to tell CherryPy to continue on and call the normal page handler. If the - tool is declared AS a page handler (see the 'handler' method), returning - False will raise NotFound. - """ - - def __init__(self, callable, name=None): - Tool.__init__(self, 'before_handler', callable, name) - - def handler(self, *args, **kwargs): - """Use this tool as a CherryPy page handler. - - For example:: - - class Root: - nav = tools.staticdir.handler(section="/nav", dir="nav", - root=absDir) - """ - def handle_func(*a, **kw): - handled = self.callable(*args, **self._merged_args(kwargs)) - if not handled: - raise cherrypy.NotFound() - return cherrypy.serving.response.body - handle_func.exposed = True - return handle_func - - def _wrapper(self, **kwargs): - if self.callable(**kwargs): - cherrypy.serving.request.handler = None - - def _setup(self): - """Hook this tool into cherrypy.request. - - The standard CherryPy request object will automatically call this - method when the tool is "turned on" in config. - """ - conf = self._merged_args() - p = conf.pop("priority", None) - if p is None: - p = getattr(self.callable, "priority", self._priority) - cherrypy.serving.request.hooks.attach(self._point, self._wrapper, - priority=p, **conf) - - -class HandlerWrapperTool(Tool): - - """Tool which wraps request.handler in a provided wrapper function. - - The 'newhandler' arg must be a handler wrapper function that takes a - 'next_handler' argument, plus ``*args`` and ``**kwargs``. Like all - page handler - functions, it must return an iterable for use as cherrypy.response.body. - - For example, to allow your 'inner' page handlers to return dicts - which then get interpolated into a template:: - - def interpolator(next_handler, *args, **kwargs): - filename = cherrypy.request.config.get('template') - cherrypy.response.template = env.get_template(filename) - response_dict = next_handler(*args, **kwargs) - return cherrypy.response.template.render(**response_dict) - cherrypy.tools.jinja = HandlerWrapperTool(interpolator) - """ - - def __init__(self, newhandler, point='before_handler', name=None, - priority=50): - self.newhandler = newhandler - self._point = point - self._name = name - self._priority = priority - - def callable(self, *args, **kwargs): - innerfunc = cherrypy.serving.request.handler - - def wrap(*args, **kwargs): - return self.newhandler(innerfunc, *args, **kwargs) - cherrypy.serving.request.handler = wrap - - -class ErrorTool(Tool): - - """Tool which is used to replace the default request.error_response.""" - - def __init__(self, callable, name=None): - Tool.__init__(self, None, callable, name) - - def _wrapper(self): - self.callable(**self._merged_args()) - - def _setup(self): - """Hook this tool into cherrypy.request. - - The standard CherryPy request object will automatically call this - method when the tool is "turned on" in config. - """ - cherrypy.serving.request.error_response = self._wrapper - - -# Builtin tools # - -from cherrypy.lib import cptools, encoding, auth, static, jsontools -from cherrypy.lib import sessions as _sessions, xmlrpcutil as _xmlrpc -from cherrypy.lib import caching as _caching -from cherrypy.lib import auth_basic, auth_digest - - -class SessionTool(Tool): - - """Session Tool for CherryPy. - - sessions.locking - When 'implicit' (the default), the session will be locked for you, - just before running the page handler. - - When 'early', the session will be locked before reading the request - body. This is off by default for safety reasons; for example, - a large upload would block the session, denying an AJAX - progress meter - (`issue `_). - - When 'explicit' (or any other value), you need to call - cherrypy.session.acquire_lock() yourself before using - session data. - """ - - def __init__(self): - # _sessions.init must be bound after headers are read - Tool.__init__(self, 'before_request_body', _sessions.init) - - def _lock_session(self): - cherrypy.serving.session.acquire_lock() - - def _setup(self): - """Hook this tool into cherrypy.request. - - The standard CherryPy request object will automatically call this - method when the tool is "turned on" in config. - """ - hooks = cherrypy.serving.request.hooks - - conf = self._merged_args() - - p = conf.pop("priority", None) - if p is None: - p = getattr(self.callable, "priority", self._priority) - - hooks.attach(self._point, self.callable, priority=p, **conf) - - locking = conf.pop('locking', 'implicit') - if locking == 'implicit': - hooks.attach('before_handler', self._lock_session) - elif locking == 'early': - # Lock before the request body (but after _sessions.init runs!) - hooks.attach('before_request_body', self._lock_session, - priority=60) - else: - # Don't lock - pass - - hooks.attach('before_finalize', _sessions.save) - hooks.attach('on_end_request', _sessions.close) - - def regenerate(self): - """Drop the current session and make a new one (with a new id).""" - sess = cherrypy.serving.session - sess.regenerate() - - # Grab cookie-relevant tool args - conf = dict([(k, v) for k, v in self._merged_args().items() - if k in ('path', 'path_header', 'name', 'timeout', - 'domain', 'secure')]) - _sessions.set_response_cookie(**conf) - - -class XMLRPCController(object): - - """A Controller (page handler collection) for XML-RPC. - - To use it, have your controllers subclass this base class (it will - turn on the tool for you). - - You can also supply the following optional config entries:: - - tools.xmlrpc.encoding: 'utf-8' - tools.xmlrpc.allow_none: 0 - - XML-RPC is a rather discontinuous layer over HTTP; dispatching to the - appropriate handler must first be performed according to the URL, and - then a second dispatch step must take place according to the RPC method - specified in the request body. It also allows a superfluous "/RPC2" - prefix in the URL, supplies its own handler args in the body, and - requires a 200 OK "Fault" response instead of 404 when the desired - method is not found. - - Therefore, XML-RPC cannot be implemented for CherryPy via a Tool alone. - This Controller acts as the dispatch target for the first half (based - on the URL); it then reads the RPC method from the request body and - does its own second dispatch step based on that method. It also reads - body params, and returns a Fault on error. - - The XMLRPCDispatcher strips any /RPC2 prefix; if you aren't using /RPC2 - in your URL's, you can safely skip turning on the XMLRPCDispatcher. - Otherwise, you need to use declare it in config:: - - request.dispatch: cherrypy.dispatch.XMLRPCDispatcher() - """ - - # Note we're hard-coding this into the 'tools' namespace. We could do - # a huge amount of work to make it relocatable, but the only reason why - # would be if someone actually disabled the default_toolbox. Meh. - _cp_config = {'tools.xmlrpc.on': True} - - def default(self, *vpath, **params): - rpcparams, rpcmethod = _xmlrpc.process_body() - - subhandler = self - for attr in str(rpcmethod).split('.'): - subhandler = getattr(subhandler, attr, None) - - if subhandler and getattr(subhandler, "exposed", False): - body = subhandler(*(vpath + rpcparams), **params) - - else: - # https://bitbucket.org/cherrypy/cherrypy/issue/533 - # if a method is not found, an xmlrpclib.Fault should be returned - # raising an exception here will do that; see - # cherrypy.lib.xmlrpcutil.on_error - raise Exception('method "%s" is not supported' % attr) - - conf = cherrypy.serving.request.toolmaps['tools'].get("xmlrpc", {}) - _xmlrpc.respond(body, - conf.get('encoding', 'utf-8'), - conf.get('allow_none', 0)) - return cherrypy.serving.response.body - default.exposed = True - - -class SessionAuthTool(HandlerTool): - - def _setargs(self): - for name in dir(cptools.SessionAuth): - if not name.startswith("__"): - setattr(self, name, None) - - -class CachingTool(Tool): - - """Caching Tool for CherryPy.""" - - def _wrapper(self, **kwargs): - request = cherrypy.serving.request - if _caching.get(**kwargs): - request.handler = None - else: - if request.cacheable: - # Note the devious technique here of adding hooks on the fly - request.hooks.attach('before_finalize', _caching.tee_output, - priority=90) - _wrapper.priority = 20 - - def _setup(self): - """Hook caching into cherrypy.request.""" - conf = self._merged_args() - - p = conf.pop("priority", None) - cherrypy.serving.request.hooks.attach('before_handler', self._wrapper, - priority=p, **conf) - - -class Toolbox(object): - - """A collection of Tools. - - This object also functions as a config namespace handler for itself. - Custom toolboxes should be added to each Application's toolboxes dict. - """ - - def __init__(self, namespace): - self.namespace = namespace - - def __setattr__(self, name, value): - # If the Tool._name is None, supply it from the attribute name. - if isinstance(value, Tool): - if value._name is None: - value._name = name - value.namespace = self.namespace - object.__setattr__(self, name, value) - - def __enter__(self): - """Populate request.toolmaps from tools specified in config.""" - cherrypy.serving.request.toolmaps[self.namespace] = map = {} - - def populate(k, v): - toolname, arg = k.split(".", 1) - bucket = map.setdefault(toolname, {}) - bucket[arg] = v - return populate - - def __exit__(self, exc_type, exc_val, exc_tb): - """Run tool._setup() for each tool in our toolmap.""" - map = cherrypy.serving.request.toolmaps.get(self.namespace) - if map: - for name, settings in map.items(): - if settings.get("on", False): - tool = getattr(self, name) - tool._setup() - - -class DeprecatedTool(Tool): - - _name = None - warnmsg = "This Tool is deprecated." - - def __init__(self, point, warnmsg=None): - self.point = point - if warnmsg is not None: - self.warnmsg = warnmsg - - def __call__(self, *args, **kwargs): - warnings.warn(self.warnmsg) - - def tool_decorator(f): - return f - return tool_decorator - - def _setup(self): - warnings.warn(self.warnmsg) - - -default_toolbox = _d = Toolbox("tools") -_d.session_auth = SessionAuthTool(cptools.session_auth) -_d.allow = Tool('on_start_resource', cptools.allow) -_d.proxy = Tool('before_request_body', cptools.proxy, priority=30) -_d.response_headers = Tool('on_start_resource', cptools.response_headers) -_d.log_tracebacks = Tool('before_error_response', cptools.log_traceback) -_d.log_headers = Tool('before_error_response', cptools.log_request_headers) -_d.log_hooks = Tool('on_end_request', cptools.log_hooks, priority=100) -_d.err_redirect = ErrorTool(cptools.redirect) -_d.etags = Tool('before_finalize', cptools.validate_etags, priority=75) -_d.decode = Tool('before_request_body', encoding.decode) -# the order of encoding, gzip, caching is important -_d.encode = Tool('before_handler', encoding.ResponseEncoder, priority=70) -_d.gzip = Tool('before_finalize', encoding.gzip, priority=80) -_d.staticdir = HandlerTool(static.staticdir) -_d.staticfile = HandlerTool(static.staticfile) -_d.sessions = SessionTool() -_d.xmlrpc = ErrorTool(_xmlrpc.on_error) -_d.caching = CachingTool('before_handler', _caching.get, 'caching') -_d.expires = Tool('before_finalize', _caching.expires) -_d.tidy = DeprecatedTool( - 'before_finalize', - "The tidy tool has been removed from the standard distribution of " - "CherryPy. The most recent version can be found at " - "http://tools.cherrypy.org/browser.") -_d.nsgmls = DeprecatedTool( - 'before_finalize', - "The nsgmls tool has been removed from the standard distribution of " - "CherryPy. The most recent version can be found at " - "http://tools.cherrypy.org/browser.") -_d.ignore_headers = Tool('before_request_body', cptools.ignore_headers) -_d.referer = Tool('before_request_body', cptools.referer) -_d.basic_auth = Tool('on_start_resource', auth.basic_auth) -_d.digest_auth = Tool('on_start_resource', auth.digest_auth) -_d.trailing_slash = Tool('before_handler', cptools.trailing_slash, priority=60) -_d.flatten = Tool('before_finalize', cptools.flatten) -_d.accept = Tool('on_start_resource', cptools.accept) -_d.redirect = Tool('on_start_resource', cptools.redirect) -_d.autovary = Tool('on_start_resource', cptools.autovary, priority=0) -_d.json_in = Tool('before_request_body', jsontools.json_in, priority=30) -_d.json_out = Tool('before_handler', jsontools.json_out, priority=30) -_d.auth_basic = Tool('before_handler', auth_basic.basic_auth, priority=1) -_d.auth_digest = Tool('before_handler', auth_digest.digest_auth, priority=1) - -del _d, cptools, encoding, auth, static diff --git a/lib/cherrypy/_cptree.py b/lib/cherrypy/_cptree.py deleted file mode 100644 index a31b2793..00000000 --- a/lib/cherrypy/_cptree.py +++ /dev/null @@ -1,299 +0,0 @@ -"""CherryPy Application and Tree objects.""" - -import os - -import cherrypy -from cherrypy._cpcompat import ntou, py3k -from cherrypy import _cpconfig, _cplogging, _cprequest, _cpwsgi, tools -from cherrypy.lib import httputil - - -class Application(object): - - """A CherryPy Application. - - Servers and gateways should not instantiate Request objects directly. - Instead, they should ask an Application object for a request object. - - An instance of this class may also be used as a WSGI callable - (WSGI application object) for itself. - """ - - root = None - """The top-most container of page handlers for this app. Handlers should - be arranged in a hierarchy of attributes, matching the expected URI - hierarchy; the default dispatcher then searches this hierarchy for a - matching handler. When using a dispatcher other than the default, - this value may be None.""" - - config = {} - """A dict of {path: pathconf} pairs, where 'pathconf' is itself a dict - of {key: value} pairs.""" - - namespaces = _cpconfig.NamespaceSet() - toolboxes = {'tools': cherrypy.tools} - - log = None - """A LogManager instance. See _cplogging.""" - - wsgiapp = None - """A CPWSGIApp instance. See _cpwsgi.""" - - request_class = _cprequest.Request - response_class = _cprequest.Response - - relative_urls = False - - def __init__(self, root, script_name="", config=None): - self.log = _cplogging.LogManager(id(self), cherrypy.log.logger_root) - self.root = root - self.script_name = script_name - self.wsgiapp = _cpwsgi.CPWSGIApp(self) - - self.namespaces = self.namespaces.copy() - self.namespaces["log"] = lambda k, v: setattr(self.log, k, v) - self.namespaces["wsgi"] = self.wsgiapp.namespace_handler - - self.config = self.__class__.config.copy() - if config: - self.merge(config) - - def __repr__(self): - return "%s.%s(%r, %r)" % (self.__module__, self.__class__.__name__, - self.root, self.script_name) - - script_name_doc = """The URI "mount point" for this app. A mount point - is that portion of the URI which is constant for all URIs that are - serviced by this application; it does not include scheme, host, or proxy - ("virtual host") portions of the URI. - - For example, if script_name is "/my/cool/app", then the URL - "http://www.example.com/my/cool/app/page1" might be handled by a - "page1" method on the root object. - - The value of script_name MUST NOT end in a slash. If the script_name - refers to the root of the URI, it MUST be an empty string (not "/"). - - If script_name is explicitly set to None, then the script_name will be - provided for each call from request.wsgi_environ['SCRIPT_NAME']. - """ - - def _get_script_name(self): - if self._script_name is not None: - return self._script_name - - # A `_script_name` with a value of None signals that the script name - # should be pulled from WSGI environ. - return cherrypy.serving.request.wsgi_environ['SCRIPT_NAME'].rstrip("/") - - def _set_script_name(self, value): - if value: - value = value.rstrip("/") - self._script_name = value - script_name = property(fget=_get_script_name, fset=_set_script_name, - doc=script_name_doc) - - def merge(self, config): - """Merge the given config into self.config.""" - _cpconfig.merge(self.config, config) - - # Handle namespaces specified in config. - self.namespaces(self.config.get("/", {})) - - def find_config(self, path, key, default=None): - """Return the most-specific value for key along path, or default.""" - trail = path or "/" - while trail: - nodeconf = self.config.get(trail, {}) - - if key in nodeconf: - return nodeconf[key] - - lastslash = trail.rfind("/") - if lastslash == -1: - break - elif lastslash == 0 and trail != "/": - trail = "/" - else: - trail = trail[:lastslash] - - return default - - def get_serving(self, local, remote, scheme, sproto): - """Create and return a Request and Response object.""" - req = self.request_class(local, remote, scheme, sproto) - req.app = self - - for name, toolbox in self.toolboxes.items(): - req.namespaces[name] = toolbox - - resp = self.response_class() - cherrypy.serving.load(req, resp) - cherrypy.engine.publish('acquire_thread') - cherrypy.engine.publish('before_request') - - return req, resp - - def release_serving(self): - """Release the current serving (request and response).""" - req = cherrypy.serving.request - - cherrypy.engine.publish('after_request') - - try: - req.close() - except: - cherrypy.log(traceback=True, severity=40) - - cherrypy.serving.clear() - - def __call__(self, environ, start_response): - return self.wsgiapp(environ, start_response) - - -class Tree(object): - - """A registry of CherryPy applications, mounted at diverse points. - - An instance of this class may also be used as a WSGI callable - (WSGI application object), in which case it dispatches to all - mounted apps. - """ - - apps = {} - """ - A dict of the form {script name: application}, where "script name" - is a string declaring the URI mount point (no trailing slash), and - "application" is an instance of cherrypy.Application (or an arbitrary - WSGI callable if you happen to be using a WSGI server).""" - - def __init__(self): - self.apps = {} - - def mount(self, root, script_name="", config=None): - """Mount a new app from a root object, script_name, and config. - - root - An instance of a "controller class" (a collection of page - handler methods) which represents the root of the application. - This may also be an Application instance, or None if using - a dispatcher other than the default. - - script_name - A string containing the "mount point" of the application. - This should start with a slash, and be the path portion of the - URL at which to mount the given root. For example, if root.index() - will handle requests to "http://www.example.com:8080/dept/app1/", - then the script_name argument would be "/dept/app1". - - It MUST NOT end in a slash. If the script_name refers to the - root of the URI, it MUST be an empty string (not "/"). - - config - A file or dict containing application config. - """ - if script_name is None: - raise TypeError( - "The 'script_name' argument may not be None. Application " - "objects may, however, possess a script_name of None (in " - "order to inpect the WSGI environ for SCRIPT_NAME upon each " - "request). You cannot mount such Applications on this Tree; " - "you must pass them to a WSGI server interface directly.") - - # Next line both 1) strips trailing slash and 2) maps "/" -> "". - script_name = script_name.rstrip("/") - - if isinstance(root, Application): - app = root - if script_name != "" and script_name != app.script_name: - raise ValueError( - "Cannot specify a different script name and pass an " - "Application instance to cherrypy.mount") - script_name = app.script_name - else: - app = Application(root, script_name) - - # If mounted at "", add favicon.ico - if (script_name == "" and root is not None - and not hasattr(root, "favicon_ico")): - favicon = os.path.join(os.getcwd(), os.path.dirname(__file__), - "favicon.ico") - root.favicon_ico = tools.staticfile.handler(favicon) - - if config: - app.merge(config) - - self.apps[script_name] = app - - return app - - def graft(self, wsgi_callable, script_name=""): - """Mount a wsgi callable at the given script_name.""" - # Next line both 1) strips trailing slash and 2) maps "/" -> "". - script_name = script_name.rstrip("/") - self.apps[script_name] = wsgi_callable - - def script_name(self, path=None): - """The script_name of the app at the given path, or None. - - If path is None, cherrypy.request is used. - """ - if path is None: - try: - request = cherrypy.serving.request - path = httputil.urljoin(request.script_name, - request.path_info) - except AttributeError: - return None - - while True: - if path in self.apps: - return path - - if path == "": - return None - - # Move one node up the tree and try again. - path = path[:path.rfind("/")] - - def __call__(self, environ, start_response): - # If you're calling this, then you're probably setting SCRIPT_NAME - # to '' (some WSGI servers always set SCRIPT_NAME to ''). - # Try to look up the app using the full path. - env1x = environ - if environ.get(ntou('wsgi.version')) == (ntou('u'), 0): - env1x = _cpwsgi.downgrade_wsgi_ux_to_1x(environ) - path = httputil.urljoin(env1x.get('SCRIPT_NAME', ''), - env1x.get('PATH_INFO', '')) - sn = self.script_name(path or "/") - if sn is None: - start_response('404 Not Found', []) - return [] - - app = self.apps[sn] - - # Correct the SCRIPT_NAME and PATH_INFO environ entries. - environ = environ.copy() - if not py3k: - if environ.get(ntou('wsgi.version')) == (ntou('u'), 0): - # Python 2/WSGI u.0: all strings MUST be of type unicode - enc = environ[ntou('wsgi.url_encoding')] - environ[ntou('SCRIPT_NAME')] = sn.decode(enc) - environ[ntou('PATH_INFO')] = path[ - len(sn.rstrip("/")):].decode(enc) - else: - # Python 2/WSGI 1.x: all strings MUST be of type str - environ['SCRIPT_NAME'] = sn - environ['PATH_INFO'] = path[len(sn.rstrip("/")):] - else: - if environ.get(ntou('wsgi.version')) == (ntou('u'), 0): - # Python 3/WSGI u.0: all strings MUST be full unicode - environ['SCRIPT_NAME'] = sn - environ['PATH_INFO'] = path[len(sn.rstrip("/")):] - else: - # Python 3/WSGI 1.x: all strings MUST be ISO-8859-1 str - environ['SCRIPT_NAME'] = sn.encode( - 'utf-8').decode('ISO-8859-1') - environ['PATH_INFO'] = path[ - len(sn.rstrip("/")):].encode('utf-8').decode('ISO-8859-1') - return app(environ, start_response) diff --git a/lib/cherrypy/_cpwsgi.py b/lib/cherrypy/_cpwsgi.py deleted file mode 100644 index f6db68b0..00000000 --- a/lib/cherrypy/_cpwsgi.py +++ /dev/null @@ -1,438 +0,0 @@ -"""WSGI interface (see PEP 333 and 3333). - -Note that WSGI environ keys and values are 'native strings'; that is, -whatever the type of "" is. For Python 2, that's a byte string; for Python 3, -it's a unicode string. But PEP 3333 says: "even if Python's str type is -actually Unicode "under the hood", the content of native strings must -still be translatable to bytes via the Latin-1 encoding!" -""" - -import sys as _sys - -import cherrypy as _cherrypy -from cherrypy._cpcompat import BytesIO, bytestr, ntob, ntou, py3k, unicodestr -from cherrypy import _cperror -from cherrypy.lib import httputil -from cherrypy.lib import is_closable_iterator - -def downgrade_wsgi_ux_to_1x(environ): - """Return a new environ dict for WSGI 1.x from the given WSGI u.x environ. - """ - env1x = {} - - url_encoding = environ[ntou('wsgi.url_encoding')] - for k, v in list(environ.items()): - if k in [ntou('PATH_INFO'), ntou('SCRIPT_NAME'), ntou('QUERY_STRING')]: - v = v.encode(url_encoding) - elif isinstance(v, unicodestr): - v = v.encode('ISO-8859-1') - env1x[k.encode('ISO-8859-1')] = v - - return env1x - - -class VirtualHost(object): - - """Select a different WSGI application based on the Host header. - - This can be useful when running multiple sites within one CP server. - It allows several domains to point to different applications. For example:: - - root = Root() - RootApp = cherrypy.Application(root) - Domain2App = cherrypy.Application(root) - SecureApp = cherrypy.Application(Secure()) - - vhost = cherrypy._cpwsgi.VirtualHost(RootApp, - domains={'www.domain2.example': Domain2App, - 'www.domain2.example:443': SecureApp, - }) - - cherrypy.tree.graft(vhost) - """ - default = None - """Required. The default WSGI application.""" - - use_x_forwarded_host = True - """If True (the default), any "X-Forwarded-Host" - request header will be used instead of the "Host" header. This - is commonly added by HTTP servers (such as Apache) when proxying.""" - - domains = {} - """A dict of {host header value: application} pairs. - The incoming "Host" request header is looked up in this dict, - and, if a match is found, the corresponding WSGI application - will be called instead of the default. Note that you often need - separate entries for "example.com" and "www.example.com". - In addition, "Host" headers may contain the port number. - """ - - def __init__(self, default, domains=None, use_x_forwarded_host=True): - self.default = default - self.domains = domains or {} - self.use_x_forwarded_host = use_x_forwarded_host - - def __call__(self, environ, start_response): - domain = environ.get('HTTP_HOST', '') - if self.use_x_forwarded_host: - domain = environ.get("HTTP_X_FORWARDED_HOST", domain) - - nextapp = self.domains.get(domain) - if nextapp is None: - nextapp = self.default - return nextapp(environ, start_response) - - -class InternalRedirector(object): - - """WSGI middleware that handles raised cherrypy.InternalRedirect.""" - - def __init__(self, nextapp, recursive=False): - self.nextapp = nextapp - self.recursive = recursive - - def __call__(self, environ, start_response): - redirections = [] - while True: - environ = environ.copy() - try: - return self.nextapp(environ, start_response) - except _cherrypy.InternalRedirect: - ir = _sys.exc_info()[1] - sn = environ.get('SCRIPT_NAME', '') - path = environ.get('PATH_INFO', '') - qs = environ.get('QUERY_STRING', '') - - # Add the *previous* path_info + qs to redirections. - old_uri = sn + path - if qs: - old_uri += "?" + qs - redirections.append(old_uri) - - if not self.recursive: - # Check to see if the new URI has been redirected to - # already - new_uri = sn + ir.path - if ir.query_string: - new_uri += "?" + ir.query_string - if new_uri in redirections: - ir.request.close() - raise RuntimeError("InternalRedirector visited the " - "same URL twice: %r" % new_uri) - - # Munge the environment and try again. - environ['REQUEST_METHOD'] = "GET" - environ['PATH_INFO'] = ir.path - environ['QUERY_STRING'] = ir.query_string - environ['wsgi.input'] = BytesIO() - environ['CONTENT_LENGTH'] = "0" - environ['cherrypy.previous_request'] = ir.request - - -class ExceptionTrapper(object): - - """WSGI middleware that traps exceptions.""" - - def __init__(self, nextapp, throws=(KeyboardInterrupt, SystemExit)): - self.nextapp = nextapp - self.throws = throws - - def __call__(self, environ, start_response): - return _TrappedResponse( - self.nextapp, - environ, - start_response, - self.throws - ) - - -class _TrappedResponse(object): - - response = iter([]) - - def __init__(self, nextapp, environ, start_response, throws): - self.nextapp = nextapp - self.environ = environ - self.start_response = start_response - self.throws = throws - self.started_response = False - self.response = self.trap( - self.nextapp, self.environ, self.start_response) - self.iter_response = iter(self.response) - - def __iter__(self): - self.started_response = True - return self - - if py3k: - def __next__(self): - return self.trap(next, self.iter_response) - else: - def next(self): - return self.trap(self.iter_response.next) - - def close(self): - if hasattr(self.response, 'close'): - self.response.close() - - def trap(self, func, *args, **kwargs): - try: - return func(*args, **kwargs) - except self.throws: - raise - except StopIteration: - raise - except: - tb = _cperror.format_exc() - #print('trapped (started %s):' % self.started_response, tb) - _cherrypy.log(tb, severity=40) - if not _cherrypy.request.show_tracebacks: - tb = "" - s, h, b = _cperror.bare_error(tb) - if py3k: - # What fun. - s = s.decode('ISO-8859-1') - h = [(k.decode('ISO-8859-1'), v.decode('ISO-8859-1')) - for k, v in h] - if self.started_response: - # Empty our iterable (so future calls raise StopIteration) - self.iter_response = iter([]) - else: - self.iter_response = iter(b) - - try: - self.start_response(s, h, _sys.exc_info()) - except: - # "The application must not trap any exceptions raised by - # start_response, if it called start_response with exc_info. - # Instead, it should allow such exceptions to propagate - # back to the server or gateway." - # But we still log and call close() to clean up ourselves. - _cherrypy.log(traceback=True, severity=40) - raise - - if self.started_response: - return ntob("").join(b) - else: - return b - - -# WSGI-to-CP Adapter # - - -class AppResponse(object): - - """WSGI response iterable for CherryPy applications.""" - - def __init__(self, environ, start_response, cpapp): - self.cpapp = cpapp - try: - if not py3k: - if environ.get(ntou('wsgi.version')) == (ntou('u'), 0): - environ = downgrade_wsgi_ux_to_1x(environ) - self.environ = environ - self.run() - - r = _cherrypy.serving.response - - outstatus = r.output_status - if not isinstance(outstatus, bytestr): - raise TypeError("response.output_status is not a byte string.") - - outheaders = [] - for k, v in r.header_list: - if not isinstance(k, bytestr): - raise TypeError( - "response.header_list key %r is not a byte string." % - k) - if not isinstance(v, bytestr): - raise TypeError( - "response.header_list value %r is not a byte string." % - v) - outheaders.append((k, v)) - - if py3k: - # According to PEP 3333, when using Python 3, the response - # status and headers must be bytes masquerading as unicode; - # that is, they must be of type "str" but are restricted to - # code points in the "latin-1" set. - outstatus = outstatus.decode('ISO-8859-1') - outheaders = [(k.decode('ISO-8859-1'), v.decode('ISO-8859-1')) - for k, v in outheaders] - - self.iter_response = iter(r.body) - self.write = start_response(outstatus, outheaders) - except: - self.close() - raise - - def __iter__(self): - return self - - if py3k: - def __next__(self): - return next(self.iter_response) - else: - def next(self): - return self.iter_response.next() - - def close(self): - """Close and de-reference the current request and response. (Core)""" - streaming = _cherrypy.serving.response.stream - self.cpapp.release_serving() - - # We avoid the expense of examining the iterator to see if it's - # closable unless we are streaming the response, as that's the - # only situation where we are going to have an iterator which - # may not have been exhausted yet. - if streaming and is_closable_iterator(self.iter_response): - iter_close = self.iter_response.close - try: - iter_close() - except Exception: - _cherrypy.log(traceback=True, severity=40) - - def run(self): - """Create a Request object using environ.""" - env = self.environ.get - - local = httputil.Host('', int(env('SERVER_PORT', 80)), - env('SERVER_NAME', '')) - remote = httputil.Host(env('REMOTE_ADDR', ''), - int(env('REMOTE_PORT', -1) or -1), - env('REMOTE_HOST', '')) - scheme = env('wsgi.url_scheme') - sproto = env('ACTUAL_SERVER_PROTOCOL', "HTTP/1.1") - request, resp = self.cpapp.get_serving(local, remote, scheme, sproto) - - # LOGON_USER is served by IIS, and is the name of the - # user after having been mapped to a local account. - # Both IIS and Apache set REMOTE_USER, when possible. - request.login = env('LOGON_USER') or env('REMOTE_USER') or None - request.multithread = self.environ['wsgi.multithread'] - request.multiprocess = self.environ['wsgi.multiprocess'] - request.wsgi_environ = self.environ - request.prev = env('cherrypy.previous_request', None) - - meth = self.environ['REQUEST_METHOD'] - - path = httputil.urljoin(self.environ.get('SCRIPT_NAME', ''), - self.environ.get('PATH_INFO', '')) - qs = self.environ.get('QUERY_STRING', '') - - if py3k: - # This isn't perfect; if the given PATH_INFO is in the - # wrong encoding, it may fail to match the appropriate config - # section URI. But meh. - old_enc = self.environ.get('wsgi.url_encoding', 'ISO-8859-1') - new_enc = self.cpapp.find_config(self.environ.get('PATH_INFO', ''), - "request.uri_encoding", 'utf-8') - if new_enc.lower() != old_enc.lower(): - # Even though the path and qs are unicode, the WSGI server - # is required by PEP 3333 to coerce them to ISO-8859-1 - # masquerading as unicode. So we have to encode back to - # bytes and then decode again using the "correct" encoding. - try: - u_path = path.encode(old_enc).decode(new_enc) - u_qs = qs.encode(old_enc).decode(new_enc) - except (UnicodeEncodeError, UnicodeDecodeError): - # Just pass them through without transcoding and hope. - pass - else: - # Only set transcoded values if they both succeed. - path = u_path - qs = u_qs - - rproto = self.environ.get('SERVER_PROTOCOL') - headers = self.translate_headers(self.environ) - rfile = self.environ['wsgi.input'] - request.run(meth, path, qs, rproto, headers, rfile) - - headerNames = {'HTTP_CGI_AUTHORIZATION': 'Authorization', - 'CONTENT_LENGTH': 'Content-Length', - 'CONTENT_TYPE': 'Content-Type', - 'REMOTE_HOST': 'Remote-Host', - 'REMOTE_ADDR': 'Remote-Addr', - } - - def translate_headers(self, environ): - """Translate CGI-environ header names to HTTP header names.""" - for cgiName in environ: - # We assume all incoming header keys are uppercase already. - if cgiName in self.headerNames: - yield self.headerNames[cgiName], environ[cgiName] - elif cgiName[:5] == "HTTP_": - # Hackish attempt at recovering original header names. - translatedHeader = cgiName[5:].replace("_", "-") - yield translatedHeader, environ[cgiName] - - -class CPWSGIApp(object): - - """A WSGI application object for a CherryPy Application.""" - - pipeline = [('ExceptionTrapper', ExceptionTrapper), - ('InternalRedirector', InternalRedirector), - ] - """A list of (name, wsgiapp) pairs. Each 'wsgiapp' MUST be a - constructor that takes an initial, positional 'nextapp' argument, - plus optional keyword arguments, and returns a WSGI application - (that takes environ and start_response arguments). The 'name' can - be any you choose, and will correspond to keys in self.config.""" - - head = None - """Rather than nest all apps in the pipeline on each call, it's only - done the first time, and the result is memoized into self.head. Set - this to None again if you change self.pipeline after calling self.""" - - config = {} - """A dict whose keys match names listed in the pipeline. Each - value is a further dict which will be passed to the corresponding - named WSGI callable (from the pipeline) as keyword arguments.""" - - response_class = AppResponse - """The class to instantiate and return as the next app in the WSGI chain. - """ - - def __init__(self, cpapp, pipeline=None): - self.cpapp = cpapp - self.pipeline = self.pipeline[:] - if pipeline: - self.pipeline.extend(pipeline) - self.config = self.config.copy() - - def tail(self, environ, start_response): - """WSGI application callable for the actual CherryPy application. - - You probably shouldn't call this; call self.__call__ instead, - so that any WSGI middleware in self.pipeline can run first. - """ - return self.response_class(environ, start_response, self.cpapp) - - def __call__(self, environ, start_response): - head = self.head - if head is None: - # Create and nest the WSGI apps in our pipeline (in reverse order). - # Then memoize the result in self.head. - head = self.tail - for name, callable in self.pipeline[::-1]: - conf = self.config.get(name, {}) - head = callable(head, **conf) - self.head = head - return head(environ, start_response) - - def namespace_handler(self, k, v): - """Config handler for the 'wsgi' namespace.""" - if k == "pipeline": - # Note this allows multiple 'wsgi.pipeline' config entries - # (but each entry will be processed in a 'random' order). - # It should also allow developers to set default middleware - # in code (passed to self.__init__) that deployers can add to - # (but not remove) via config. - self.pipeline.extend(v) - elif k == "response_class": - self.response_class = v - else: - name, arg = k.split(".", 1) - bucket = self.config.setdefault(name, {}) - bucket[arg] = v diff --git a/lib/cherrypy/_cpwsgi_server.py b/lib/cherrypy/_cpwsgi_server.py deleted file mode 100644 index 874e2e9f..00000000 --- a/lib/cherrypy/_cpwsgi_server.py +++ /dev/null @@ -1,70 +0,0 @@ -"""WSGI server interface (see PEP 333). This adds some CP-specific bits to -the framework-agnostic wsgiserver package. -""" -import sys - -import cherrypy -from cherrypy import wsgiserver - - -class CPWSGIServer(wsgiserver.CherryPyWSGIServer): - - """Wrapper for wsgiserver.CherryPyWSGIServer. - - wsgiserver has been designed to not reference CherryPy in any way, - so that it can be used in other frameworks and applications. Therefore, - we wrap it here, so we can set our own mount points from cherrypy.tree - and apply some attributes from config -> cherrypy.server -> wsgiserver. - """ - - def __init__(self, server_adapter=cherrypy.server): - self.server_adapter = server_adapter - self.max_request_header_size = ( - self.server_adapter.max_request_header_size or 0 - ) - self.max_request_body_size = ( - self.server_adapter.max_request_body_size or 0 - ) - - server_name = (self.server_adapter.socket_host or - self.server_adapter.socket_file or - None) - - self.wsgi_version = self.server_adapter.wsgi_version - s = wsgiserver.CherryPyWSGIServer - s.__init__(self, server_adapter.bind_addr, cherrypy.tree, - self.server_adapter.thread_pool, - server_name, - max=self.server_adapter.thread_pool_max, - request_queue_size=self.server_adapter.socket_queue_size, - timeout=self.server_adapter.socket_timeout, - shutdown_timeout=self.server_adapter.shutdown_timeout, - accepted_queue_size=self.server_adapter.accepted_queue_size, - accepted_queue_timeout=self.server_adapter.accepted_queue_timeout, - ) - self.protocol = self.server_adapter.protocol_version - self.nodelay = self.server_adapter.nodelay - - if sys.version_info >= (3, 0): - ssl_module = self.server_adapter.ssl_module or 'builtin' - else: - ssl_module = self.server_adapter.ssl_module or 'pyopenssl' - if self.server_adapter.ssl_context: - adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module) - self.ssl_adapter = adapter_class( - self.server_adapter.ssl_certificate, - self.server_adapter.ssl_private_key, - self.server_adapter.ssl_certificate_chain) - self.ssl_adapter.context = self.server_adapter.ssl_context - elif self.server_adapter.ssl_certificate: - adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module) - self.ssl_adapter = adapter_class( - self.server_adapter.ssl_certificate, - self.server_adapter.ssl_private_key, - self.server_adapter.ssl_certificate_chain) - - self.stats['Enabled'] = getattr( - self.server_adapter, 'statistics', False) - - def error_log(self, msg="", level=20, traceback=False): - cherrypy.engine.log(msg, level, traceback) diff --git a/lib/cherrypy/cherryd b/lib/cherrypy/cherryd deleted file mode 100644 index 5afb27ad..00000000 --- a/lib/cherrypy/cherryd +++ /dev/null @@ -1,110 +0,0 @@ -#! /usr/bin/env python -"""The CherryPy daemon.""" - -import sys - -import cherrypy -from cherrypy.process import plugins, servers -from cherrypy import Application - - -def start(configfiles=None, daemonize=False, environment=None, - fastcgi=False, scgi=False, pidfile=None, imports=None, - cgi=False): - """Subscribe all engine plugins and start the engine.""" - sys.path = [''] + sys.path - for i in imports or []: - exec("import %s" % i) - - for c in configfiles or []: - cherrypy.config.update(c) - # If there's only one app mounted, merge config into it. - if len(cherrypy.tree.apps) == 1: - for app in cherrypy.tree.apps.values(): - if isinstance(app, Application): - app.merge(c) - - engine = cherrypy.engine - - if environment is not None: - cherrypy.config.update({'environment': environment}) - - # Only daemonize if asked to. - if daemonize: - # Don't print anything to stdout/sterr. - cherrypy.config.update({'log.screen': False}) - plugins.Daemonizer(engine).subscribe() - - if pidfile: - plugins.PIDFile(engine, pidfile).subscribe() - - if hasattr(engine, "signal_handler"): - engine.signal_handler.subscribe() - if hasattr(engine, "console_control_handler"): - engine.console_control_handler.subscribe() - - if (fastcgi and (scgi or cgi)) or (scgi and cgi): - cherrypy.log.error("You may only specify one of the cgi, fastcgi, and " - "scgi options.", 'ENGINE') - sys.exit(1) - elif fastcgi or scgi or cgi: - # Turn off autoreload when using *cgi. - cherrypy.config.update({'engine.autoreload_on': False}) - # Turn off the default HTTP server (which is subscribed by default). - cherrypy.server.unsubscribe() - - addr = cherrypy.server.bind_addr - if fastcgi: - f = servers.FlupFCGIServer(application=cherrypy.tree, - bindAddress=addr) - elif scgi: - f = servers.FlupSCGIServer(application=cherrypy.tree, - bindAddress=addr) - else: - f = servers.FlupCGIServer(application=cherrypy.tree, - bindAddress=addr) - s = servers.ServerAdapter(engine, httpserver=f, bind_addr=addr) - s.subscribe() - - # Always start the engine; this will start all other services - try: - engine.start() - except: - # Assume the error has been logged already via bus.log. - sys.exit(1) - else: - engine.block() - - -if __name__ == '__main__': - from optparse import OptionParser - - p = OptionParser() - p.add_option('-c', '--config', action="append", dest='config', - help="specify config file(s)") - p.add_option('-d', action="store_true", dest='daemonize', - help="run the server as a daemon") - p.add_option('-e', '--environment', dest='environment', default=None, - help="apply the given config environment") - p.add_option('-f', action="store_true", dest='fastcgi', - help="start a fastcgi server instead of the default HTTP " - "server") - p.add_option('-s', action="store_true", dest='scgi', - help="start a scgi server instead of the default HTTP server") - p.add_option('-x', action="store_true", dest='cgi', - help="start a cgi server instead of the default HTTP server") - p.add_option('-i', '--import', action="append", dest='imports', - help="specify modules to import") - p.add_option('-p', '--pidfile', dest='pidfile', default=None, - help="store the process id in the given file") - p.add_option('-P', '--Path', action="append", dest='Path', - help="add the given paths to sys.path") - options, args = p.parse_args() - - if options.Path: - for p in options.Path: - sys.path.insert(0, p) - - start(options.config, options.daemonize, - options.environment, options.fastcgi, options.scgi, - options.pidfile, options.imports, options.cgi) diff --git a/lib/cherrypy/favicon.ico b/lib/cherrypy/favicon.ico deleted file mode 100644 index f0d7e61badad3f332cf1e663efb97c0b5be80f5e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1406 zcmb`Hd05U_6vscWSJJB#$ugQ@jA;zAXDv%YAxlVP&ytAjTU10ymNpe4LY9=2_SI5K zC3>Y)vZNK!rhR_zJdtfiw$m?BBmX0|pFW;J|@s zYHBiQ&>#j69?Xy-Ll`=AD8q&gWBBmlj2JNjEiElZjvUFTQKJ|=dNgCkjA889v5Xrx z4sC61baZqWKYlzDCQM-B#EDFrGznc@T_#VSjGmqzQ>IK|>eQ)Bn>G!7eSHiJ446KB zIx}X>VCKx37#bQfYt}4g&z{YkIdhmhcP>UoM$DTxkNNZGvtYpjjE#+1xNspRCMGOe zw1~xv7h`H_%915ZSh{p6%a$!;`SRtgSh0eYD_62=)hf))%vim8HEY(aVeQ(rtXsDZ zb8~anuV0Uag#{ZnY+&QYjaXV*vT4&MHgDdHm6a7+wrpYR)~#&YwvFxEx3go%4tDO` z$*x_y*u8rke3QwOtB{embw6rwR)6;qO>=_vu z89aafoEI-%keQi@R4V1=%a>$jW%26OE3&h*$;rv#_3PK<=H`-@mq&hnK5yQT(K__B|98+X@Zp^73MZjvW!HsVT|~sc)O^ZGM)}O{A)-)@n=bmFdz? zRaLc>D=T%Qr>f`&r)>x2Kb1tH)^h|wLs?1GQ@HOR^ik=lq13yT$@Z=qojU#WZ-LIg Kbp8+jKgeIP@z;_7 diff --git a/lib/cherrypy/lib/__init__.py b/lib/cherrypy/lib/__init__.py deleted file mode 100644 index a75a53da..00000000 --- a/lib/cherrypy/lib/__init__.py +++ /dev/null @@ -1,85 +0,0 @@ -"""CherryPy Library""" - -# Deprecated in CherryPy 3.2 -- remove in CherryPy 3.3 -from cherrypy.lib.reprconf import unrepr, modules, attributes - -def is_iterator(obj): - '''Returns a boolean indicating if the object provided implements - the iterator protocol (i.e. like a generator). This will return - false for objects which iterable, but not iterators themselves.''' - from types import GeneratorType - if isinstance(obj, GeneratorType): - return True - elif not hasattr(obj, '__iter__'): - return False - else: - # Types which implement the protocol must return themselves when - # invoking 'iter' upon them. - return iter(obj) is obj - -def is_closable_iterator(obj): - - # Not an iterator. - if not is_iterator(obj): - return False - - # A generator - the easiest thing to deal with. - import inspect - if inspect.isgenerator(obj): - return True - - # A custom iterator. Look for a close method... - if not (hasattr(obj, 'close') and callable(obj.close)): - return False - - # ... which doesn't require any arguments. - try: - inspect.getcallargs(obj.close) - except TypeError: - return False - else: - return True - -class file_generator(object): - - """Yield the given input (a file object) in chunks (default 64k). (Core)""" - - def __init__(self, input, chunkSize=65536): - self.input = input - self.chunkSize = chunkSize - - def __iter__(self): - return self - - def __next__(self): - chunk = self.input.read(self.chunkSize) - if chunk: - return chunk - else: - if hasattr(self.input, 'close'): - self.input.close() - raise StopIteration() - next = __next__ - - -def file_generator_limited(fileobj, count, chunk_size=65536): - """Yield the given file object in chunks, stopping after `count` - bytes has been emitted. Default chunk size is 64kB. (Core) - """ - remaining = count - while remaining > 0: - chunk = fileobj.read(min(chunk_size, remaining)) - chunklen = len(chunk) - if chunklen == 0: - return - remaining -= chunklen - yield chunk - - -def set_vary_header(response, header_name): - "Add a Vary header to a response" - varies = response.headers.get("Vary", "") - varies = [x.strip() for x in varies.split(",") if x.strip()] - if header_name not in varies: - varies.append(header_name) - response.headers['Vary'] = ", ".join(varies) diff --git a/lib/cherrypy/lib/auth.py b/lib/cherrypy/lib/auth.py deleted file mode 100644 index 71591aaa..00000000 --- a/lib/cherrypy/lib/auth.py +++ /dev/null @@ -1,97 +0,0 @@ -import cherrypy -from cherrypy.lib import httpauth - - -def check_auth(users, encrypt=None, realm=None): - """If an authorization header contains credentials, return True or False. - """ - request = cherrypy.serving.request - if 'authorization' in request.headers: - # make sure the provided credentials are correctly set - ah = httpauth.parseAuthorization(request.headers['authorization']) - if ah is None: - raise cherrypy.HTTPError(400, 'Bad Request') - - if not encrypt: - encrypt = httpauth.DIGEST_AUTH_ENCODERS[httpauth.MD5] - - if hasattr(users, '__call__'): - try: - # backward compatibility - users = users() # expect it to return a dictionary - - if not isinstance(users, dict): - raise ValueError( - "Authentication users must be a dictionary") - - # fetch the user password - password = users.get(ah["username"], None) - except TypeError: - # returns a password (encrypted or clear text) - password = users(ah["username"]) - else: - if not isinstance(users, dict): - raise ValueError("Authentication users must be a dictionary") - - # fetch the user password - password = users.get(ah["username"], None) - - # validate the authorization by re-computing it here - # and compare it with what the user-agent provided - if httpauth.checkResponse(ah, password, method=request.method, - encrypt=encrypt, realm=realm): - request.login = ah["username"] - return True - - request.login = False - return False - - -def basic_auth(realm, users, encrypt=None, debug=False): - """If auth fails, raise 401 with a basic authentication header. - - realm - A string containing the authentication realm. - - users - A dict of the form: {username: password} or a callable returning - a dict. - - encrypt - callable used to encrypt the password returned from the user-agent. - if None it defaults to a md5 encryption. - - """ - if check_auth(users, encrypt): - if debug: - cherrypy.log('Auth successful', 'TOOLS.BASIC_AUTH') - return - - # inform the user-agent this path is protected - cherrypy.serving.response.headers[ - 'www-authenticate'] = httpauth.basicAuth(realm) - - raise cherrypy.HTTPError( - 401, "You are not authorized to access that resource") - - -def digest_auth(realm, users, debug=False): - """If auth fails, raise 401 with a digest authentication header. - - realm - A string containing the authentication realm. - users - A dict of the form: {username: password} or a callable returning - a dict. - """ - if check_auth(users, realm=realm): - if debug: - cherrypy.log('Auth successful', 'TOOLS.DIGEST_AUTH') - return - - # inform the user-agent this path is protected - cherrypy.serving.response.headers[ - 'www-authenticate'] = httpauth.digestAuth(realm) - - raise cherrypy.HTTPError( - 401, "You are not authorized to access that resource") diff --git a/lib/cherrypy/lib/auth_basic.py b/lib/cherrypy/lib/auth_basic.py deleted file mode 100644 index 5ba16f7f..00000000 --- a/lib/cherrypy/lib/auth_basic.py +++ /dev/null @@ -1,90 +0,0 @@ -# This file is part of CherryPy -# -*- coding: utf-8 -*- -# vim:ts=4:sw=4:expandtab:fileencoding=utf-8 - -__doc__ = """This module provides a CherryPy 3.x tool which implements -the server-side of HTTP Basic Access Authentication, as described in -:rfc:`2617`. - -Example usage, using the built-in checkpassword_dict function which uses a dict -as the credentials store:: - - userpassdict = {'bird' : 'bebop', 'ornette' : 'wayout'} - checkpassword = cherrypy.lib.auth_basic.checkpassword_dict(userpassdict) - basic_auth = {'tools.auth_basic.on': True, - 'tools.auth_basic.realm': 'earth', - 'tools.auth_basic.checkpassword': checkpassword, - } - app_config = { '/' : basic_auth } - -""" - -__author__ = 'visteya' -__date__ = 'April 2009' - -import binascii -from cherrypy._cpcompat import base64_decode -import cherrypy - - -def checkpassword_dict(user_password_dict): - """Returns a checkpassword function which checks credentials - against a dictionary of the form: {username : password}. - - If you want a simple dictionary-based authentication scheme, use - checkpassword_dict(my_credentials_dict) as the value for the - checkpassword argument to basic_auth(). - """ - def checkpassword(realm, user, password): - p = user_password_dict.get(user) - return p and p == password or False - - return checkpassword - - -def basic_auth(realm, checkpassword, debug=False): - """A CherryPy tool which hooks at before_handler to perform - HTTP Basic Access Authentication, as specified in :rfc:`2617`. - - If the request has an 'authorization' header with a 'Basic' scheme, this - tool attempts to authenticate the credentials supplied in that header. If - the request has no 'authorization' header, or if it does but the scheme is - not 'Basic', or if authentication fails, the tool sends a 401 response with - a 'WWW-Authenticate' Basic header. - - realm - A string containing the authentication realm. - - checkpassword - A callable which checks the authentication credentials. - Its signature is checkpassword(realm, username, password). where - username and password are the values obtained from the request's - 'authorization' header. If authentication succeeds, checkpassword - returns True, else it returns False. - - """ - - if '"' in realm: - raise ValueError('Realm cannot contain the " (quote) character.') - request = cherrypy.serving.request - - auth_header = request.headers.get('authorization') - if auth_header is not None: - try: - scheme, params = auth_header.split(' ', 1) - if scheme.lower() == 'basic': - username, password = base64_decode(params).split(':', 1) - if checkpassword(realm, username, password): - if debug: - cherrypy.log('Auth succeeded', 'TOOLS.AUTH_BASIC') - request.login = username - return # successful authentication - # split() error, base64.decodestring() error - except (ValueError, binascii.Error): - raise cherrypy.HTTPError(400, 'Bad Request') - - # Respond with 401 status and a WWW-Authenticate header - cherrypy.serving.response.headers[ - 'www-authenticate'] = 'Basic realm="%s"' % realm - raise cherrypy.HTTPError( - 401, "You are not authorized to access that resource") diff --git a/lib/cherrypy/lib/auth_digest.py b/lib/cherrypy/lib/auth_digest.py deleted file mode 100644 index e06535dc..00000000 --- a/lib/cherrypy/lib/auth_digest.py +++ /dev/null @@ -1,390 +0,0 @@ -# This file is part of CherryPy -# -*- coding: utf-8 -*- -# vim:ts=4:sw=4:expandtab:fileencoding=utf-8 - -__doc__ = """An implementation of the server-side of HTTP Digest Access -Authentication, which is described in :rfc:`2617`. - -Example usage, using the built-in get_ha1_dict_plain function which uses a dict -of plaintext passwords as the credentials store:: - - userpassdict = {'alice' : '4x5istwelve'} - get_ha1 = cherrypy.lib.auth_digest.get_ha1_dict_plain(userpassdict) - digest_auth = {'tools.auth_digest.on': True, - 'tools.auth_digest.realm': 'wonderland', - 'tools.auth_digest.get_ha1': get_ha1, - 'tools.auth_digest.key': 'a565c27146791cfb', - } - app_config = { '/' : digest_auth } -""" - -__author__ = 'visteya' -__date__ = 'April 2009' - - -import time -from cherrypy._cpcompat import parse_http_list, parse_keqv_list - -import cherrypy -from cherrypy._cpcompat import md5, ntob -md5_hex = lambda s: md5(ntob(s)).hexdigest() - -qop_auth = 'auth' -qop_auth_int = 'auth-int' -valid_qops = (qop_auth, qop_auth_int) - -valid_algorithms = ('MD5', 'MD5-sess') - - -def TRACE(msg): - cherrypy.log(msg, context='TOOLS.AUTH_DIGEST') - -# Three helper functions for users of the tool, providing three variants -# of get_ha1() functions for three different kinds of credential stores. - - -def get_ha1_dict_plain(user_password_dict): - """Returns a get_ha1 function which obtains a plaintext password from a - dictionary of the form: {username : password}. - - If you want a simple dictionary-based authentication scheme, with plaintext - passwords, use get_ha1_dict_plain(my_userpass_dict) as the value for the - get_ha1 argument to digest_auth(). - """ - def get_ha1(realm, username): - password = user_password_dict.get(username) - if password: - return md5_hex('%s:%s:%s' % (username, realm, password)) - return None - - return get_ha1 - - -def get_ha1_dict(user_ha1_dict): - """Returns a get_ha1 function which obtains a HA1 password hash from a - dictionary of the form: {username : HA1}. - - If you want a dictionary-based authentication scheme, but with - pre-computed HA1 hashes instead of plain-text passwords, use - get_ha1_dict(my_userha1_dict) as the value for the get_ha1 - argument to digest_auth(). - """ - def get_ha1(realm, username): - return user_ha1_dict.get(username) - - return get_ha1 - - -def get_ha1_file_htdigest(filename): - """Returns a get_ha1 function which obtains a HA1 password hash from a - flat file with lines of the same format as that produced by the Apache - htdigest utility. For example, for realm 'wonderland', username 'alice', - and password '4x5istwelve', the htdigest line would be:: - - alice:wonderland:3238cdfe91a8b2ed8e39646921a02d4c - - If you want to use an Apache htdigest file as the credentials store, - then use get_ha1_file_htdigest(my_htdigest_file) as the value for the - get_ha1 argument to digest_auth(). It is recommended that the filename - argument be an absolute path, to avoid problems. - """ - def get_ha1(realm, username): - result = None - f = open(filename, 'r') - for line in f: - u, r, ha1 = line.rstrip().split(':') - if u == username and r == realm: - result = ha1 - break - f.close() - return result - - return get_ha1 - - -def synthesize_nonce(s, key, timestamp=None): - """Synthesize a nonce value which resists spoofing and can be checked - for staleness. Returns a string suitable as the value for 'nonce' in - the www-authenticate header. - - s - A string related to the resource, such as the hostname of the server. - - key - A secret string known only to the server. - - timestamp - An integer seconds-since-the-epoch timestamp - - """ - if timestamp is None: - timestamp = int(time.time()) - h = md5_hex('%s:%s:%s' % (timestamp, s, key)) - nonce = '%s:%s' % (timestamp, h) - return nonce - - -def H(s): - """The hash function H""" - return md5_hex(s) - - -class HttpDigestAuthorization (object): - - """Class to parse a Digest Authorization header and perform re-calculation - of the digest. - """ - - def errmsg(self, s): - return 'Digest Authorization header: %s' % s - - def __init__(self, auth_header, http_method, debug=False): - self.http_method = http_method - self.debug = debug - scheme, params = auth_header.split(" ", 1) - self.scheme = scheme.lower() - if self.scheme != 'digest': - raise ValueError('Authorization scheme is not "Digest"') - - self.auth_header = auth_header - - # make a dict of the params - items = parse_http_list(params) - paramsd = parse_keqv_list(items) - - self.realm = paramsd.get('realm') - self.username = paramsd.get('username') - self.nonce = paramsd.get('nonce') - self.uri = paramsd.get('uri') - self.method = paramsd.get('method') - self.response = paramsd.get('response') # the response digest - self.algorithm = paramsd.get('algorithm', 'MD5').upper() - self.cnonce = paramsd.get('cnonce') - self.opaque = paramsd.get('opaque') - self.qop = paramsd.get('qop') # qop - self.nc = paramsd.get('nc') # nonce count - - # perform some correctness checks - if self.algorithm not in valid_algorithms: - raise ValueError( - self.errmsg("Unsupported value for algorithm: '%s'" % - self.algorithm)) - - has_reqd = ( - self.username and - self.realm and - self.nonce and - self.uri and - self.response - ) - if not has_reqd: - raise ValueError( - self.errmsg("Not all required parameters are present.")) - - if self.qop: - if self.qop not in valid_qops: - raise ValueError( - self.errmsg("Unsupported value for qop: '%s'" % self.qop)) - if not (self.cnonce and self.nc): - raise ValueError( - self.errmsg("If qop is sent then " - "cnonce and nc MUST be present")) - else: - if self.cnonce or self.nc: - raise ValueError( - self.errmsg("If qop is not sent, " - "neither cnonce nor nc can be present")) - - def __str__(self): - return 'authorization : %s' % self.auth_header - - def validate_nonce(self, s, key): - """Validate the nonce. - Returns True if nonce was generated by synthesize_nonce() and the - timestamp is not spoofed, else returns False. - - s - A string related to the resource, such as the hostname of - the server. - - key - A secret string known only to the server. - - Both s and key must be the same values which were used to synthesize - the nonce we are trying to validate. - """ - try: - timestamp, hashpart = self.nonce.split(':', 1) - s_timestamp, s_hashpart = synthesize_nonce( - s, key, timestamp).split(':', 1) - is_valid = s_hashpart == hashpart - if self.debug: - TRACE('validate_nonce: %s' % is_valid) - return is_valid - except ValueError: # split() error - pass - return False - - def is_nonce_stale(self, max_age_seconds=600): - """Returns True if a validated nonce is stale. The nonce contains a - timestamp in plaintext and also a secure hash of the timestamp. - You should first validate the nonce to ensure the plaintext - timestamp is not spoofed. - """ - try: - timestamp, hashpart = self.nonce.split(':', 1) - if int(timestamp) + max_age_seconds > int(time.time()): - return False - except ValueError: # int() error - pass - if self.debug: - TRACE("nonce is stale") - return True - - def HA2(self, entity_body=''): - """Returns the H(A2) string. See :rfc:`2617` section 3.2.2.3.""" - # RFC 2617 3.2.2.3 - # If the "qop" directive's value is "auth" or is unspecified, - # then A2 is: - # A2 = method ":" digest-uri-value - # - # If the "qop" value is "auth-int", then A2 is: - # A2 = method ":" digest-uri-value ":" H(entity-body) - if self.qop is None or self.qop == "auth": - a2 = '%s:%s' % (self.http_method, self.uri) - elif self.qop == "auth-int": - a2 = "%s:%s:%s" % (self.http_method, self.uri, H(entity_body)) - else: - # in theory, this should never happen, since I validate qop in - # __init__() - raise ValueError(self.errmsg("Unrecognized value for qop!")) - return H(a2) - - def request_digest(self, ha1, entity_body=''): - """Calculates the Request-Digest. See :rfc:`2617` section 3.2.2.1. - - ha1 - The HA1 string obtained from the credentials store. - - entity_body - If 'qop' is set to 'auth-int', then A2 includes a hash - of the "entity body". The entity body is the part of the - message which follows the HTTP headers. See :rfc:`2617` section - 4.3. This refers to the entity the user agent sent in the - request which has the Authorization header. Typically GET - requests don't have an entity, and POST requests do. - - """ - ha2 = self.HA2(entity_body) - # Request-Digest -- RFC 2617 3.2.2.1 - if self.qop: - req = "%s:%s:%s:%s:%s" % ( - self.nonce, self.nc, self.cnonce, self.qop, ha2) - else: - req = "%s:%s" % (self.nonce, ha2) - - # RFC 2617 3.2.2.2 - # - # If the "algorithm" directive's value is "MD5" or is unspecified, - # then A1 is: - # A1 = unq(username-value) ":" unq(realm-value) ":" passwd - # - # If the "algorithm" directive's value is "MD5-sess", then A1 is - # calculated only once - on the first request by the client following - # receipt of a WWW-Authenticate challenge from the server. - # A1 = H( unq(username-value) ":" unq(realm-value) ":" passwd ) - # ":" unq(nonce-value) ":" unq(cnonce-value) - if self.algorithm == 'MD5-sess': - ha1 = H('%s:%s:%s' % (ha1, self.nonce, self.cnonce)) - - digest = H('%s:%s' % (ha1, req)) - return digest - - -def www_authenticate(realm, key, algorithm='MD5', nonce=None, qop=qop_auth, - stale=False): - """Constructs a WWW-Authenticate header for Digest authentication.""" - if qop not in valid_qops: - raise ValueError("Unsupported value for qop: '%s'" % qop) - if algorithm not in valid_algorithms: - raise ValueError("Unsupported value for algorithm: '%s'" % algorithm) - - if nonce is None: - nonce = synthesize_nonce(realm, key) - s = 'Digest realm="%s", nonce="%s", algorithm="%s", qop="%s"' % ( - realm, nonce, algorithm, qop) - if stale: - s += ', stale="true"' - return s - - -def digest_auth(realm, get_ha1, key, debug=False): - """A CherryPy tool which hooks at before_handler to perform - HTTP Digest Access Authentication, as specified in :rfc:`2617`. - - If the request has an 'authorization' header with a 'Digest' scheme, - this tool authenticates the credentials supplied in that header. - If the request has no 'authorization' header, or if it does but the - scheme is not "Digest", or if authentication fails, the tool sends - a 401 response with a 'WWW-Authenticate' Digest header. - - realm - A string containing the authentication realm. - - get_ha1 - A callable which looks up a username in a credentials store - and returns the HA1 string, which is defined in the RFC to be - MD5(username : realm : password). The function's signature is: - ``get_ha1(realm, username)`` - where username is obtained from the request's 'authorization' header. - If username is not found in the credentials store, get_ha1() returns - None. - - key - A secret string known only to the server, used in the synthesis - of nonces. - - """ - request = cherrypy.serving.request - - auth_header = request.headers.get('authorization') - nonce_is_stale = False - if auth_header is not None: - try: - auth = HttpDigestAuthorization( - auth_header, request.method, debug=debug) - except ValueError: - raise cherrypy.HTTPError( - 400, "The Authorization header could not be parsed.") - - if debug: - TRACE(str(auth)) - - if auth.validate_nonce(realm, key): - ha1 = get_ha1(realm, auth.username) - if ha1 is not None: - # note that for request.body to be available we need to - # hook in at before_handler, not on_start_resource like - # 3.1.x digest_auth does. - digest = auth.request_digest(ha1, entity_body=request.body) - if digest == auth.response: # authenticated - if debug: - TRACE("digest matches auth.response") - # Now check if nonce is stale. - # The choice of ten minutes' lifetime for nonce is somewhat - # arbitrary - nonce_is_stale = auth.is_nonce_stale(max_age_seconds=600) - if not nonce_is_stale: - request.login = auth.username - if debug: - TRACE("authentication of %s successful" % - auth.username) - return - - # Respond with 401 status and a WWW-Authenticate header - header = www_authenticate(realm, key, stale=nonce_is_stale) - if debug: - TRACE(header) - cherrypy.serving.response.headers['WWW-Authenticate'] = header - raise cherrypy.HTTPError( - 401, "You are not authorized to access that resource") diff --git a/lib/cherrypy/lib/caching.py b/lib/cherrypy/lib/caching.py deleted file mode 100644 index fab6b569..00000000 --- a/lib/cherrypy/lib/caching.py +++ /dev/null @@ -1,470 +0,0 @@ -""" -CherryPy implements a simple caching system as a pluggable Tool. This tool -tries to be an (in-process) HTTP/1.1-compliant cache. It's not quite there -yet, but it's probably good enough for most sites. - -In general, GET responses are cached (along with selecting headers) and, if -another request arrives for the same resource, the caching Tool will return 304 -Not Modified if possible, or serve the cached response otherwise. It also sets -request.cached to True if serving a cached representation, and sets -request.cacheable to False (so it doesn't get cached again). - -If POST, PUT, or DELETE requests are made for a cached resource, they -invalidate (delete) any cached response. - -Usage -===== - -Configuration file example:: - - [/] - tools.caching.on = True - tools.caching.delay = 3600 - -You may use a class other than the default -:class:`MemoryCache` by supplying the config -entry ``cache_class``; supply the full dotted name of the replacement class -as the config value. It must implement the basic methods ``get``, ``put``, -``delete``, and ``clear``. - -You may set any attribute, including overriding methods, on the cache -instance by providing them in config. The above sets the -:attr:`delay` attribute, for example. -""" - -import datetime -import sys -import threading -import time - -import cherrypy -from cherrypy.lib import cptools, httputil -from cherrypy._cpcompat import copyitems, ntob, set_daemon, sorted, Event - - -class Cache(object): - - """Base class for Cache implementations.""" - - def get(self): - """Return the current variant if in the cache, else None.""" - raise NotImplemented - - def put(self, obj, size): - """Store the current variant in the cache.""" - raise NotImplemented - - def delete(self): - """Remove ALL cached variants of the current resource.""" - raise NotImplemented - - def clear(self): - """Reset the cache to its initial, empty state.""" - raise NotImplemented - - -# ------------------------------ Memory Cache ------------------------------- # -class AntiStampedeCache(dict): - - """A storage system for cached items which reduces stampede collisions.""" - - def wait(self, key, timeout=5, debug=False): - """Return the cached value for the given key, or None. - - If timeout is not None, and the value is already - being calculated by another thread, wait until the given timeout has - elapsed. If the value is available before the timeout expires, it is - returned. If not, None is returned, and a sentinel placed in the cache - to signal other threads to wait. - - If timeout is None, no waiting is performed nor sentinels used. - """ - value = self.get(key) - if isinstance(value, Event): - if timeout is None: - # Ignore the other thread and recalc it ourselves. - if debug: - cherrypy.log('No timeout', 'TOOLS.CACHING') - return None - - # Wait until it's done or times out. - if debug: - cherrypy.log('Waiting up to %s seconds' % - timeout, 'TOOLS.CACHING') - value.wait(timeout) - if value.result is not None: - # The other thread finished its calculation. Use it. - if debug: - cherrypy.log('Result!', 'TOOLS.CACHING') - return value.result - # Timed out. Stick an Event in the slot so other threads wait - # on this one to finish calculating the value. - if debug: - cherrypy.log('Timed out', 'TOOLS.CACHING') - e = threading.Event() - e.result = None - dict.__setitem__(self, key, e) - - return None - elif value is None: - # Stick an Event in the slot so other threads wait - # on this one to finish calculating the value. - if debug: - cherrypy.log('Timed out', 'TOOLS.CACHING') - e = threading.Event() - e.result = None - dict.__setitem__(self, key, e) - return value - - def __setitem__(self, key, value): - """Set the cached value for the given key.""" - existing = self.get(key) - dict.__setitem__(self, key, value) - if isinstance(existing, Event): - # Set Event.result so other threads waiting on it have - # immediate access without needing to poll the cache again. - existing.result = value - existing.set() - - -class MemoryCache(Cache): - - """An in-memory cache for varying response content. - - Each key in self.store is a URI, and each value is an AntiStampedeCache. - The response for any given URI may vary based on the values of - "selecting request headers"; that is, those named in the Vary - response header. We assume the list of header names to be constant - for each URI throughout the lifetime of the application, and store - that list in ``self.store[uri].selecting_headers``. - - The items contained in ``self.store[uri]`` have keys which are tuples of - request header values (in the same order as the names in its - selecting_headers), and values which are the actual responses. - """ - - maxobjects = 1000 - """The maximum number of cached objects; defaults to 1000.""" - - maxobj_size = 100000 - """The maximum size of each cached object in bytes; defaults to 100 KB.""" - - maxsize = 10000000 - """The maximum size of the entire cache in bytes; defaults to 10 MB.""" - - delay = 600 - """Seconds until the cached content expires; defaults to 600 (10 minutes). - """ - - antistampede_timeout = 5 - """Seconds to wait for other threads to release a cache lock.""" - - expire_freq = 0.1 - """Seconds to sleep between cache expiration sweeps.""" - - debug = False - - def __init__(self): - self.clear() - - # Run self.expire_cache in a separate daemon thread. - t = threading.Thread(target=self.expire_cache, name='expire_cache') - self.expiration_thread = t - set_daemon(t, True) - t.start() - - def clear(self): - """Reset the cache to its initial, empty state.""" - self.store = {} - self.expirations = {} - self.tot_puts = 0 - self.tot_gets = 0 - self.tot_hist = 0 - self.tot_expires = 0 - self.tot_non_modified = 0 - self.cursize = 0 - - def expire_cache(self): - """Continuously examine cached objects, expiring stale ones. - - This function is designed to be run in its own daemon thread, - referenced at ``self.expiration_thread``. - """ - # It's possible that "time" will be set to None - # arbitrarily, so we check "while time" to avoid exceptions. - # See tickets #99 and #180 for more information. - while time: - now = time.time() - # Must make a copy of expirations so it doesn't change size - # during iteration - for expiration_time, objects in copyitems(self.expirations): - if expiration_time <= now: - for obj_size, uri, sel_header_values in objects: - try: - del self.store[uri][tuple(sel_header_values)] - self.tot_expires += 1 - self.cursize -= obj_size - except KeyError: - # the key may have been deleted elsewhere - pass - del self.expirations[expiration_time] - time.sleep(self.expire_freq) - - def get(self): - """Return the current variant if in the cache, else None.""" - request = cherrypy.serving.request - self.tot_gets += 1 - - uri = cherrypy.url(qs=request.query_string) - uricache = self.store.get(uri) - if uricache is None: - return None - - header_values = [request.headers.get(h, '') - for h in uricache.selecting_headers] - variant = uricache.wait(key=tuple(sorted(header_values)), - timeout=self.antistampede_timeout, - debug=self.debug) - if variant is not None: - self.tot_hist += 1 - return variant - - def put(self, variant, size): - """Store the current variant in the cache.""" - request = cherrypy.serving.request - response = cherrypy.serving.response - - uri = cherrypy.url(qs=request.query_string) - uricache = self.store.get(uri) - if uricache is None: - uricache = AntiStampedeCache() - uricache.selecting_headers = [ - e.value for e in response.headers.elements('Vary')] - self.store[uri] = uricache - - if len(self.store) < self.maxobjects: - total_size = self.cursize + size - - # checks if there's space for the object - if (size < self.maxobj_size and total_size < self.maxsize): - # add to the expirations list - expiration_time = response.time + self.delay - bucket = self.expirations.setdefault(expiration_time, []) - bucket.append((size, uri, uricache.selecting_headers)) - - # add to the cache - header_values = [request.headers.get(h, '') - for h in uricache.selecting_headers] - uricache[tuple(sorted(header_values))] = variant - self.tot_puts += 1 - self.cursize = total_size - - def delete(self): - """Remove ALL cached variants of the current resource.""" - uri = cherrypy.url(qs=cherrypy.serving.request.query_string) - self.store.pop(uri, None) - - -def get(invalid_methods=("POST", "PUT", "DELETE"), debug=False, **kwargs): - """Try to obtain cached output. If fresh enough, raise HTTPError(304). - - If POST, PUT, or DELETE: - * invalidates (deletes) any cached response for this resource - * sets request.cached = False - * sets request.cacheable = False - - else if a cached copy exists: - * sets request.cached = True - * sets request.cacheable = False - * sets response.headers to the cached values - * checks the cached Last-Modified response header against the - current If-(Un)Modified-Since request headers; raises 304 - if necessary. - * sets response.status and response.body to the cached values - * returns True - - otherwise: - * sets request.cached = False - * sets request.cacheable = True - * returns False - """ - request = cherrypy.serving.request - response = cherrypy.serving.response - - if not hasattr(cherrypy, "_cache"): - # Make a process-wide Cache object. - cherrypy._cache = kwargs.pop("cache_class", MemoryCache)() - - # Take all remaining kwargs and set them on the Cache object. - for k, v in kwargs.items(): - setattr(cherrypy._cache, k, v) - cherrypy._cache.debug = debug - - # POST, PUT, DELETE should invalidate (delete) the cached copy. - # See http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.10. - if request.method in invalid_methods: - if debug: - cherrypy.log('request.method %r in invalid_methods %r' % - (request.method, invalid_methods), 'TOOLS.CACHING') - cherrypy._cache.delete() - request.cached = False - request.cacheable = False - return False - - if 'no-cache' in [e.value for e in request.headers.elements('Pragma')]: - request.cached = False - request.cacheable = True - return False - - cache_data = cherrypy._cache.get() - request.cached = bool(cache_data) - request.cacheable = not request.cached - if request.cached: - # Serve the cached copy. - max_age = cherrypy._cache.delay - for v in [e.value for e in request.headers.elements('Cache-Control')]: - atoms = v.split('=', 1) - directive = atoms.pop(0) - if directive == 'max-age': - if len(atoms) != 1 or not atoms[0].isdigit(): - raise cherrypy.HTTPError( - 400, "Invalid Cache-Control header") - max_age = int(atoms[0]) - break - elif directive == 'no-cache': - if debug: - cherrypy.log( - 'Ignoring cache due to Cache-Control: no-cache', - 'TOOLS.CACHING') - request.cached = False - request.cacheable = True - return False - - if debug: - cherrypy.log('Reading response from cache', 'TOOLS.CACHING') - s, h, b, create_time = cache_data - age = int(response.time - create_time) - if (age > max_age): - if debug: - cherrypy.log('Ignoring cache due to age > %d' % max_age, - 'TOOLS.CACHING') - request.cached = False - request.cacheable = True - return False - - # Copy the response headers. See - # https://bitbucket.org/cherrypy/cherrypy/issue/721. - response.headers = rh = httputil.HeaderMap() - for k in h: - dict.__setitem__(rh, k, dict.__getitem__(h, k)) - - # Add the required Age header - response.headers["Age"] = str(age) - - try: - # Note that validate_since depends on a Last-Modified header; - # this was put into the cached copy, and should have been - # resurrected just above (response.headers = cache_data[1]). - cptools.validate_since() - except cherrypy.HTTPRedirect: - x = sys.exc_info()[1] - if x.status == 304: - cherrypy._cache.tot_non_modified += 1 - raise - - # serve it & get out from the request - response.status = s - response.body = b - else: - if debug: - cherrypy.log('request is not cached', 'TOOLS.CACHING') - return request.cached - - -def tee_output(): - """Tee response output to cache storage. Internal.""" - # Used by CachingTool by attaching to request.hooks - - request = cherrypy.serving.request - if 'no-store' in request.headers.values('Cache-Control'): - return - - def tee(body): - """Tee response.body into a list.""" - if ('no-cache' in response.headers.values('Pragma') or - 'no-store' in response.headers.values('Cache-Control')): - for chunk in body: - yield chunk - return - - output = [] - for chunk in body: - output.append(chunk) - yield chunk - - # save the cache data - body = ntob('').join(output) - cherrypy._cache.put((response.status, response.headers or {}, - body, response.time), len(body)) - - response = cherrypy.serving.response - response.body = tee(response.body) - - -def expires(secs=0, force=False, debug=False): - """Tool for influencing cache mechanisms using the 'Expires' header. - - secs - Must be either an int or a datetime.timedelta, and indicates the - number of seconds between response.time and when the response should - expire. The 'Expires' header will be set to response.time + secs. - If secs is zero, the 'Expires' header is set one year in the past, and - the following "cache prevention" headers are also set: - - * Pragma: no-cache - * Cache-Control': no-cache, must-revalidate - - force - If False, the following headers are checked: - - * Etag - * Last-Modified - * Age - * Expires - - If any are already present, none of the above response headers are set. - - """ - - response = cherrypy.serving.response - headers = response.headers - - cacheable = False - if not force: - # some header names that indicate that the response can be cached - for indicator in ('Etag', 'Last-Modified', 'Age', 'Expires'): - if indicator in headers: - cacheable = True - break - - if not cacheable and not force: - if debug: - cherrypy.log('request is not cacheable', 'TOOLS.EXPIRES') - else: - if debug: - cherrypy.log('request is cacheable', 'TOOLS.EXPIRES') - if isinstance(secs, datetime.timedelta): - secs = (86400 * secs.days) + secs.seconds - - if secs == 0: - if force or ("Pragma" not in headers): - headers["Pragma"] = "no-cache" - if cherrypy.serving.request.protocol >= (1, 1): - if force or "Cache-Control" not in headers: - headers["Cache-Control"] = "no-cache, must-revalidate" - # Set an explicit Expires date in the past. - expiry = httputil.HTTPDate(1169942400.0) - else: - expiry = httputil.HTTPDate(response.time + secs) - if force or "Expires" not in headers: - headers["Expires"] = expiry diff --git a/lib/cherrypy/lib/covercp.py b/lib/cherrypy/lib/covercp.py deleted file mode 100644 index a74ec342..00000000 --- a/lib/cherrypy/lib/covercp.py +++ /dev/null @@ -1,387 +0,0 @@ -"""Code-coverage tools for CherryPy. - -To use this module, or the coverage tools in the test suite, -you need to download 'coverage.py', either Gareth Rees' `original -implementation `_ -or Ned Batchelder's `enhanced version: -`_ - -To turn on coverage tracing, use the following code:: - - cherrypy.engine.subscribe('start', covercp.start) - -DO NOT subscribe anything on the 'start_thread' channel, as previously -recommended. Calling start once in the main thread should be sufficient -to start coverage on all threads. Calling start again in each thread -effectively clears any coverage data gathered up to that point. - -Run your code, then use the ``covercp.serve()`` function to browse the -results in a web browser. If you run this module from the command line, -it will call ``serve()`` for you. -""" - -import re -import sys -import cgi -from cherrypy._cpcompat import quote_plus -import os -import os.path -localFile = os.path.join(os.path.dirname(__file__), "coverage.cache") - -the_coverage = None -try: - from coverage import coverage - the_coverage = coverage(data_file=localFile) - - def start(): - the_coverage.start() -except ImportError: - # Setting the_coverage to None will raise errors - # that need to be trapped downstream. - the_coverage = None - - import warnings - warnings.warn( - "No code coverage will be performed; " - "coverage.py could not be imported.") - - def start(): - pass -start.priority = 20 - -TEMPLATE_MENU = """ - - CherryPy Coverage Menu - - - -

CherryPy Coverage

""" - -TEMPLATE_FORM = """ -
- - - Show percentages -
- Hide files over - %%
- Exclude files matching
- -
- - - -
""" - -TEMPLATE_FRAMESET = """ -CherryPy coverage data - - - - - -""" - -TEMPLATE_COVERAGE = """ - - Coverage for %(name)s - - - -

%(name)s

-

%(fullpath)s

-

Coverage: %(pc)s%%

""" - -TEMPLATE_LOC_COVERED = """ - %s  - %s -\n""" -TEMPLATE_LOC_NOT_COVERED = """ - %s  - %s -\n""" -TEMPLATE_LOC_EXCLUDED = """ - %s  - %s -\n""" - -TEMPLATE_ITEM = ( - "%s%s%s\n" -) - - -def _percent(statements, missing): - s = len(statements) - e = s - len(missing) - if s > 0: - return int(round(100.0 * e / s)) - return 0 - - -def _show_branch(root, base, path, pct=0, showpct=False, exclude="", - coverage=the_coverage): - - # Show the directory name and any of our children - dirs = [k for k, v in root.items() if v] - dirs.sort() - for name in dirs: - newpath = os.path.join(path, name) - - if newpath.lower().startswith(base): - relpath = newpath[len(base):] - yield "| " * relpath.count(os.sep) - yield ( - "%s\n" % - (newpath, quote_plus(exclude), name) - ) - - for chunk in _show_branch( - root[name], base, newpath, pct, showpct, - exclude, coverage=coverage - ): - yield chunk - - # Now list the files - if path.lower().startswith(base): - relpath = path[len(base):] - files = [k for k, v in root.items() if not v] - files.sort() - for name in files: - newpath = os.path.join(path, name) - - pc_str = "" - if showpct: - try: - _, statements, _, missing, _ = coverage.analysis2(newpath) - except: - # Yes, we really want to pass on all errors. - pass - else: - pc = _percent(statements, missing) - pc_str = ("%3d%% " % pc).replace(' ', ' ') - if pc < float(pct) or pc == -1: - pc_str = "%s" % pc_str - else: - pc_str = "%s" % pc_str - - yield TEMPLATE_ITEM % ("| " * (relpath.count(os.sep) + 1), - pc_str, newpath, name) - - -def _skip_file(path, exclude): - if exclude: - return bool(re.search(exclude, path)) - - -def _graft(path, tree): - d = tree - - p = path - atoms = [] - while True: - p, tail = os.path.split(p) - if not tail: - break - atoms.append(tail) - atoms.append(p) - if p != "/": - atoms.append("/") - - atoms.reverse() - for node in atoms: - if node: - d = d.setdefault(node, {}) - - -def get_tree(base, exclude, coverage=the_coverage): - """Return covered module names as a nested dict.""" - tree = {} - runs = coverage.data.executed_files() - for path in runs: - if not _skip_file(path, exclude) and not os.path.isdir(path): - _graft(path, tree) - return tree - - -class CoverStats(object): - - def __init__(self, coverage, root=None): - self.coverage = coverage - if root is None: - # Guess initial depth. Files outside this path will not be - # reachable from the web interface. - import cherrypy - root = os.path.dirname(cherrypy.__file__) - self.root = root - - def index(self): - return TEMPLATE_FRAMESET % self.root.lower() - index.exposed = True - - def menu(self, base="/", pct="50", showpct="", - exclude=r'python\d\.\d|test|tut\d|tutorial'): - - # The coverage module uses all-lower-case names. - base = base.lower().rstrip(os.sep) - - yield TEMPLATE_MENU - yield TEMPLATE_FORM % locals() - - # Start by showing links for parent paths - yield "
" - path = "" - atoms = base.split(os.sep) - atoms.pop() - for atom in atoms: - path += atom + os.sep - yield ("%s %s" - % (path, quote_plus(exclude), atom, os.sep)) - yield "
" - - yield "
" - - # Then display the tree - tree = get_tree(base, exclude, self.coverage) - if not tree: - yield "

No modules covered.

" - else: - for chunk in _show_branch(tree, base, "/", pct, - showpct == 'checked', exclude, - coverage=self.coverage): - yield chunk - - yield "
" - yield "" - menu.exposed = True - - def annotated_file(self, filename, statements, excluded, missing): - source = open(filename, 'r') - buffer = [] - for lineno, line in enumerate(source.readlines()): - lineno += 1 - line = line.strip("\n\r") - empty_the_buffer = True - if lineno in excluded: - template = TEMPLATE_LOC_EXCLUDED - elif lineno in missing: - template = TEMPLATE_LOC_NOT_COVERED - elif lineno in statements: - template = TEMPLATE_LOC_COVERED - else: - empty_the_buffer = False - buffer.append((lineno, line)) - if empty_the_buffer: - for lno, pastline in buffer: - yield template % (lno, cgi.escape(pastline)) - buffer = [] - yield template % (lineno, cgi.escape(line)) - - def report(self, name): - filename, statements, excluded, missing, _ = self.coverage.analysis2( - name) - pc = _percent(statements, missing) - yield TEMPLATE_COVERAGE % dict(name=os.path.basename(name), - fullpath=name, - pc=pc) - yield '\n' - for line in self.annotated_file(filename, statements, excluded, - missing): - yield line - yield '
' - yield '' - yield '' - report.exposed = True - - -def serve(path=localFile, port=8080, root=None): - if coverage is None: - raise ImportError("The coverage module could not be imported.") - from coverage import coverage - cov = coverage(data_file=path) - cov.load() - - import cherrypy - cherrypy.config.update({'server.socket_port': int(port), - 'server.thread_pool': 10, - 'environment': "production", - }) - cherrypy.quickstart(CoverStats(cov, root)) - -if __name__ == "__main__": - serve(*tuple(sys.argv[1:])) diff --git a/lib/cherrypy/lib/cpstats.py b/lib/cherrypy/lib/cpstats.py deleted file mode 100644 index a8661a14..00000000 --- a/lib/cherrypy/lib/cpstats.py +++ /dev/null @@ -1,687 +0,0 @@ -"""CPStats, a package for collecting and reporting on program statistics. - -Overview -======== - -Statistics about program operation are an invaluable monitoring and debugging -tool. Unfortunately, the gathering and reporting of these critical values is -usually ad-hoc. This package aims to add a centralized place for gathering -statistical performance data, a structure for recording that data which -provides for extrapolation of that data into more useful information, -and a method of serving that data to both human investigators and -monitoring software. Let's examine each of those in more detail. - -Data Gathering --------------- - -Just as Python's `logging` module provides a common importable for gathering -and sending messages, performance statistics would benefit from a similar -common mechanism, and one that does *not* require each package which wishes -to collect stats to import a third-party module. Therefore, we choose to -re-use the `logging` module by adding a `statistics` object to it. - -That `logging.statistics` object is a nested dict. It is not a custom class, -because that would: - - 1. require libraries and applications to import a third-party module in - order to participate - 2. inhibit innovation in extrapolation approaches and in reporting tools, and - 3. be slow. - -There are, however, some specifications regarding the structure of the dict.:: - - { - +----"SQLAlchemy": { - | "Inserts": 4389745, - | "Inserts per Second": - | lambda s: s["Inserts"] / (time() - s["Start"]), - | C +---"Table Statistics": { - | o | "widgets": {-----------+ - N | l | "Rows": 1.3M, | Record - a | l | "Inserts": 400, | - m | e | },---------------------+ - e | c | "froobles": { - s | t | "Rows": 7845, - p | i | "Inserts": 0, - a | o | }, - c | n +---}, - e | "Slow Queries": - | [{"Query": "SELECT * FROM widgets;", - | "Processing Time": 47.840923343, - | }, - | ], - +----}, - } - -The `logging.statistics` dict has four levels. The topmost level is nothing -more than a set of names to introduce modularity, usually along the lines of -package names. If the SQLAlchemy project wanted to participate, for example, -it might populate the item `logging.statistics['SQLAlchemy']`, whose value -would be a second-layer dict we call a "namespace". Namespaces help multiple -packages to avoid collisions over key names, and make reports easier to read, -to boot. The maintainers of SQLAlchemy should feel free to use more than one -namespace if needed (such as 'SQLAlchemy ORM'). Note that there are no case -or other syntax constraints on the namespace names; they should be chosen -to be maximally readable by humans (neither too short nor too long). - -Each namespace, then, is a dict of named statistical values, such as -'Requests/sec' or 'Uptime'. You should choose names which will look -good on a report: spaces and capitalization are just fine. - -In addition to scalars, values in a namespace MAY be a (third-layer) -dict, or a list, called a "collection". For example, the CherryPy -:class:`StatsTool` keeps track of what each request is doing (or has most -recently done) in a 'Requests' collection, where each key is a thread ID; each -value in the subdict MUST be a fourth dict (whew!) of statistical data about -each thread. We call each subdict in the collection a "record". Similarly, -the :class:`StatsTool` also keeps a list of slow queries, where each record -contains data about each slow query, in order. - -Values in a namespace or record may also be functions, which brings us to: - -Extrapolation -------------- - -The collection of statistical data needs to be fast, as close to unnoticeable -as possible to the host program. That requires us to minimize I/O, for example, -but in Python it also means we need to minimize function calls. So when you -are designing your namespace and record values, try to insert the most basic -scalar values you already have on hand. - -When it comes time to report on the gathered data, however, we usually have -much more freedom in what we can calculate. Therefore, whenever reporting -tools (like the provided :class:`StatsPage` CherryPy class) fetch the contents -of `logging.statistics` for reporting, they first call -`extrapolate_statistics` (passing the whole `statistics` dict as the only -argument). This makes a deep copy of the statistics dict so that the -reporting tool can both iterate over it and even change it without harming -the original. But it also expands any functions in the dict by calling them. -For example, you might have a 'Current Time' entry in the namespace with the -value "lambda scope: time.time()". The "scope" parameter is the current -namespace dict (or record, if we're currently expanding one of those -instead), allowing you access to existing static entries. If you're truly -evil, you can even modify more than one entry at a time. - -However, don't try to calculate an entry and then use its value in further -extrapolations; the order in which the functions are called is not guaranteed. -This can lead to a certain amount of duplicated work (or a redesign of your -schema), but that's better than complicating the spec. - -After the whole thing has been extrapolated, it's time for: - -Reporting ---------- - -The :class:`StatsPage` class grabs the `logging.statistics` dict, extrapolates -it all, and then transforms it to HTML for easy viewing. Each namespace gets -its own header and attribute table, plus an extra table for each collection. -This is NOT part of the statistics specification; other tools can format how -they like. - -You can control which columns are output and how they are formatted by updating -StatsPage.formatting, which is a dict that mirrors the keys and nesting of -`logging.statistics`. The difference is that, instead of data values, it has -formatting values. Use None for a given key to indicate to the StatsPage that a -given column should not be output. Use a string with formatting -(such as '%.3f') to interpolate the value(s), or use a callable (such as -lambda v: v.isoformat()) for more advanced formatting. Any entry which is not -mentioned in the formatting dict is output unchanged. - -Monitoring ----------- - -Although the HTML output takes pains to assign unique id's to each with -statistical data, you're probably better off fetching /cpstats/data, which -outputs the whole (extrapolated) `logging.statistics` dict in JSON format. -That is probably easier to parse, and doesn't have any formatting controls, -so you get the "original" data in a consistently-serialized format. -Note: there's no treatment yet for datetime objects. Try time.time() instead -for now if you can. Nagios will probably thank you. - -Turning Collection Off ----------------------- - -It is recommended each namespace have an "Enabled" item which, if False, -stops collection (but not reporting) of statistical data. Applications -SHOULD provide controls to pause and resume collection by setting these -entries to False or True, if present. - - -Usage -===== - -To collect statistics on CherryPy applications:: - - from cherrypy.lib import cpstats - appconfig['/']['tools.cpstats.on'] = True - -To collect statistics on your own code:: - - import logging - # Initialize the repository - if not hasattr(logging, 'statistics'): logging.statistics = {} - # Initialize my namespace - mystats = logging.statistics.setdefault('My Stuff', {}) - # Initialize my namespace's scalars and collections - mystats.update({ - 'Enabled': True, - 'Start Time': time.time(), - 'Important Events': 0, - 'Events/Second': lambda s: ( - (s['Important Events'] / (time.time() - s['Start Time']))), - }) - ... - for event in events: - ... - # Collect stats - if mystats.get('Enabled', False): - mystats['Important Events'] += 1 - -To report statistics:: - - root.cpstats = cpstats.StatsPage() - -To format statistics reports:: - - See 'Reporting', above. - -""" - -# ------------------------------- Statistics -------------------------------- # - -import logging -if not hasattr(logging, 'statistics'): - logging.statistics = {} - - -def extrapolate_statistics(scope): - """Return an extrapolated copy of the given scope.""" - c = {} - for k, v in list(scope.items()): - if isinstance(v, dict): - v = extrapolate_statistics(v) - elif isinstance(v, (list, tuple)): - v = [extrapolate_statistics(record) for record in v] - elif hasattr(v, '__call__'): - v = v(scope) - c[k] = v - return c - - -# -------------------- CherryPy Applications Statistics --------------------- # - -import threading -import time - -import cherrypy - -appstats = logging.statistics.setdefault('CherryPy Applications', {}) -appstats.update({ - 'Enabled': True, - 'Bytes Read/Request': lambda s: ( - s['Total Requests'] and - (s['Total Bytes Read'] / float(s['Total Requests'])) or - 0.0 - ), - 'Bytes Read/Second': lambda s: s['Total Bytes Read'] / s['Uptime'](s), - 'Bytes Written/Request': lambda s: ( - s['Total Requests'] and - (s['Total Bytes Written'] / float(s['Total Requests'])) or - 0.0 - ), - 'Bytes Written/Second': lambda s: ( - s['Total Bytes Written'] / s['Uptime'](s) - ), - 'Current Time': lambda s: time.time(), - 'Current Requests': 0, - 'Requests/Second': lambda s: float(s['Total Requests']) / s['Uptime'](s), - 'Server Version': cherrypy.__version__, - 'Start Time': time.time(), - 'Total Bytes Read': 0, - 'Total Bytes Written': 0, - 'Total Requests': 0, - 'Total Time': 0, - 'Uptime': lambda s: time.time() - s['Start Time'], - 'Requests': {}, -}) - -proc_time = lambda s: time.time() - s['Start Time'] - - -class ByteCountWrapper(object): - - """Wraps a file-like object, counting the number of bytes read.""" - - def __init__(self, rfile): - self.rfile = rfile - self.bytes_read = 0 - - def read(self, size=-1): - data = self.rfile.read(size) - self.bytes_read += len(data) - return data - - def readline(self, size=-1): - data = self.rfile.readline(size) - self.bytes_read += len(data) - return data - - def readlines(self, sizehint=0): - # Shamelessly stolen from StringIO - total = 0 - lines = [] - line = self.readline() - while line: - lines.append(line) - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline() - return lines - - def close(self): - self.rfile.close() - - def __iter__(self): - return self - - def next(self): - data = self.rfile.next() - self.bytes_read += len(data) - return data - - -average_uriset_time = lambda s: s['Count'] and (s['Sum'] / s['Count']) or 0 - - -class StatsTool(cherrypy.Tool): - - """Record various information about the current request.""" - - def __init__(self): - cherrypy.Tool.__init__(self, 'on_end_request', self.record_stop) - - def _setup(self): - """Hook this tool into cherrypy.request. - - The standard CherryPy request object will automatically call this - method when the tool is "turned on" in config. - """ - if appstats.get('Enabled', False): - cherrypy.Tool._setup(self) - self.record_start() - - def record_start(self): - """Record the beginning of a request.""" - request = cherrypy.serving.request - if not hasattr(request.rfile, 'bytes_read'): - request.rfile = ByteCountWrapper(request.rfile) - request.body.fp = request.rfile - - r = request.remote - - appstats['Current Requests'] += 1 - appstats['Total Requests'] += 1 - appstats['Requests'][threading._get_ident()] = { - 'Bytes Read': None, - 'Bytes Written': None, - # Use a lambda so the ip gets updated by tools.proxy later - 'Client': lambda s: '%s:%s' % (r.ip, r.port), - 'End Time': None, - 'Processing Time': proc_time, - 'Request-Line': request.request_line, - 'Response Status': None, - 'Start Time': time.time(), - } - - def record_stop( - self, uriset=None, slow_queries=1.0, slow_queries_count=100, - debug=False, **kwargs): - """Record the end of a request.""" - resp = cherrypy.serving.response - w = appstats['Requests'][threading._get_ident()] - - r = cherrypy.request.rfile.bytes_read - w['Bytes Read'] = r - appstats['Total Bytes Read'] += r - - if resp.stream: - w['Bytes Written'] = 'chunked' - else: - cl = int(resp.headers.get('Content-Length', 0)) - w['Bytes Written'] = cl - appstats['Total Bytes Written'] += cl - - w['Response Status'] = getattr( - resp, 'output_status', None) or resp.status - - w['End Time'] = time.time() - p = w['End Time'] - w['Start Time'] - w['Processing Time'] = p - appstats['Total Time'] += p - - appstats['Current Requests'] -= 1 - - if debug: - cherrypy.log('Stats recorded: %s' % repr(w), 'TOOLS.CPSTATS') - - if uriset: - rs = appstats.setdefault('URI Set Tracking', {}) - r = rs.setdefault(uriset, { - 'Min': None, 'Max': None, 'Count': 0, 'Sum': 0, - 'Avg': average_uriset_time}) - if r['Min'] is None or p < r['Min']: - r['Min'] = p - if r['Max'] is None or p > r['Max']: - r['Max'] = p - r['Count'] += 1 - r['Sum'] += p - - if slow_queries and p > slow_queries: - sq = appstats.setdefault('Slow Queries', []) - sq.append(w.copy()) - if len(sq) > slow_queries_count: - sq.pop(0) - - -import cherrypy -cherrypy.tools.cpstats = StatsTool() - - -# ---------------------- CherryPy Statistics Reporting ---------------------- # - -import os -thisdir = os.path.abspath(os.path.dirname(__file__)) - -try: - import json -except ImportError: - try: - import simplejson as json - except ImportError: - json = None - - -missing = object() - -locale_date = lambda v: time.strftime('%c', time.gmtime(v)) -iso_format = lambda v: time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(v)) - - -def pause_resume(ns): - def _pause_resume(enabled): - pause_disabled = '' - resume_disabled = '' - if enabled: - resume_disabled = 'disabled="disabled" ' - else: - pause_disabled = 'disabled="disabled" ' - return """ -
- - -
-
- - -
- """ % (ns, pause_disabled, ns, resume_disabled) - return _pause_resume - - -class StatsPage(object): - - formatting = { - 'CherryPy Applications': { - 'Enabled': pause_resume('CherryPy Applications'), - 'Bytes Read/Request': '%.3f', - 'Bytes Read/Second': '%.3f', - 'Bytes Written/Request': '%.3f', - 'Bytes Written/Second': '%.3f', - 'Current Time': iso_format, - 'Requests/Second': '%.3f', - 'Start Time': iso_format, - 'Total Time': '%.3f', - 'Uptime': '%.3f', - 'Slow Queries': { - 'End Time': None, - 'Processing Time': '%.3f', - 'Start Time': iso_format, - }, - 'URI Set Tracking': { - 'Avg': '%.3f', - 'Max': '%.3f', - 'Min': '%.3f', - 'Sum': '%.3f', - }, - 'Requests': { - 'Bytes Read': '%s', - 'Bytes Written': '%s', - 'End Time': None, - 'Processing Time': '%.3f', - 'Start Time': None, - }, - }, - 'CherryPy WSGIServer': { - 'Enabled': pause_resume('CherryPy WSGIServer'), - 'Connections/second': '%.3f', - 'Start time': iso_format, - }, - } - - def index(self): - # Transform the raw data into pretty output for HTML - yield """ - - - Statistics - - - -""" - for title, scalars, collections in self.get_namespaces(): - yield """ -

%s

- - - -""" % title - for i, (key, value) in enumerate(scalars): - colnum = i % 3 - if colnum == 0: - yield """ - """ - yield ( - """ - """ % - vars() - ) - if colnum == 2: - yield """ - """ - - if colnum == 0: - yield """ - - - """ - elif colnum == 1: - yield """ - - """ - yield """ - -
%(key)s%(value)s
""" - - for subtitle, headers, subrows in collections: - yield """ -

%s

- - - """ % subtitle - for key in headers: - yield """ - """ % key - yield """ - - - """ - for subrow in subrows: - yield """ - """ - for value in subrow: - yield """ - """ % value - yield """ - """ - yield """ - -
%s
%s
""" - yield """ - - -""" - index.exposed = True - - def get_namespaces(self): - """Yield (title, scalars, collections) for each namespace.""" - s = extrapolate_statistics(logging.statistics) - for title, ns in sorted(s.items()): - scalars = [] - collections = [] - ns_fmt = self.formatting.get(title, {}) - for k, v in sorted(ns.items()): - fmt = ns_fmt.get(k, {}) - if isinstance(v, dict): - headers, subrows = self.get_dict_collection(v, fmt) - collections.append((k, ['ID'] + headers, subrows)) - elif isinstance(v, (list, tuple)): - headers, subrows = self.get_list_collection(v, fmt) - collections.append((k, headers, subrows)) - else: - format = ns_fmt.get(k, missing) - if format is None: - # Don't output this column. - continue - if hasattr(format, '__call__'): - v = format(v) - elif format is not missing: - v = format % v - scalars.append((k, v)) - yield title, scalars, collections - - def get_dict_collection(self, v, formatting): - """Return ([headers], [rows]) for the given collection.""" - # E.g., the 'Requests' dict. - headers = [] - for record in v.itervalues(): - for k3 in record: - format = formatting.get(k3, missing) - if format is None: - # Don't output this column. - continue - if k3 not in headers: - headers.append(k3) - headers.sort() - - subrows = [] - for k2, record in sorted(v.items()): - subrow = [k2] - for k3 in headers: - v3 = record.get(k3, '') - format = formatting.get(k3, missing) - if format is None: - # Don't output this column. - continue - if hasattr(format, '__call__'): - v3 = format(v3) - elif format is not missing: - v3 = format % v3 - subrow.append(v3) - subrows.append(subrow) - - return headers, subrows - - def get_list_collection(self, v, formatting): - """Return ([headers], [subrows]) for the given collection.""" - # E.g., the 'Slow Queries' list. - headers = [] - for record in v: - for k3 in record: - format = formatting.get(k3, missing) - if format is None: - # Don't output this column. - continue - if k3 not in headers: - headers.append(k3) - headers.sort() - - subrows = [] - for record in v: - subrow = [] - for k3 in headers: - v3 = record.get(k3, '') - format = formatting.get(k3, missing) - if format is None: - # Don't output this column. - continue - if hasattr(format, '__call__'): - v3 = format(v3) - elif format is not missing: - v3 = format % v3 - subrow.append(v3) - subrows.append(subrow) - - return headers, subrows - - if json is not None: - def data(self): - s = extrapolate_statistics(logging.statistics) - cherrypy.response.headers['Content-Type'] = 'application/json' - return json.dumps(s, sort_keys=True, indent=4) - data.exposed = True - - def pause(self, namespace): - logging.statistics.get(namespace, {})['Enabled'] = False - raise cherrypy.HTTPRedirect('./') - pause.exposed = True - pause.cp_config = {'tools.allow.on': True, - 'tools.allow.methods': ['POST']} - - def resume(self, namespace): - logging.statistics.get(namespace, {})['Enabled'] = True - raise cherrypy.HTTPRedirect('./') - resume.exposed = True - resume.cp_config = {'tools.allow.on': True, - 'tools.allow.methods': ['POST']} diff --git a/lib/cherrypy/lib/cptools.py b/lib/cherrypy/lib/cptools.py deleted file mode 100644 index f376282c..00000000 --- a/lib/cherrypy/lib/cptools.py +++ /dev/null @@ -1,630 +0,0 @@ -"""Functions for builtin CherryPy tools.""" - -import logging -import re - -import cherrypy -from cherrypy._cpcompat import basestring, md5, set, unicodestr -from cherrypy.lib import httputil as _httputil -from cherrypy.lib import is_iterator - - -# Conditional HTTP request support # - -def validate_etags(autotags=False, debug=False): - """Validate the current ETag against If-Match, If-None-Match headers. - - If autotags is True, an ETag response-header value will be provided - from an MD5 hash of the response body (unless some other code has - already provided an ETag header). If False (the default), the ETag - will not be automatic. - - WARNING: the autotags feature is not designed for URL's which allow - methods other than GET. For example, if a POST to the same URL returns - no content, the automatic ETag will be incorrect, breaking a fundamental - use for entity tags in a possibly destructive fashion. Likewise, if you - raise 304 Not Modified, the response body will be empty, the ETag hash - will be incorrect, and your application will break. - See :rfc:`2616` Section 14.24. - """ - response = cherrypy.serving.response - - # Guard against being run twice. - if hasattr(response, "ETag"): - return - - status, reason, msg = _httputil.valid_status(response.status) - - etag = response.headers.get('ETag') - - # Automatic ETag generation. See warning in docstring. - if etag: - if debug: - cherrypy.log('ETag already set: %s' % etag, 'TOOLS.ETAGS') - elif not autotags: - if debug: - cherrypy.log('Autotags off', 'TOOLS.ETAGS') - elif status != 200: - if debug: - cherrypy.log('Status not 200', 'TOOLS.ETAGS') - else: - etag = response.collapse_body() - etag = '"%s"' % md5(etag).hexdigest() - if debug: - cherrypy.log('Setting ETag: %s' % etag, 'TOOLS.ETAGS') - response.headers['ETag'] = etag - - response.ETag = etag - - # "If the request would, without the If-Match header field, result in - # anything other than a 2xx or 412 status, then the If-Match header - # MUST be ignored." - if debug: - cherrypy.log('Status: %s' % status, 'TOOLS.ETAGS') - if status >= 200 and status <= 299: - request = cherrypy.serving.request - - conditions = request.headers.elements('If-Match') or [] - conditions = [str(x) for x in conditions] - if debug: - cherrypy.log('If-Match conditions: %s' % repr(conditions), - 'TOOLS.ETAGS') - if conditions and not (conditions == ["*"] or etag in conditions): - raise cherrypy.HTTPError(412, "If-Match failed: ETag %r did " - "not match %r" % (etag, conditions)) - - conditions = request.headers.elements('If-None-Match') or [] - conditions = [str(x) for x in conditions] - if debug: - cherrypy.log('If-None-Match conditions: %s' % repr(conditions), - 'TOOLS.ETAGS') - if conditions == ["*"] or etag in conditions: - if debug: - cherrypy.log('request.method: %s' % - request.method, 'TOOLS.ETAGS') - if request.method in ("GET", "HEAD"): - raise cherrypy.HTTPRedirect([], 304) - else: - raise cherrypy.HTTPError(412, "If-None-Match failed: ETag %r " - "matched %r" % (etag, conditions)) - - -def validate_since(): - """Validate the current Last-Modified against If-Modified-Since headers. - - If no code has set the Last-Modified response header, then no validation - will be performed. - """ - response = cherrypy.serving.response - lastmod = response.headers.get('Last-Modified') - if lastmod: - status, reason, msg = _httputil.valid_status(response.status) - - request = cherrypy.serving.request - - since = request.headers.get('If-Unmodified-Since') - if since and since != lastmod: - if (status >= 200 and status <= 299) or status == 412: - raise cherrypy.HTTPError(412) - - since = request.headers.get('If-Modified-Since') - if since and since == lastmod: - if (status >= 200 and status <= 299) or status == 304: - if request.method in ("GET", "HEAD"): - raise cherrypy.HTTPRedirect([], 304) - else: - raise cherrypy.HTTPError(412) - - -# Tool code # - -def allow(methods=None, debug=False): - """Raise 405 if request.method not in methods (default ['GET', 'HEAD']). - - The given methods are case-insensitive, and may be in any order. - If only one method is allowed, you may supply a single string; - if more than one, supply a list of strings. - - Regardless of whether the current method is allowed or not, this - also emits an 'Allow' response header, containing the given methods. - """ - if not isinstance(methods, (tuple, list)): - methods = [methods] - methods = [m.upper() for m in methods if m] - if not methods: - methods = ['GET', 'HEAD'] - elif 'GET' in methods and 'HEAD' not in methods: - methods.append('HEAD') - - cherrypy.response.headers['Allow'] = ', '.join(methods) - if cherrypy.request.method not in methods: - if debug: - cherrypy.log('request.method %r not in methods %r' % - (cherrypy.request.method, methods), 'TOOLS.ALLOW') - raise cherrypy.HTTPError(405) - else: - if debug: - cherrypy.log('request.method %r in methods %r' % - (cherrypy.request.method, methods), 'TOOLS.ALLOW') - - -def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For', - scheme='X-Forwarded-Proto', debug=False): - """Change the base URL (scheme://host[:port][/path]). - - For running a CP server behind Apache, lighttpd, or other HTTP server. - - For Apache and lighttpd, you should leave the 'local' argument at the - default value of 'X-Forwarded-Host'. For Squid, you probably want to set - tools.proxy.local = 'Origin'. - - If you want the new request.base to include path info (not just the host), - you must explicitly set base to the full base path, and ALSO set 'local' - to '', so that the X-Forwarded-Host request header (which never includes - path info) does not override it. Regardless, the value for 'base' MUST - NOT end in a slash. - - cherrypy.request.remote.ip (the IP address of the client) will be - rewritten if the header specified by the 'remote' arg is valid. - By default, 'remote' is set to 'X-Forwarded-For'. If you do not - want to rewrite remote.ip, set the 'remote' arg to an empty string. - """ - - request = cherrypy.serving.request - - if scheme: - s = request.headers.get(scheme, None) - if debug: - cherrypy.log('Testing scheme %r:%r' % (scheme, s), 'TOOLS.PROXY') - if s == 'on' and 'ssl' in scheme.lower(): - # This handles e.g. webfaction's 'X-Forwarded-Ssl: on' header - scheme = 'https' - else: - # This is for lighttpd/pound/Mongrel's 'X-Forwarded-Proto: https' - scheme = s - if not scheme: - scheme = request.base[:request.base.find("://")] - - if local: - lbase = request.headers.get(local, None) - if debug: - cherrypy.log('Testing local %r:%r' % (local, lbase), 'TOOLS.PROXY') - if lbase is not None: - base = lbase.split(',')[0] - if not base: - port = request.local.port - if port == 80: - base = '127.0.0.1' - else: - base = '127.0.0.1:%s' % port - - if base.find("://") == -1: - # add http:// or https:// if needed - base = scheme + "://" + base - - request.base = base - - if remote: - xff = request.headers.get(remote) - if debug: - cherrypy.log('Testing remote %r:%r' % (remote, xff), 'TOOLS.PROXY') - if xff: - if remote == 'X-Forwarded-For': - #Bug #1268 - xff = xff.split(',')[0].strip() - request.remote.ip = xff - - -def ignore_headers(headers=('Range',), debug=False): - """Delete request headers whose field names are included in 'headers'. - - This is a useful tool for working behind certain HTTP servers; - for example, Apache duplicates the work that CP does for 'Range' - headers, and will doubly-truncate the response. - """ - request = cherrypy.serving.request - for name in headers: - if name in request.headers: - if debug: - cherrypy.log('Ignoring request header %r' % name, - 'TOOLS.IGNORE_HEADERS') - del request.headers[name] - - -def response_headers(headers=None, debug=False): - """Set headers on the response.""" - if debug: - cherrypy.log('Setting response headers: %s' % repr(headers), - 'TOOLS.RESPONSE_HEADERS') - for name, value in (headers or []): - cherrypy.serving.response.headers[name] = value -response_headers.failsafe = True - - -def referer(pattern, accept=True, accept_missing=False, error=403, - message='Forbidden Referer header.', debug=False): - """Raise HTTPError if Referer header does/does not match the given pattern. - - pattern - A regular expression pattern to test against the Referer. - - accept - If True, the Referer must match the pattern; if False, - the Referer must NOT match the pattern. - - accept_missing - If True, permit requests with no Referer header. - - error - The HTTP error code to return to the client on failure. - - message - A string to include in the response body on failure. - - """ - try: - ref = cherrypy.serving.request.headers['Referer'] - match = bool(re.match(pattern, ref)) - if debug: - cherrypy.log('Referer %r matches %r' % (ref, pattern), - 'TOOLS.REFERER') - if accept == match: - return - except KeyError: - if debug: - cherrypy.log('No Referer header', 'TOOLS.REFERER') - if accept_missing: - return - - raise cherrypy.HTTPError(error, message) - - -class SessionAuth(object): - - """Assert that the user is logged in.""" - - session_key = "username" - debug = False - - def check_username_and_password(self, username, password): - pass - - def anonymous(self): - """Provide a temporary user name for anonymous users.""" - pass - - def on_login(self, username): - pass - - def on_logout(self, username): - pass - - def on_check(self, username): - pass - - def login_screen(self, from_page='..', username='', error_msg='', - **kwargs): - return (unicodestr(""" -Message: %(error_msg)s -
- Login: -
- Password: -
- -
- -
-""") % vars()).encode("utf-8") - - def do_login(self, username, password, from_page='..', **kwargs): - """Login. May raise redirect, or return True if request handled.""" - response = cherrypy.serving.response - error_msg = self.check_username_and_password(username, password) - if error_msg: - body = self.login_screen(from_page, username, error_msg) - response.body = body - if "Content-Length" in response.headers: - # Delete Content-Length header so finalize() recalcs it. - del response.headers["Content-Length"] - return True - else: - cherrypy.serving.request.login = username - cherrypy.session[self.session_key] = username - self.on_login(username) - raise cherrypy.HTTPRedirect(from_page or "/") - - def do_logout(self, from_page='..', **kwargs): - """Logout. May raise redirect, or return True if request handled.""" - sess = cherrypy.session - username = sess.get(self.session_key) - sess[self.session_key] = None - if username: - cherrypy.serving.request.login = None - self.on_logout(username) - raise cherrypy.HTTPRedirect(from_page) - - def do_check(self): - """Assert username. Raise redirect, or return True if request handled. - """ - sess = cherrypy.session - request = cherrypy.serving.request - response = cherrypy.serving.response - - username = sess.get(self.session_key) - if not username: - sess[self.session_key] = username = self.anonymous() - self._debug_message('No session[username], trying anonymous') - if not username: - url = cherrypy.url(qs=request.query_string) - self._debug_message( - 'No username, routing to login_screen with from_page %(url)r', - locals(), - ) - response.body = self.login_screen(url) - if "Content-Length" in response.headers: - # Delete Content-Length header so finalize() recalcs it. - del response.headers["Content-Length"] - return True - self._debug_message('Setting request.login to %(username)r', locals()) - request.login = username - self.on_check(username) - - def _debug_message(self, template, context={}): - if not self.debug: - return - cherrypy.log(template % context, 'TOOLS.SESSAUTH') - - def run(self): - request = cherrypy.serving.request - response = cherrypy.serving.response - - path = request.path_info - if path.endswith('login_screen'): - self._debug_message('routing %(path)r to login_screen', locals()) - response.body = self.login_screen() - return True - elif path.endswith('do_login'): - if request.method != 'POST': - response.headers['Allow'] = "POST" - self._debug_message('do_login requires POST') - raise cherrypy.HTTPError(405) - self._debug_message('routing %(path)r to do_login', locals()) - return self.do_login(**request.params) - elif path.endswith('do_logout'): - if request.method != 'POST': - response.headers['Allow'] = "POST" - raise cherrypy.HTTPError(405) - self._debug_message('routing %(path)r to do_logout', locals()) - return self.do_logout(**request.params) - else: - self._debug_message('No special path, running do_check') - return self.do_check() - - -def session_auth(**kwargs): - sa = SessionAuth() - for k, v in kwargs.items(): - setattr(sa, k, v) - return sa.run() -session_auth.__doc__ = """Session authentication hook. - -Any attribute of the SessionAuth class may be overridden via a keyword arg -to this function: - -""" + "\n".join(["%s: %s" % (k, type(getattr(SessionAuth, k)).__name__) - for k in dir(SessionAuth) if not k.startswith("__")]) - - -def log_traceback(severity=logging.ERROR, debug=False): - """Write the last error's traceback to the cherrypy error log.""" - cherrypy.log("", "HTTP", severity=severity, traceback=True) - - -def log_request_headers(debug=False): - """Write request headers to the cherrypy error log.""" - h = [" %s: %s" % (k, v) for k, v in cherrypy.serving.request.header_list] - cherrypy.log('\nRequest Headers:\n' + '\n'.join(h), "HTTP") - - -def log_hooks(debug=False): - """Write request.hooks to the cherrypy error log.""" - request = cherrypy.serving.request - - msg = [] - # Sort by the standard points if possible. - from cherrypy import _cprequest - points = _cprequest.hookpoints - for k in request.hooks.keys(): - if k not in points: - points.append(k) - - for k in points: - msg.append(" %s:" % k) - v = request.hooks.get(k, []) - v.sort() - for h in v: - msg.append(" %r" % h) - cherrypy.log('\nRequest Hooks for ' + cherrypy.url() + - ':\n' + '\n'.join(msg), "HTTP") - - -def redirect(url='', internal=True, debug=False): - """Raise InternalRedirect or HTTPRedirect to the given url.""" - if debug: - cherrypy.log('Redirecting %sto: %s' % - ({True: 'internal ', False: ''}[internal], url), - 'TOOLS.REDIRECT') - if internal: - raise cherrypy.InternalRedirect(url) - else: - raise cherrypy.HTTPRedirect(url) - - -def trailing_slash(missing=True, extra=False, status=None, debug=False): - """Redirect if path_info has (missing|extra) trailing slash.""" - request = cherrypy.serving.request - pi = request.path_info - - if debug: - cherrypy.log('is_index: %r, missing: %r, extra: %r, path_info: %r' % - (request.is_index, missing, extra, pi), - 'TOOLS.TRAILING_SLASH') - if request.is_index is True: - if missing: - if not pi.endswith('/'): - new_url = cherrypy.url(pi + '/', request.query_string) - raise cherrypy.HTTPRedirect(new_url, status=status or 301) - elif request.is_index is False: - if extra: - # If pi == '/', don't redirect to ''! - if pi.endswith('/') and pi != '/': - new_url = cherrypy.url(pi[:-1], request.query_string) - raise cherrypy.HTTPRedirect(new_url, status=status or 301) - - -def flatten(debug=False): - """Wrap response.body in a generator that recursively iterates over body. - - This allows cherrypy.response.body to consist of 'nested generators'; - that is, a set of generators that yield generators. - """ - def flattener(input): - numchunks = 0 - for x in input: - if not is_iterator(x): - numchunks += 1 - yield x - else: - for y in flattener(x): - numchunks += 1 - yield y - if debug: - cherrypy.log('Flattened %d chunks' % numchunks, 'TOOLS.FLATTEN') - response = cherrypy.serving.response - response.body = flattener(response.body) - - -def accept(media=None, debug=False): - """Return the client's preferred media-type (from the given Content-Types). - - If 'media' is None (the default), no test will be performed. - - If 'media' is provided, it should be the Content-Type value (as a string) - or values (as a list or tuple of strings) which the current resource - can emit. The client's acceptable media ranges (as declared in the - Accept request header) will be matched in order to these Content-Type - values; the first such string is returned. That is, the return value - will always be one of the strings provided in the 'media' arg (or None - if 'media' is None). - - If no match is found, then HTTPError 406 (Not Acceptable) is raised. - Note that most web browsers send */* as a (low-quality) acceptable - media range, which should match any Content-Type. In addition, "...if - no Accept header field is present, then it is assumed that the client - accepts all media types." - - Matching types are checked in order of client preference first, - and then in the order of the given 'media' values. - - Note that this function does not honor accept-params (other than "q"). - """ - if not media: - return - if isinstance(media, basestring): - media = [media] - request = cherrypy.serving.request - - # Parse the Accept request header, and try to match one - # of the requested media-ranges (in order of preference). - ranges = request.headers.elements('Accept') - if not ranges: - # Any media type is acceptable. - if debug: - cherrypy.log('No Accept header elements', 'TOOLS.ACCEPT') - return media[0] - else: - # Note that 'ranges' is sorted in order of preference - for element in ranges: - if element.qvalue > 0: - if element.value == "*/*": - # Matches any type or subtype - if debug: - cherrypy.log('Match due to */*', 'TOOLS.ACCEPT') - return media[0] - elif element.value.endswith("/*"): - # Matches any subtype - mtype = element.value[:-1] # Keep the slash - for m in media: - if m.startswith(mtype): - if debug: - cherrypy.log('Match due to %s' % element.value, - 'TOOLS.ACCEPT') - return m - else: - # Matches exact value - if element.value in media: - if debug: - cherrypy.log('Match due to %s' % element.value, - 'TOOLS.ACCEPT') - return element.value - - # No suitable media-range found. - ah = request.headers.get('Accept') - if ah is None: - msg = "Your client did not send an Accept header." - else: - msg = "Your client sent this Accept header: %s." % ah - msg += (" But this resource only emits these media types: %s." % - ", ".join(media)) - raise cherrypy.HTTPError(406, msg) - - -class MonitoredHeaderMap(_httputil.HeaderMap): - - def __init__(self): - self.accessed_headers = set() - - def __getitem__(self, key): - self.accessed_headers.add(key) - return _httputil.HeaderMap.__getitem__(self, key) - - def __contains__(self, key): - self.accessed_headers.add(key) - return _httputil.HeaderMap.__contains__(self, key) - - def get(self, key, default=None): - self.accessed_headers.add(key) - return _httputil.HeaderMap.get(self, key, default=default) - - if hasattr({}, 'has_key'): - # Python 2 - def has_key(self, key): - self.accessed_headers.add(key) - return _httputil.HeaderMap.has_key(self, key) - - -def autovary(ignore=None, debug=False): - """Auto-populate the Vary response header based on request.header access. - """ - request = cherrypy.serving.request - - req_h = request.headers - request.headers = MonitoredHeaderMap() - request.headers.update(req_h) - if ignore is None: - ignore = set(['Content-Disposition', 'Content-Length', 'Content-Type']) - - def set_response_header(): - resp_h = cherrypy.serving.response.headers - v = set([e.value for e in resp_h.elements('Vary')]) - if debug: - cherrypy.log( - 'Accessed headers: %s' % request.headers.accessed_headers, - 'TOOLS.AUTOVARY') - v = v.union(request.headers.accessed_headers) - v = v.difference(ignore) - v = list(v) - v.sort() - resp_h['Vary'] = ', '.join(v) - request.hooks.attach('before_finalize', set_response_header, 95) diff --git a/lib/cherrypy/lib/encoding.py b/lib/cherrypy/lib/encoding.py deleted file mode 100644 index a4c2cbd6..00000000 --- a/lib/cherrypy/lib/encoding.py +++ /dev/null @@ -1,421 +0,0 @@ -import struct -import time - -import cherrypy -from cherrypy._cpcompat import basestring, BytesIO, ntob, set, unicodestr -from cherrypy.lib import file_generator -from cherrypy.lib import is_closable_iterator -from cherrypy.lib import set_vary_header - - -def decode(encoding=None, default_encoding='utf-8'): - """Replace or extend the list of charsets used to decode a request entity. - - Either argument may be a single string or a list of strings. - - encoding - If not None, restricts the set of charsets attempted while decoding - a request entity to the given set (even if a different charset is - given in the Content-Type request header). - - default_encoding - Only in effect if the 'encoding' argument is not given. - If given, the set of charsets attempted while decoding a request - entity is *extended* with the given value(s). - - """ - body = cherrypy.request.body - if encoding is not None: - if not isinstance(encoding, list): - encoding = [encoding] - body.attempt_charsets = encoding - elif default_encoding: - if not isinstance(default_encoding, list): - default_encoding = [default_encoding] - body.attempt_charsets = body.attempt_charsets + default_encoding - -class UTF8StreamEncoder: - def __init__(self, iterator): - self._iterator = iterator - - def __iter__(self): - return self - - def next(self): - return self.__next__() - - def __next__(self): - res = next(self._iterator) - if isinstance(res, unicodestr): - res = res.encode('utf-8') - return res - - def close(self): - if is_closable_iterator(self._iterator): - self._iterator.close() - - def __getattr__(self, attr): - if attr.startswith('__'): - raise AttributeError(self, attr) - return getattr(self._iterator, attr) - - -class ResponseEncoder: - - default_encoding = 'utf-8' - failmsg = "Response body could not be encoded with %r." - encoding = None - errors = 'strict' - text_only = True - add_charset = True - debug = False - - def __init__(self, **kwargs): - for k, v in kwargs.items(): - setattr(self, k, v) - - self.attempted_charsets = set() - request = cherrypy.serving.request - if request.handler is not None: - # Replace request.handler with self - if self.debug: - cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE') - self.oldhandler = request.handler - request.handler = self - - def encode_stream(self, encoding): - """Encode a streaming response body. - - Use a generator wrapper, and just pray it works as the stream is - being written out. - """ - if encoding in self.attempted_charsets: - return False - self.attempted_charsets.add(encoding) - - def encoder(body): - for chunk in body: - if isinstance(chunk, unicodestr): - chunk = chunk.encode(encoding, self.errors) - yield chunk - self.body = encoder(self.body) - return True - - def encode_string(self, encoding): - """Encode a buffered response body.""" - if encoding in self.attempted_charsets: - return False - self.attempted_charsets.add(encoding) - body = [] - for chunk in self.body: - if isinstance(chunk, unicodestr): - try: - chunk = chunk.encode(encoding, self.errors) - except (LookupError, UnicodeError): - return False - body.append(chunk) - self.body = body - return True - - def find_acceptable_charset(self): - request = cherrypy.serving.request - response = cherrypy.serving.response - - if self.debug: - cherrypy.log('response.stream %r' % - response.stream, 'TOOLS.ENCODE') - if response.stream: - encoder = self.encode_stream - else: - encoder = self.encode_string - if "Content-Length" in response.headers: - # Delete Content-Length header so finalize() recalcs it. - # Encoded strings may be of different lengths from their - # unicode equivalents, and even from each other. For example: - # >>> t = u"\u7007\u3040" - # >>> len(t) - # 2 - # >>> len(t.encode("UTF-8")) - # 6 - # >>> len(t.encode("utf7")) - # 8 - del response.headers["Content-Length"] - - # Parse the Accept-Charset request header, and try to provide one - # of the requested charsets (in order of user preference). - encs = request.headers.elements('Accept-Charset') - charsets = [enc.value.lower() for enc in encs] - if self.debug: - cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE') - - if self.encoding is not None: - # If specified, force this encoding to be used, or fail. - encoding = self.encoding.lower() - if self.debug: - cherrypy.log('Specified encoding %r' % - encoding, 'TOOLS.ENCODE') - if (not charsets) or "*" in charsets or encoding in charsets: - if self.debug: - cherrypy.log('Attempting encoding %r' % - encoding, 'TOOLS.ENCODE') - if encoder(encoding): - return encoding - else: - if not encs: - if self.debug: - cherrypy.log('Attempting default encoding %r' % - self.default_encoding, 'TOOLS.ENCODE') - # Any character-set is acceptable. - if encoder(self.default_encoding): - return self.default_encoding - else: - raise cherrypy.HTTPError(500, self.failmsg % - self.default_encoding) - else: - for element in encs: - if element.qvalue > 0: - if element.value == "*": - # Matches any charset. Try our default. - if self.debug: - cherrypy.log('Attempting default encoding due ' - 'to %r' % element, 'TOOLS.ENCODE') - if encoder(self.default_encoding): - return self.default_encoding - else: - encoding = element.value - if self.debug: - cherrypy.log('Attempting encoding %s (qvalue >' - '0)' % element, 'TOOLS.ENCODE') - if encoder(encoding): - return encoding - - if "*" not in charsets: - # If no "*" is present in an Accept-Charset field, then all - # character sets not explicitly mentioned get a quality - # value of 0, except for ISO-8859-1, which gets a quality - # value of 1 if not explicitly mentioned. - iso = 'iso-8859-1' - if iso not in charsets: - if self.debug: - cherrypy.log('Attempting ISO-8859-1 encoding', - 'TOOLS.ENCODE') - if encoder(iso): - return iso - - # No suitable encoding found. - ac = request.headers.get('Accept-Charset') - if ac is None: - msg = "Your client did not send an Accept-Charset header." - else: - msg = "Your client sent this Accept-Charset header: %s." % ac - _charsets = ", ".join(sorted(self.attempted_charsets)) - msg += " We tried these charsets: %s." % (_charsets,) - raise cherrypy.HTTPError(406, msg) - - def __call__(self, *args, **kwargs): - response = cherrypy.serving.response - self.body = self.oldhandler(*args, **kwargs) - - if isinstance(self.body, basestring): - # strings get wrapped in a list because iterating over a single - # item list is much faster than iterating over every character - # in a long string. - if self.body: - self.body = [self.body] - else: - # [''] doesn't evaluate to False, so replace it with []. - self.body = [] - elif hasattr(self.body, 'read'): - self.body = file_generator(self.body) - elif self.body is None: - self.body = [] - - ct = response.headers.elements("Content-Type") - if self.debug: - cherrypy.log('Content-Type: %r' % [str(h) - for h in ct], 'TOOLS.ENCODE') - if ct and self.add_charset: - ct = ct[0] - if self.text_only: - if ct.value.lower().startswith("text/"): - if self.debug: - cherrypy.log( - 'Content-Type %s starts with "text/"' % ct, - 'TOOLS.ENCODE') - do_find = True - else: - if self.debug: - cherrypy.log('Not finding because Content-Type %s ' - 'does not start with "text/"' % ct, - 'TOOLS.ENCODE') - do_find = False - else: - if self.debug: - cherrypy.log('Finding because not text_only', - 'TOOLS.ENCODE') - do_find = True - - if do_find: - # Set "charset=..." param on response Content-Type header - ct.params['charset'] = self.find_acceptable_charset() - if self.debug: - cherrypy.log('Setting Content-Type %s' % ct, - 'TOOLS.ENCODE') - response.headers["Content-Type"] = str(ct) - - return self.body - -# GZIP - - -def compress(body, compress_level): - """Compress 'body' at the given compress_level.""" - import zlib - - # See http://www.gzip.org/zlib/rfc-gzip.html - yield ntob('\x1f\x8b') # ID1 and ID2: gzip marker - yield ntob('\x08') # CM: compression method - yield ntob('\x00') # FLG: none set - # MTIME: 4 bytes - yield struct.pack(" 0 is present - * The 'identity' value is given with a qvalue > 0. - - """ - request = cherrypy.serving.request - response = cherrypy.serving.response - - set_vary_header(response, "Accept-Encoding") - - if not response.body: - # Response body is empty (might be a 304 for instance) - if debug: - cherrypy.log('No response body', context='TOOLS.GZIP') - return - - # If returning cached content (which should already have been gzipped), - # don't re-zip. - if getattr(request, "cached", False): - if debug: - cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP') - return - - acceptable = request.headers.elements('Accept-Encoding') - if not acceptable: - # If no Accept-Encoding field is present in a request, - # the server MAY assume that the client will accept any - # content coding. In this case, if "identity" is one of - # the available content-codings, then the server SHOULD use - # the "identity" content-coding, unless it has additional - # information that a different content-coding is meaningful - # to the client. - if debug: - cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP') - return - - ct = response.headers.get('Content-Type', '').split(';')[0] - for coding in acceptable: - if coding.value == 'identity' and coding.qvalue != 0: - if debug: - cherrypy.log('Non-zero identity qvalue: %s' % coding, - context='TOOLS.GZIP') - return - if coding.value in ('gzip', 'x-gzip'): - if coding.qvalue == 0: - if debug: - cherrypy.log('Zero gzip qvalue: %s' % coding, - context='TOOLS.GZIP') - return - - if ct not in mime_types: - # If the list of provided mime-types contains tokens - # such as 'text/*' or 'application/*+xml', - # we go through them and find the most appropriate one - # based on the given content-type. - # The pattern matching is only caring about the most - # common cases, as stated above, and doesn't support - # for extra parameters. - found = False - if '/' in ct: - ct_media_type, ct_sub_type = ct.split('/') - for mime_type in mime_types: - if '/' in mime_type: - media_type, sub_type = mime_type.split('/') - if ct_media_type == media_type: - if sub_type == '*': - found = True - break - elif '+' in sub_type and '+' in ct_sub_type: - ct_left, ct_right = ct_sub_type.split('+') - left, right = sub_type.split('+') - if left == '*' and ct_right == right: - found = True - break - - if not found: - if debug: - cherrypy.log('Content-Type %s not in mime_types %r' % - (ct, mime_types), context='TOOLS.GZIP') - return - - if debug: - cherrypy.log('Gzipping', context='TOOLS.GZIP') - # Return a generator that compresses the page - response.headers['Content-Encoding'] = 'gzip' - response.body = compress(response.body, compress_level) - if "Content-Length" in response.headers: - # Delete Content-Length header so finalize() recalcs it. - del response.headers["Content-Length"] - - return - - if debug: - cherrypy.log('No acceptable encoding found.', context='GZIP') - cherrypy.HTTPError(406, "identity, gzip").set_response() diff --git a/lib/cherrypy/lib/gctools.py b/lib/cherrypy/lib/gctools.py deleted file mode 100644 index 4b616c59..00000000 --- a/lib/cherrypy/lib/gctools.py +++ /dev/null @@ -1,217 +0,0 @@ -import gc -import inspect -import os -import sys -import time - -try: - import objgraph -except ImportError: - objgraph = None - -import cherrypy -from cherrypy import _cprequest, _cpwsgi -from cherrypy.process.plugins import SimplePlugin - - -class ReferrerTree(object): - - """An object which gathers all referrers of an object to a given depth.""" - - peek_length = 40 - - def __init__(self, ignore=None, maxdepth=2, maxparents=10): - self.ignore = ignore or [] - self.ignore.append(inspect.currentframe().f_back) - self.maxdepth = maxdepth - self.maxparents = maxparents - - def ascend(self, obj, depth=1): - """Return a nested list containing referrers of the given object.""" - depth += 1 - parents = [] - - # Gather all referrers in one step to minimize - # cascading references due to repr() logic. - refs = gc.get_referrers(obj) - self.ignore.append(refs) - if len(refs) > self.maxparents: - return [("[%s referrers]" % len(refs), [])] - - try: - ascendcode = self.ascend.__code__ - except AttributeError: - ascendcode = self.ascend.im_func.func_code - for parent in refs: - if inspect.isframe(parent) and parent.f_code is ascendcode: - continue - if parent in self.ignore: - continue - if depth <= self.maxdepth: - parents.append((parent, self.ascend(parent, depth))) - else: - parents.append((parent, [])) - - return parents - - def peek(self, s): - """Return s, restricted to a sane length.""" - if len(s) > (self.peek_length + 3): - half = self.peek_length // 2 - return s[:half] + '...' + s[-half:] - else: - return s - - def _format(self, obj, descend=True): - """Return a string representation of a single object.""" - if inspect.isframe(obj): - filename, lineno, func, context, index = inspect.getframeinfo(obj) - return "" % func - - if not descend: - return self.peek(repr(obj)) - - if isinstance(obj, dict): - return "{" + ", ".join(["%s: %s" % (self._format(k, descend=False), - self._format(v, descend=False)) - for k, v in obj.items()]) + "}" - elif isinstance(obj, list): - return "[" + ", ".join([self._format(item, descend=False) - for item in obj]) + "]" - elif isinstance(obj, tuple): - return "(" + ", ".join([self._format(item, descend=False) - for item in obj]) + ")" - - r = self.peek(repr(obj)) - if isinstance(obj, (str, int, float)): - return r - return "%s: %s" % (type(obj), r) - - def format(self, tree): - """Return a list of string reprs from a nested list of referrers.""" - output = [] - - def ascend(branch, depth=1): - for parent, grandparents in branch: - output.append((" " * depth) + self._format(parent)) - if grandparents: - ascend(grandparents, depth + 1) - ascend(tree) - return output - - -def get_instances(cls): - return [x for x in gc.get_objects() if isinstance(x, cls)] - - -class RequestCounter(SimplePlugin): - - def start(self): - self.count = 0 - - def before_request(self): - self.count += 1 - - def after_request(self): - self.count -= 1 -request_counter = RequestCounter(cherrypy.engine) -request_counter.subscribe() - - -def get_context(obj): - if isinstance(obj, _cprequest.Request): - return "path=%s;stage=%s" % (obj.path_info, obj.stage) - elif isinstance(obj, _cprequest.Response): - return "status=%s" % obj.status - elif isinstance(obj, _cpwsgi.AppResponse): - return "PATH_INFO=%s" % obj.environ.get('PATH_INFO', '') - elif hasattr(obj, "tb_lineno"): - return "tb_lineno=%s" % obj.tb_lineno - return "" - - -class GCRoot(object): - - """A CherryPy page handler for testing reference leaks.""" - - classes = [ - (_cprequest.Request, 2, 2, - "Should be 1 in this request thread and 1 in the main thread."), - (_cprequest.Response, 2, 2, - "Should be 1 in this request thread and 1 in the main thread."), - (_cpwsgi.AppResponse, 1, 1, - "Should be 1 in this request thread only."), - ] - - def index(self): - return "Hello, world!" - index.exposed = True - - def stats(self): - output = ["Statistics:"] - - for trial in range(10): - if request_counter.count > 0: - break - time.sleep(0.5) - else: - output.append("\nNot all requests closed properly.") - - # gc_collect isn't perfectly synchronous, because it may - # break reference cycles that then take time to fully - # finalize. Call it thrice and hope for the best. - gc.collect() - gc.collect() - unreachable = gc.collect() - if unreachable: - if objgraph is not None: - final = objgraph.by_type('Nondestructible') - if final: - objgraph.show_backrefs(final, filename='finalizers.png') - - trash = {} - for x in gc.garbage: - trash[type(x)] = trash.get(type(x), 0) + 1 - if trash: - output.insert(0, "\n%s unreachable objects:" % unreachable) - trash = [(v, k) for k, v in trash.items()] - trash.sort() - for pair in trash: - output.append(" " + repr(pair)) - - # Check declared classes to verify uncollected instances. - # These don't have to be part of a cycle; they can be - # any objects that have unanticipated referrers that keep - # them from being collected. - allobjs = {} - for cls, minobj, maxobj, msg in self.classes: - allobjs[cls] = get_instances(cls) - - for cls, minobj, maxobj, msg in self.classes: - objs = allobjs[cls] - lenobj = len(objs) - if lenobj < minobj or lenobj > maxobj: - if minobj == maxobj: - output.append( - "\nExpected %s %r references, got %s." % - (minobj, cls, lenobj)) - else: - output.append( - "\nExpected %s to %s %r references, got %s." % - (minobj, maxobj, cls, lenobj)) - - for obj in objs: - if objgraph is not None: - ig = [id(objs), id(inspect.currentframe())] - fname = "graph_%s_%s.png" % (cls.__name__, id(obj)) - objgraph.show_backrefs( - obj, extra_ignore=ig, max_depth=4, too_many=20, - filename=fname, extra_info=get_context) - output.append("\nReferrers for %s (refcount=%s):" % - (repr(obj), sys.getrefcount(obj))) - t = ReferrerTree(ignore=[objs], maxdepth=3) - tree = t.ascend(obj) - output.extend(t.format(tree)) - - return "\n".join(output) - stats.exposed = True diff --git a/lib/cherrypy/lib/http.py b/lib/cherrypy/lib/http.py deleted file mode 100644 index 12043ad1..00000000 --- a/lib/cherrypy/lib/http.py +++ /dev/null @@ -1,6 +0,0 @@ -import warnings -warnings.warn('cherrypy.lib.http has been deprecated and will be removed ' - 'in CherryPy 3.3 use cherrypy.lib.httputil instead.', - DeprecationWarning) - -from cherrypy.lib.httputil import * diff --git a/lib/cherrypy/lib/httpauth.py b/lib/cherrypy/lib/httpauth.py deleted file mode 100644 index 0897ea2a..00000000 --- a/lib/cherrypy/lib/httpauth.py +++ /dev/null @@ -1,371 +0,0 @@ -""" -This module defines functions to implement HTTP Digest Authentication -(:rfc:`2617`). -This has full compliance with 'Digest' and 'Basic' authentication methods. In -'Digest' it supports both MD5 and MD5-sess algorithms. - -Usage: - First use 'doAuth' to request the client authentication for a - certain resource. You should send an httplib.UNAUTHORIZED response to the - client so he knows he has to authenticate itself. - - Then use 'parseAuthorization' to retrieve the 'auth_map' used in - 'checkResponse'. - - To use 'checkResponse' you must have already verified the password - associated with the 'username' key in 'auth_map' dict. Then you use the - 'checkResponse' function to verify if the password matches the one sent - by the client. - -SUPPORTED_ALGORITHM - list of supported 'Digest' algorithms -SUPPORTED_QOP - list of supported 'Digest' 'qop'. -""" -__version__ = 1, 0, 1 -__author__ = "Tiago Cogumbreiro " -__credits__ = """ - Peter van Kampen for its recipe which implement most of Digest - authentication: - http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/302378 -""" - -__license__ = """ -Copyright (c) 2005, Tiago Cogumbreiro -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Sylvain Hellegouarch nor the names of his - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -""" - -__all__ = ("digestAuth", "basicAuth", "doAuth", "checkResponse", - "parseAuthorization", "SUPPORTED_ALGORITHM", "md5SessionKey", - "calculateNonce", "SUPPORTED_QOP") - -########################################################################## -import time -from cherrypy._cpcompat import base64_decode, ntob, md5 -from cherrypy._cpcompat import parse_http_list, parse_keqv_list - -MD5 = "MD5" -MD5_SESS = "MD5-sess" -AUTH = "auth" -AUTH_INT = "auth-int" - -SUPPORTED_ALGORITHM = (MD5, MD5_SESS) -SUPPORTED_QOP = (AUTH, AUTH_INT) - -########################################################################## -# doAuth -# -DIGEST_AUTH_ENCODERS = { - MD5: lambda val: md5(ntob(val)).hexdigest(), - MD5_SESS: lambda val: md5(ntob(val)).hexdigest(), - # SHA: lambda val: sha.new(ntob(val)).hexdigest (), -} - - -def calculateNonce(realm, algorithm=MD5): - """This is an auxaliary function that calculates 'nonce' value. It is used - to handle sessions.""" - - global SUPPORTED_ALGORITHM, DIGEST_AUTH_ENCODERS - assert algorithm in SUPPORTED_ALGORITHM - - try: - encoder = DIGEST_AUTH_ENCODERS[algorithm] - except KeyError: - raise NotImplementedError("The chosen algorithm (%s) does not have " - "an implementation yet" % algorithm) - - return encoder("%d:%s" % (time.time(), realm)) - - -def digestAuth(realm, algorithm=MD5, nonce=None, qop=AUTH): - """Challenges the client for a Digest authentication.""" - global SUPPORTED_ALGORITHM, DIGEST_AUTH_ENCODERS, SUPPORTED_QOP - assert algorithm in SUPPORTED_ALGORITHM - assert qop in SUPPORTED_QOP - - if nonce is None: - nonce = calculateNonce(realm, algorithm) - - return 'Digest realm="%s", nonce="%s", algorithm="%s", qop="%s"' % ( - realm, nonce, algorithm, qop - ) - - -def basicAuth(realm): - """Challengenes the client for a Basic authentication.""" - assert '"' not in realm, "Realms cannot contain the \" (quote) character." - - return 'Basic realm="%s"' % realm - - -def doAuth(realm): - """'doAuth' function returns the challenge string b giving priority over - Digest and fallback to Basic authentication when the browser doesn't - support the first one. - - This should be set in the HTTP header under the key 'WWW-Authenticate'.""" - - return digestAuth(realm) + " " + basicAuth(realm) - - -########################################################################## -# Parse authorization parameters -# -def _parseDigestAuthorization(auth_params): - # Convert the auth params to a dict - items = parse_http_list(auth_params) - params = parse_keqv_list(items) - - # Now validate the params - - # Check for required parameters - required = ["username", "realm", "nonce", "uri", "response"] - for k in required: - if k not in params: - return None - - # If qop is sent then cnonce and nc MUST be present - if "qop" in params and not ("cnonce" in params - and "nc" in params): - return None - - # If qop is not sent, neither cnonce nor nc can be present - if ("cnonce" in params or "nc" in params) and \ - "qop" not in params: - return None - - return params - - -def _parseBasicAuthorization(auth_params): - username, password = base64_decode(auth_params).split(":", 1) - return {"username": username, "password": password} - -AUTH_SCHEMES = { - "basic": _parseBasicAuthorization, - "digest": _parseDigestAuthorization, -} - - -def parseAuthorization(credentials): - """parseAuthorization will convert the value of the 'Authorization' key in - the HTTP header to a map itself. If the parsing fails 'None' is returned. - """ - - global AUTH_SCHEMES - - auth_scheme, auth_params = credentials.split(" ", 1) - auth_scheme = auth_scheme.lower() - - parser = AUTH_SCHEMES[auth_scheme] - params = parser(auth_params) - - if params is None: - return - - assert "auth_scheme" not in params - params["auth_scheme"] = auth_scheme - return params - - -########################################################################## -# Check provided response for a valid password -# -def md5SessionKey(params, password): - """ - If the "algorithm" directive's value is "MD5-sess", then A1 - [the session key] is calculated only once - on the first request by the - client following receipt of a WWW-Authenticate challenge from the server. - - This creates a 'session key' for the authentication of subsequent - requests and responses which is different for each "authentication - session", thus limiting the amount of material hashed with any one - key. - - Because the server need only use the hash of the user - credentials in order to create the A1 value, this construction could - be used in conjunction with a third party authentication service so - that the web server would not need the actual password value. The - specification of such a protocol is beyond the scope of this - specification. -""" - - keys = ("username", "realm", "nonce", "cnonce") - params_copy = {} - for key in keys: - params_copy[key] = params[key] - - params_copy["algorithm"] = MD5_SESS - return _A1(params_copy, password) - - -def _A1(params, password): - algorithm = params.get("algorithm", MD5) - H = DIGEST_AUTH_ENCODERS[algorithm] - - if algorithm == MD5: - # If the "algorithm" directive's value is "MD5" or is - # unspecified, then A1 is: - # A1 = unq(username-value) ":" unq(realm-value) ":" passwd - return "%s:%s:%s" % (params["username"], params["realm"], password) - - elif algorithm == MD5_SESS: - - # This is A1 if qop is set - # A1 = H( unq(username-value) ":" unq(realm-value) ":" passwd ) - # ":" unq(nonce-value) ":" unq(cnonce-value) - h_a1 = H("%s:%s:%s" % (params["username"], params["realm"], password)) - return "%s:%s:%s" % (h_a1, params["nonce"], params["cnonce"]) - - -def _A2(params, method, kwargs): - # If the "qop" directive's value is "auth" or is unspecified, then A2 is: - # A2 = Method ":" digest-uri-value - - qop = params.get("qop", "auth") - if qop == "auth": - return method + ":" + params["uri"] - elif qop == "auth-int": - # If the "qop" value is "auth-int", then A2 is: - # A2 = Method ":" digest-uri-value ":" H(entity-body) - entity_body = kwargs.get("entity_body", "") - H = kwargs["H"] - - return "%s:%s:%s" % ( - method, - params["uri"], - H(entity_body) - ) - - else: - raise NotImplementedError("The 'qop' method is unknown: %s" % qop) - - -def _computeDigestResponse(auth_map, password, method="GET", A1=None, - **kwargs): - """ - Generates a response respecting the algorithm defined in RFC 2617 - """ - params = auth_map - - algorithm = params.get("algorithm", MD5) - - H = DIGEST_AUTH_ENCODERS[algorithm] - KD = lambda secret, data: H(secret + ":" + data) - - qop = params.get("qop", None) - - H_A2 = H(_A2(params, method, kwargs)) - - if algorithm == MD5_SESS and A1 is not None: - H_A1 = H(A1) - else: - H_A1 = H(_A1(params, password)) - - if qop in ("auth", "auth-int"): - # If the "qop" value is "auth" or "auth-int": - # request-digest = <"> < KD ( H(A1), unq(nonce-value) - # ":" nc-value - # ":" unq(cnonce-value) - # ":" unq(qop-value) - # ":" H(A2) - # ) <"> - request = "%s:%s:%s:%s:%s" % ( - params["nonce"], - params["nc"], - params["cnonce"], - params["qop"], - H_A2, - ) - elif qop is None: - # If the "qop" directive is not present (this construction is - # for compatibility with RFC 2069): - # request-digest = - # <"> < KD ( H(A1), unq(nonce-value) ":" H(A2) ) > <"> - request = "%s:%s" % (params["nonce"], H_A2) - - return KD(H_A1, request) - - -def _checkDigestResponse(auth_map, password, method="GET", A1=None, **kwargs): - """This function is used to verify the response given by the client when - he tries to authenticate. - Optional arguments: - entity_body - when 'qop' is set to 'auth-int' you MUST provide the - raw data you are going to send to the client (usually the - HTML page. - request_uri - the uri from the request line compared with the 'uri' - directive of the authorization map. They must represent - the same resource (unused at this time). - """ - - if auth_map['realm'] != kwargs.get('realm', None): - return False - - response = _computeDigestResponse( - auth_map, password, method, A1, **kwargs) - - return response == auth_map["response"] - - -def _checkBasicResponse(auth_map, password, method='GET', encrypt=None, - **kwargs): - # Note that the Basic response doesn't provide the realm value so we cannot - # test it - pass_through = lambda password, username=None: password - encrypt = encrypt or pass_through - try: - candidate = encrypt(auth_map["password"], auth_map["username"]) - except TypeError: - # if encrypt only takes one parameter, it's the password - candidate = encrypt(auth_map["password"]) - return candidate == password - -AUTH_RESPONSES = { - "basic": _checkBasicResponse, - "digest": _checkDigestResponse, -} - - -def checkResponse(auth_map, password, method="GET", encrypt=None, **kwargs): - """'checkResponse' compares the auth_map with the password and optionally - other arguments that each implementation might need. - - If the response is of type 'Basic' then the function has the following - signature:: - - checkBasicResponse(auth_map, password) -> bool - - If the response is of type 'Digest' then the function has the following - signature:: - - checkDigestResponse(auth_map, password, method='GET', A1=None) -> bool - - The 'A1' argument is only used in MD5_SESS algorithm based responses. - Check md5SessionKey() for more info. - """ - checker = AUTH_RESPONSES[auth_map["auth_scheme"]] - return checker(auth_map, password, method=method, encrypt=encrypt, - **kwargs) diff --git a/lib/cherrypy/lib/httputil.py b/lib/cherrypy/lib/httputil.py deleted file mode 100644 index 69a18d45..00000000 --- a/lib/cherrypy/lib/httputil.py +++ /dev/null @@ -1,536 +0,0 @@ -"""HTTP library functions. - -This module contains functions for building an HTTP application -framework: any one, not just one whose name starts with "Ch". ;) If you -reference any modules from some popular framework inside *this* module, -FuManChu will personally hang you up by your thumbs and submit you -to a public caning. -""" - -from binascii import b2a_base64 -from cherrypy._cpcompat import BaseHTTPRequestHandler, HTTPDate, ntob, ntou -from cherrypy._cpcompat import basestring, bytestr, iteritems, nativestr -from cherrypy._cpcompat import reversed, sorted, unicodestr, unquote_qs -response_codes = BaseHTTPRequestHandler.responses.copy() - -# From https://bitbucket.org/cherrypy/cherrypy/issue/361 -response_codes[500] = ('Internal Server Error', - 'The server encountered an unexpected condition ' - 'which prevented it from fulfilling the request.') -response_codes[503] = ('Service Unavailable', - 'The server is currently unable to handle the ' - 'request due to a temporary overloading or ' - 'maintenance of the server.') - -import re -import urllib - - -def urljoin(*atoms): - """Return the given path \*atoms, joined into a single URL. - - This will correctly join a SCRIPT_NAME and PATH_INFO into the - original URL, even if either atom is blank. - """ - url = "/".join([x for x in atoms if x]) - while "//" in url: - url = url.replace("//", "/") - # Special-case the final url of "", and return "/" instead. - return url or "/" - - -def urljoin_bytes(*atoms): - """Return the given path *atoms, joined into a single URL. - - This will correctly join a SCRIPT_NAME and PATH_INFO into the - original URL, even if either atom is blank. - """ - url = ntob("/").join([x for x in atoms if x]) - while ntob("//") in url: - url = url.replace(ntob("//"), ntob("/")) - # Special-case the final url of "", and return "/" instead. - return url or ntob("/") - - -def protocol_from_http(protocol_str): - """Return a protocol tuple from the given 'HTTP/x.y' string.""" - return int(protocol_str[5]), int(protocol_str[7]) - - -def get_ranges(headervalue, content_length): - """Return a list of (start, stop) indices from a Range header, or None. - - Each (start, stop) tuple will be composed of two ints, which are suitable - for use in a slicing operation. That is, the header "Range: bytes=3-6", - if applied against a Python string, is requesting resource[3:7]. This - function will return the list [(3, 7)]. - - If this function returns an empty list, you should return HTTP 416. - """ - - if not headervalue: - return None - - result = [] - bytesunit, byteranges = headervalue.split("=", 1) - for brange in byteranges.split(","): - start, stop = [x.strip() for x in brange.split("-", 1)] - if start: - if not stop: - stop = content_length - 1 - start, stop = int(start), int(stop) - if start >= content_length: - # From rfc 2616 sec 14.16: - # "If the server receives a request (other than one - # including an If-Range request-header field) with an - # unsatisfiable Range request-header field (that is, - # all of whose byte-range-spec values have a first-byte-pos - # value greater than the current length of the selected - # resource), it SHOULD return a response code of 416 - # (Requested range not satisfiable)." - continue - if stop < start: - # From rfc 2616 sec 14.16: - # "If the server ignores a byte-range-spec because it - # is syntactically invalid, the server SHOULD treat - # the request as if the invalid Range header field - # did not exist. (Normally, this means return a 200 - # response containing the full entity)." - return None - result.append((start, stop + 1)) - else: - if not stop: - # See rfc quote above. - return None - # Negative subscript (last N bytes) - # - # RFC 2616 Section 14.35.1: - # If the entity is shorter than the specified suffix-length, - # the entire entity-body is used. - if int(stop) > content_length: - result.append((0, content_length)) - else: - result.append((content_length - int(stop), content_length)) - - return result - - -class HeaderElement(object): - - """An element (with parameters) from an HTTP header's element list.""" - - def __init__(self, value, params=None): - self.value = value - if params is None: - params = {} - self.params = params - - def __cmp__(self, other): - return cmp(self.value, other.value) - - def __lt__(self, other): - return self.value < other.value - - def __str__(self): - p = [";%s=%s" % (k, v) for k, v in iteritems(self.params)] - return str("%s%s" % (self.value, "".join(p))) - - def __bytes__(self): - return ntob(self.__str__()) - - def __unicode__(self): - return ntou(self.__str__()) - - def parse(elementstr): - """Transform 'token;key=val' to ('token', {'key': 'val'}).""" - # Split the element into a value and parameters. The 'value' may - # be of the form, "token=token", but we don't split that here. - atoms = [x.strip() for x in elementstr.split(";") if x.strip()] - if not atoms: - initial_value = '' - else: - initial_value = atoms.pop(0).strip() - params = {} - for atom in atoms: - atom = [x.strip() for x in atom.split("=", 1) if x.strip()] - key = atom.pop(0) - if atom: - val = atom[0] - else: - val = "" - params[key] = val - return initial_value, params - parse = staticmethod(parse) - - def from_str(cls, elementstr): - """Construct an instance from a string of the form 'token;key=val'.""" - ival, params = cls.parse(elementstr) - return cls(ival, params) - from_str = classmethod(from_str) - - -q_separator = re.compile(r'; *q *=') - - -class AcceptElement(HeaderElement): - - """An element (with parameters) from an Accept* header's element list. - - AcceptElement objects are comparable; the more-preferred object will be - "less than" the less-preferred object. They are also therefore sortable; - if you sort a list of AcceptElement objects, they will be listed in - priority order; the most preferred value will be first. Yes, it should - have been the other way around, but it's too late to fix now. - """ - - def from_str(cls, elementstr): - qvalue = None - # The first "q" parameter (if any) separates the initial - # media-range parameter(s) (if any) from the accept-params. - atoms = q_separator.split(elementstr, 1) - media_range = atoms.pop(0).strip() - if atoms: - # The qvalue for an Accept header can have extensions. The other - # headers cannot, but it's easier to parse them as if they did. - qvalue = HeaderElement.from_str(atoms[0].strip()) - - media_type, params = cls.parse(media_range) - if qvalue is not None: - params["q"] = qvalue - return cls(media_type, params) - from_str = classmethod(from_str) - - def qvalue(self): - val = self.params.get("q", "1") - if isinstance(val, HeaderElement): - val = val.value - return float(val) - qvalue = property(qvalue, doc="The qvalue, or priority, of this value.") - - def __cmp__(self, other): - diff = cmp(self.qvalue, other.qvalue) - if diff == 0: - diff = cmp(str(self), str(other)) - return diff - - def __lt__(self, other): - if self.qvalue == other.qvalue: - return str(self) < str(other) - else: - return self.qvalue < other.qvalue - -RE_HEADER_SPLIT = re.compile(',(?=(?:[^"]*"[^"]*")*[^"]*$)') -def header_elements(fieldname, fieldvalue): - """Return a sorted HeaderElement list from a comma-separated header string. - """ - if not fieldvalue: - return [] - - result = [] - for element in RE_HEADER_SPLIT.split(fieldvalue): - if fieldname.startswith("Accept") or fieldname == 'TE': - hv = AcceptElement.from_str(element) - else: - hv = HeaderElement.from_str(element) - result.append(hv) - - return list(reversed(sorted(result))) - - -def decode_TEXT(value): - r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> "f\xfcr").""" - try: - # Python 3 - from email.header import decode_header - except ImportError: - from email.Header import decode_header - atoms = decode_header(value) - decodedvalue = "" - for atom, charset in atoms: - if charset is not None: - atom = atom.decode(charset) - decodedvalue += atom - return decodedvalue - - -def valid_status(status): - """Return legal HTTP status Code, Reason-phrase and Message. - - The status arg must be an int, or a str that begins with an int. - - If status is an int, or a str and no reason-phrase is supplied, - a default reason-phrase will be provided. - """ - - if not status: - status = 200 - - status = str(status) - parts = status.split(" ", 1) - if len(parts) == 1: - # No reason supplied. - code, = parts - reason = None - else: - code, reason = parts - reason = reason.strip() - - try: - code = int(code) - except ValueError: - raise ValueError("Illegal response status from server " - "(%s is non-numeric)." % repr(code)) - - if code < 100 or code > 599: - raise ValueError("Illegal response status from server " - "(%s is out of range)." % repr(code)) - - if code not in response_codes: - # code is unknown but not illegal - default_reason, message = "", "" - else: - default_reason, message = response_codes[code] - - if reason is None: - reason = default_reason - - return code, reason, message - - -# NOTE: the parse_qs functions that follow are modified version of those -# in the python3.0 source - we need to pass through an encoding to the unquote -# method, but the default parse_qs function doesn't allow us to. These do. - -def _parse_qs(qs, keep_blank_values=0, strict_parsing=0, encoding='utf-8'): - """Parse a query given as a string argument. - - Arguments: - - qs: URL-encoded query string to be parsed - - keep_blank_values: flag indicating whether blank values in - URL encoded queries should be treated as blank strings. A - true value indicates that blanks should be retained as blank - strings. The default false value indicates that blank values - are to be ignored and treated as if they were not included. - - strict_parsing: flag indicating what to do with parsing errors. If - false (the default), errors are silently ignored. If true, - errors raise a ValueError exception. - - Returns a dict, as G-d intended. - """ - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] - d = {} - for name_value in pairs: - if not name_value and not strict_parsing: - continue - nv = name_value.split('=', 1) - if len(nv) != 2: - if strict_parsing: - raise ValueError("bad query field: %r" % (name_value,)) - # Handle case of a control-name with no equal sign - if keep_blank_values: - nv.append('') - else: - continue - if len(nv[1]) or keep_blank_values: - name = unquote_qs(nv[0], encoding) - value = unquote_qs(nv[1], encoding) - if name in d: - if not isinstance(d[name], list): - d[name] = [d[name]] - d[name].append(value) - else: - d[name] = value - return d - - -image_map_pattern = re.compile(r"[0-9]+,[0-9]+") - - -def parse_query_string(query_string, keep_blank_values=True, encoding='utf-8'): - """Build a params dictionary from a query_string. - - Duplicate key/value pairs in the provided query_string will be - returned as {'key': [val1, val2, ...]}. Single key/values will - be returned as strings: {'key': 'value'}. - """ - if image_map_pattern.match(query_string): - # Server-side image map. Map the coords to 'x' and 'y' - # (like CGI::Request does). - pm = query_string.split(",") - pm = {'x': int(pm[0]), 'y': int(pm[1])} - else: - pm = _parse_qs(query_string, keep_blank_values, encoding=encoding) - return pm - - -class CaseInsensitiveDict(dict): - - """A case-insensitive dict subclass. - - Each key is changed on entry to str(key).title(). - """ - - def __getitem__(self, key): - return dict.__getitem__(self, str(key).title()) - - def __setitem__(self, key, value): - dict.__setitem__(self, str(key).title(), value) - - def __delitem__(self, key): - dict.__delitem__(self, str(key).title()) - - def __contains__(self, key): - return dict.__contains__(self, str(key).title()) - - def get(self, key, default=None): - return dict.get(self, str(key).title(), default) - - if hasattr({}, 'has_key'): - def has_key(self, key): - return str(key).title() in self - - def update(self, E): - for k in E.keys(): - self[str(k).title()] = E[k] - - def fromkeys(cls, seq, value=None): - newdict = cls() - for k in seq: - newdict[str(k).title()] = value - return newdict - fromkeys = classmethod(fromkeys) - - def setdefault(self, key, x=None): - key = str(key).title() - try: - return self[key] - except KeyError: - self[key] = x - return x - - def pop(self, key, default): - return dict.pop(self, str(key).title(), default) - - -# TEXT = -# -# A CRLF is allowed in the definition of TEXT only as part of a header -# field continuation. It is expected that the folding LWS will be -# replaced with a single SP before interpretation of the TEXT value." -if nativestr == bytestr: - header_translate_table = ''.join([chr(i) for i in xrange(256)]) - header_translate_deletechars = ''.join( - [chr(i) for i in xrange(32)]) + chr(127) -else: - header_translate_table = None - header_translate_deletechars = bytes(range(32)) + bytes([127]) - - -class HeaderMap(CaseInsensitiveDict): - - """A dict subclass for HTTP request and response headers. - - Each key is changed on entry to str(key).title(). This allows headers - to be case-insensitive and avoid duplicates. - - Values are header values (decoded according to :rfc:`2047` if necessary). - """ - - protocol = (1, 1) - encodings = ["ISO-8859-1"] - - # Someday, when http-bis is done, this will probably get dropped - # since few servers, clients, or intermediaries do it. But until then, - # we're going to obey the spec as is. - # "Words of *TEXT MAY contain characters from character sets other than - # ISO-8859-1 only when encoded according to the rules of RFC 2047." - use_rfc_2047 = True - - def elements(self, key): - """Return a sorted list of HeaderElements for the given header.""" - key = str(key).title() - value = self.get(key) - return header_elements(key, value) - - def values(self, key): - """Return a sorted list of HeaderElement.value for the given header.""" - return [e.value for e in self.elements(key)] - - def output(self): - """Transform self into a list of (name, value) tuples.""" - return list(self.encode_header_items(self.items())) - - def encode_header_items(cls, header_items): - """ - Prepare the sequence of name, value tuples into a form suitable for - transmitting on the wire for HTTP. - """ - for k, v in header_items: - if isinstance(k, unicodestr): - k = cls.encode(k) - - if not isinstance(v, basestring): - v = str(v) - - if isinstance(v, unicodestr): - v = cls.encode(v) - - # See header_translate_* constants above. - # Replace only if you really know what you're doing. - k = k.translate(header_translate_table, - header_translate_deletechars) - v = v.translate(header_translate_table, - header_translate_deletechars) - - yield (k, v) - encode_header_items = classmethod(encode_header_items) - - def encode(cls, v): - """Return the given header name or value, encoded for HTTP output.""" - for enc in cls.encodings: - try: - return v.encode(enc) - except UnicodeEncodeError: - continue - - if cls.protocol == (1, 1) and cls.use_rfc_2047: - # Encode RFC-2047 TEXT - # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?="). - # We do our own here instead of using the email module - # because we never want to fold lines--folding has - # been deprecated by the HTTP working group. - v = b2a_base64(v.encode('utf-8')) - return (ntob('=?utf-8?b?') + v.strip(ntob('\n')) + ntob('?=')) - - raise ValueError("Could not encode header part %r using " - "any of the encodings %r." % - (v, cls.encodings)) - encode = classmethod(encode) - - -class Host(object): - - """An internet address. - - name - Should be the client's host name. If not available (because no DNS - lookup is performed), the IP address should be used instead. - - """ - - ip = "0.0.0.0" - port = 80 - name = "unknown.tld" - - def __init__(self, ip, port, name=None): - self.ip = ip - self.port = port - if name is None: - name = ip - self.name = name - - def __repr__(self): - return "httputil.Host(%r, %r, %r)" % (self.ip, self.port, self.name) diff --git a/lib/cherrypy/lib/jsontools.py b/lib/cherrypy/lib/jsontools.py deleted file mode 100644 index 90b3ff8a..00000000 --- a/lib/cherrypy/lib/jsontools.py +++ /dev/null @@ -1,96 +0,0 @@ -import cherrypy -from cherrypy._cpcompat import basestring, ntou, json_encode, json_decode - - -def json_processor(entity): - """Read application/json data into request.json.""" - if not entity.headers.get(ntou("Content-Length"), ntou("")): - raise cherrypy.HTTPError(411) - - body = entity.fp.read() - try: - cherrypy.serving.request.json = json_decode(body.decode('utf-8')) - except ValueError: - raise cherrypy.HTTPError(400, 'Invalid JSON document') - - -def json_in(content_type=[ntou('application/json'), ntou('text/javascript')], - force=True, debug=False, processor=json_processor): - """Add a processor to parse JSON request entities: - The default processor places the parsed data into request.json. - - Incoming request entities which match the given content_type(s) will - be deserialized from JSON to the Python equivalent, and the result - stored at cherrypy.request.json. The 'content_type' argument may - be a Content-Type string or a list of allowable Content-Type strings. - - If the 'force' argument is True (the default), then entities of other - content types will not be allowed; "415 Unsupported Media Type" is - raised instead. - - Supply your own processor to use a custom decoder, or to handle the parsed - data differently. The processor can be configured via - tools.json_in.processor or via the decorator method. - - Note that the deserializer requires the client send a Content-Length - request header, or it will raise "411 Length Required". If for any - other reason the request entity cannot be deserialized from JSON, - it will raise "400 Bad Request: Invalid JSON document". - - You must be using Python 2.6 or greater, or have the 'simplejson' - package importable; otherwise, ValueError is raised during processing. - """ - request = cherrypy.serving.request - if isinstance(content_type, basestring): - content_type = [content_type] - - if force: - if debug: - cherrypy.log('Removing body processors %s' % - repr(request.body.processors.keys()), 'TOOLS.JSON_IN') - request.body.processors.clear() - request.body.default_proc = cherrypy.HTTPError( - 415, 'Expected an entity of content type %s' % - ', '.join(content_type)) - - for ct in content_type: - if debug: - cherrypy.log('Adding body processor for %s' % ct, 'TOOLS.JSON_IN') - request.body.processors[ct] = processor - - -def json_handler(*args, **kwargs): - value = cherrypy.serving.request._json_inner_handler(*args, **kwargs) - return json_encode(value) - - -def json_out(content_type='application/json', debug=False, - handler=json_handler): - """Wrap request.handler to serialize its output to JSON. Sets Content-Type. - - If the given content_type is None, the Content-Type response header - is not set. - - Provide your own handler to use a custom encoder. For example - cherrypy.config['tools.json_out.handler'] = , or - @json_out(handler=function). - - You must be using Python 2.6 or greater, or have the 'simplejson' - package importable; otherwise, ValueError is raised during processing. - """ - request = cherrypy.serving.request - # request.handler may be set to None by e.g. the caching tool - # to signal to all components that a response body has already - # been attached, in which case we don't need to wrap anything. - if request.handler is None: - return - if debug: - cherrypy.log('Replacing %s with JSON handler' % request.handler, - 'TOOLS.JSON_OUT') - request._json_inner_handler = request.handler - request.handler = handler - if content_type is not None: - if debug: - cherrypy.log('Setting Content-Type to %s' % - content_type, 'TOOLS.JSON_OUT') - cherrypy.serving.response.headers['Content-Type'] = content_type diff --git a/lib/cherrypy/lib/lockfile.py b/lib/cherrypy/lib/lockfile.py deleted file mode 100644 index 4cf7b1b6..00000000 --- a/lib/cherrypy/lib/lockfile.py +++ /dev/null @@ -1,147 +0,0 @@ -""" -Platform-independent file locking. Inspired by and modeled after zc.lockfile. -""" - -import os - -try: - import msvcrt -except ImportError: - pass - -try: - import fcntl -except ImportError: - pass - - -class LockError(Exception): - - "Could not obtain a lock" - - msg = "Unable to lock %r" - - def __init__(self, path): - super(LockError, self).__init__(self.msg % path) - - -class UnlockError(LockError): - - "Could not release a lock" - - msg = "Unable to unlock %r" - - -# first, a default, naive locking implementation -class LockFile(object): - - """ - A default, naive locking implementation. Always fails if the file - already exists. - """ - - def __init__(self, path): - self.path = path - try: - fd = os.open(path, os.O_CREAT | os.O_WRONLY | os.O_EXCL) - except OSError: - raise LockError(self.path) - os.close(fd) - - def release(self): - os.remove(self.path) - - def remove(self): - pass - - -class SystemLockFile(object): - - """ - An abstract base class for platform-specific locking. - """ - - def __init__(self, path): - self.path = path - - try: - # Open lockfile for writing without truncation: - self.fp = open(path, 'r+') - except IOError: - # If the file doesn't exist, IOError is raised; Use a+ instead. - # Note that there may be a race here. Multiple processes - # could fail on the r+ open and open the file a+, but only - # one will get the the lock and write a pid. - self.fp = open(path, 'a+') - - try: - self._lock_file() - except: - self.fp.seek(1) - self.fp.close() - del self.fp - raise - - self.fp.write(" %s\n" % os.getpid()) - self.fp.truncate() - self.fp.flush() - - def release(self): - if not hasattr(self, 'fp'): - return - self._unlock_file() - self.fp.close() - del self.fp - - def remove(self): - """ - Attempt to remove the file - """ - try: - os.remove(self.path) - except: - pass - - #@abc.abstract_method - # def _lock_file(self): - # """Attempt to obtain the lock on self.fp. Raise LockError if not - # acquired.""" - - def _unlock_file(self): - """Attempt to obtain the lock on self.fp. Raise UnlockError if not - released.""" - - -class WindowsLockFile(SystemLockFile): - - def _lock_file(self): - # Lock just the first byte - try: - msvcrt.locking(self.fp.fileno(), msvcrt.LK_NBLCK, 1) - except IOError: - raise LockError(self.fp.name) - - def _unlock_file(self): - try: - self.fp.seek(0) - msvcrt.locking(self.fp.fileno(), msvcrt.LK_UNLCK, 1) - except IOError: - raise UnlockError(self.fp.name) - -if 'msvcrt' in globals(): - LockFile = WindowsLockFile - - -class UnixLockFile(SystemLockFile): - - def _lock_file(self): - flags = fcntl.LOCK_EX | fcntl.LOCK_NB - try: - fcntl.flock(self.fp.fileno(), flags) - except IOError: - raise LockError(self.fp.name) - - # no need to implement _unlock_file, it will be unlocked on close() - -if 'fcntl' in globals(): - LockFile = UnixLockFile diff --git a/lib/cherrypy/lib/locking.py b/lib/cherrypy/lib/locking.py deleted file mode 100644 index 72dda9b3..00000000 --- a/lib/cherrypy/lib/locking.py +++ /dev/null @@ -1,47 +0,0 @@ -import datetime - - -class NeverExpires(object): - def expired(self): - return False - - -class Timer(object): - """ - A simple timer that will indicate when an expiration time has passed. - """ - def __init__(self, expiration): - "Create a timer that expires at `expiration` (UTC datetime)" - self.expiration = expiration - - @classmethod - def after(cls, elapsed): - """ - Return a timer that will expire after `elapsed` passes. - """ - return cls(datetime.datetime.utcnow() + elapsed) - - def expired(self): - return datetime.datetime.utcnow() >= self.expiration - - -class LockTimeout(Exception): - "An exception when a lock could not be acquired before a timeout period" - - -class LockChecker(object): - """ - Keep track of the time and detect if a timeout has expired - """ - def __init__(self, session_id, timeout): - self.session_id = session_id - if timeout: - self.timer = Timer.after(timeout) - else: - self.timer = NeverExpires() - - def expired(self): - if self.timer.expired(): - raise LockTimeout( - "Timeout acquiring lock for %(session_id)s" % vars(self)) - return False diff --git a/lib/cherrypy/lib/profiler.py b/lib/cherrypy/lib/profiler.py deleted file mode 100644 index 5dac386e..00000000 --- a/lib/cherrypy/lib/profiler.py +++ /dev/null @@ -1,216 +0,0 @@ -"""Profiler tools for CherryPy. - -CherryPy users -============== - -You can profile any of your pages as follows:: - - from cherrypy.lib import profiler - - class Root: - p = profile.Profiler("/path/to/profile/dir") - - def index(self): - self.p.run(self._index) - index.exposed = True - - def _index(self): - return "Hello, world!" - - cherrypy.tree.mount(Root()) - -You can also turn on profiling for all requests -using the ``make_app`` function as WSGI middleware. - -CherryPy developers -=================== - -This module can be used whenever you make changes to CherryPy, -to get a quick sanity-check on overall CP performance. Use the -``--profile`` flag when running the test suite. Then, use the ``serve()`` -function to browse the results in a web browser. If you run this -module from the command line, it will call ``serve()`` for you. - -""" - - -def new_func_strip_path(func_name): - """Make profiler output more readable by adding `__init__` modules' parents - """ - filename, line, name = func_name - if filename.endswith("__init__.py"): - return os.path.basename(filename[:-12]) + filename[-12:], line, name - return os.path.basename(filename), line, name - -try: - import profile - import pstats - pstats.func_strip_path = new_func_strip_path -except ImportError: - profile = None - pstats = None - -import os -import os.path -import sys -import warnings - -from cherrypy._cpcompat import StringIO - -_count = 0 - - -class Profiler(object): - - def __init__(self, path=None): - if not path: - path = os.path.join(os.path.dirname(__file__), "profile") - self.path = path - if not os.path.exists(path): - os.makedirs(path) - - def run(self, func, *args, **params): - """Dump profile data into self.path.""" - global _count - c = _count = _count + 1 - path = os.path.join(self.path, "cp_%04d.prof" % c) - prof = profile.Profile() - result = prof.runcall(func, *args, **params) - prof.dump_stats(path) - return result - - def statfiles(self): - """:rtype: list of available profiles. - """ - return [f for f in os.listdir(self.path) - if f.startswith("cp_") and f.endswith(".prof")] - - def stats(self, filename, sortby='cumulative'): - """:rtype stats(index): output of print_stats() for the given profile. - """ - sio = StringIO() - if sys.version_info >= (2, 5): - s = pstats.Stats(os.path.join(self.path, filename), stream=sio) - s.strip_dirs() - s.sort_stats(sortby) - s.print_stats() - else: - # pstats.Stats before Python 2.5 didn't take a 'stream' arg, - # but just printed to stdout. So re-route stdout. - s = pstats.Stats(os.path.join(self.path, filename)) - s.strip_dirs() - s.sort_stats(sortby) - oldout = sys.stdout - try: - sys.stdout = sio - s.print_stats() - finally: - sys.stdout = oldout - response = sio.getvalue() - sio.close() - return response - - def index(self): - return """ - CherryPy profile data - - - - - - """ - index.exposed = True - - def menu(self): - yield "

Profiling runs

" - yield "

Click on one of the runs below to see profiling data.

" - runs = self.statfiles() - runs.sort() - for i in runs: - yield "%s
" % ( - i, i) - menu.exposed = True - - def report(self, filename): - import cherrypy - cherrypy.response.headers['Content-Type'] = 'text/plain' - return self.stats(filename) - report.exposed = True - - -class ProfileAggregator(Profiler): - - def __init__(self, path=None): - Profiler.__init__(self, path) - global _count - self.count = _count = _count + 1 - self.profiler = profile.Profile() - - def run(self, func, *args, **params): - path = os.path.join(self.path, "cp_%04d.prof" % self.count) - result = self.profiler.runcall(func, *args, **params) - self.profiler.dump_stats(path) - return result - - -class make_app: - - def __init__(self, nextapp, path=None, aggregate=False): - """Make a WSGI middleware app which wraps 'nextapp' with profiling. - - nextapp - the WSGI application to wrap, usually an instance of - cherrypy.Application. - - path - where to dump the profiling output. - - aggregate - if True, profile data for all HTTP requests will go in - a single file. If False (the default), each HTTP request will - dump its profile data into a separate file. - - """ - if profile is None or pstats is None: - msg = ("Your installation of Python does not have a profile " - "module. If you're on Debian, try " - "`sudo apt-get install python-profiler`. " - "See http://www.cherrypy.org/wiki/ProfilingOnDebian " - "for details.") - warnings.warn(msg) - - self.nextapp = nextapp - self.aggregate = aggregate - if aggregate: - self.profiler = ProfileAggregator(path) - else: - self.profiler = Profiler(path) - - def __call__(self, environ, start_response): - def gather(): - result = [] - for line in self.nextapp(environ, start_response): - result.append(line) - return result - return self.profiler.run(gather) - - -def serve(path=None, port=8080): - if profile is None or pstats is None: - msg = ("Your installation of Python does not have a profile module. " - "If you're on Debian, try " - "`sudo apt-get install python-profiler`. " - "See http://www.cherrypy.org/wiki/ProfilingOnDebian " - "for details.") - warnings.warn(msg) - - import cherrypy - cherrypy.config.update({'server.socket_port': int(port), - 'server.thread_pool': 10, - 'environment': "production", - }) - cherrypy.quickstart(Profiler(path)) - - -if __name__ == "__main__": - serve(*tuple(sys.argv[1:])) diff --git a/lib/cherrypy/lib/reprconf.py b/lib/cherrypy/lib/reprconf.py deleted file mode 100644 index 6e70b5ec..00000000 --- a/lib/cherrypy/lib/reprconf.py +++ /dev/null @@ -1,503 +0,0 @@ -"""Generic configuration system using unrepr. - -Configuration data may be supplied as a Python dictionary, as a filename, -or as an open file object. When you supply a filename or file, Python's -builtin ConfigParser is used (with some extensions). - -Namespaces ----------- - -Configuration keys are separated into namespaces by the first "." in the key. - -The only key that cannot exist in a namespace is the "environment" entry. -This special entry 'imports' other config entries from a template stored in -the Config.environments dict. - -You can define your own namespaces to be called when new config is merged -by adding a named handler to Config.namespaces. The name can be any string, -and the handler must be either a callable or a context manager. -""" - -try: - # Python 3.0+ - from configparser import ConfigParser -except ImportError: - from ConfigParser import ConfigParser - -try: - set -except NameError: - from sets import Set as set - -try: - basestring -except NameError: - basestring = str - -try: - # Python 3 - import builtins -except ImportError: - # Python 2 - import __builtin__ as builtins - -import operator as _operator -import sys - - -def as_dict(config): - """Return a dict from 'config' whether it is a dict, file, or filename.""" - if isinstance(config, basestring): - config = Parser().dict_from_file(config) - elif hasattr(config, 'read'): - config = Parser().dict_from_file(config) - return config - - -class NamespaceSet(dict): - - """A dict of config namespace names and handlers. - - Each config entry should begin with a namespace name; the corresponding - namespace handler will be called once for each config entry in that - namespace, and will be passed two arguments: the config key (with the - namespace removed) and the config value. - - Namespace handlers may be any Python callable; they may also be - Python 2.5-style 'context managers', in which case their __enter__ - method should return a callable to be used as the handler. - See cherrypy.tools (the Toolbox class) for an example. - """ - - def __call__(self, config): - """Iterate through config and pass it to each namespace handler. - - config - A flat dict, where keys use dots to separate - namespaces, and values are arbitrary. - - The first name in each config key is used to look up the corresponding - namespace handler. For example, a config entry of {'tools.gzip.on': v} - will call the 'tools' namespace handler with the args: ('gzip.on', v) - """ - # Separate the given config into namespaces - ns_confs = {} - for k in config: - if "." in k: - ns, name = k.split(".", 1) - bucket = ns_confs.setdefault(ns, {}) - bucket[name] = config[k] - - # I chose __enter__ and __exit__ so someday this could be - # rewritten using Python 2.5's 'with' statement: - # for ns, handler in self.iteritems(): - # with handler as callable: - # for k, v in ns_confs.get(ns, {}).iteritems(): - # callable(k, v) - for ns, handler in self.items(): - exit = getattr(handler, "__exit__", None) - if exit: - callable = handler.__enter__() - no_exc = True - try: - try: - for k, v in ns_confs.get(ns, {}).items(): - callable(k, v) - except: - # The exceptional case is handled here - no_exc = False - if exit is None: - raise - if not exit(*sys.exc_info()): - raise - # The exception is swallowed if exit() returns true - finally: - # The normal and non-local-goto cases are handled here - if no_exc and exit: - exit(None, None, None) - else: - for k, v in ns_confs.get(ns, {}).items(): - handler(k, v) - - def __repr__(self): - return "%s.%s(%s)" % (self.__module__, self.__class__.__name__, - dict.__repr__(self)) - - def __copy__(self): - newobj = self.__class__() - newobj.update(self) - return newobj - copy = __copy__ - - -class Config(dict): - - """A dict-like set of configuration data, with defaults and namespaces. - - May take a file, filename, or dict. - """ - - defaults = {} - environments = {} - namespaces = NamespaceSet() - - def __init__(self, file=None, **kwargs): - self.reset() - if file is not None: - self.update(file) - if kwargs: - self.update(kwargs) - - def reset(self): - """Reset self to default values.""" - self.clear() - dict.update(self, self.defaults) - - def update(self, config): - """Update self from a dict, file or filename.""" - if isinstance(config, basestring): - # Filename - config = Parser().dict_from_file(config) - elif hasattr(config, 'read'): - # Open file object - config = Parser().dict_from_file(config) - else: - config = config.copy() - self._apply(config) - - def _apply(self, config): - """Update self from a dict.""" - which_env = config.get('environment') - if which_env: - env = self.environments[which_env] - for k in env: - if k not in config: - config[k] = env[k] - - dict.update(self, config) - self.namespaces(config) - - def __setitem__(self, k, v): - dict.__setitem__(self, k, v) - self.namespaces({k: v}) - - -class Parser(ConfigParser): - - """Sub-class of ConfigParser that keeps the case of options and that - raises an exception if the file cannot be read. - """ - - def optionxform(self, optionstr): - return optionstr - - def read(self, filenames): - if isinstance(filenames, basestring): - filenames = [filenames] - for filename in filenames: - # try: - # fp = open(filename) - # except IOError: - # continue - fp = open(filename) - try: - self._read(fp, filename) - finally: - fp.close() - - def as_dict(self, raw=False, vars=None): - """Convert an INI file to a dictionary""" - # Load INI file into a dict - result = {} - for section in self.sections(): - if section not in result: - result[section] = {} - for option in self.options(section): - value = self.get(section, option, raw=raw, vars=vars) - try: - value = unrepr(value) - except Exception: - x = sys.exc_info()[1] - msg = ("Config error in section: %r, option: %r, " - "value: %r. Config values must be valid Python." % - (section, option, value)) - raise ValueError(msg, x.__class__.__name__, x.args) - result[section][option] = value - return result - - def dict_from_file(self, file): - if hasattr(file, 'read'): - self.readfp(file) - else: - self.read(file) - return self.as_dict() - - -# public domain "unrepr" implementation, found on the web and then improved. - - -class _Builder2: - - def build(self, o): - m = getattr(self, 'build_' + o.__class__.__name__, None) - if m is None: - raise TypeError("unrepr does not recognize %s" % - repr(o.__class__.__name__)) - return m(o) - - def astnode(self, s): - """Return a Python2 ast Node compiled from a string.""" - try: - import compiler - except ImportError: - # Fallback to eval when compiler package is not available, - # e.g. IronPython 1.0. - return eval(s) - - p = compiler.parse("__tempvalue__ = " + s) - return p.getChildren()[1].getChildren()[0].getChildren()[1] - - def build_Subscript(self, o): - expr, flags, subs = o.getChildren() - expr = self.build(expr) - subs = self.build(subs) - return expr[subs] - - def build_CallFunc(self, o): - children = o.getChildren() - # Build callee from first child - callee = self.build(children[0]) - # Build args and kwargs from remaining children - args = [] - kwargs = {} - for child in children[1:]: - class_name = child.__class__.__name__ - # None is ignored - if class_name == 'NoneType': - continue - # Keywords become kwargs - if class_name == 'Keyword': - kwargs.update(self.build(child)) - # Everything else becomes args - else : - args.append(self.build(child)) - return callee(*args, **kwargs) - - def build_Keyword(self, o): - key, value_obj = o.getChildren() - value = self.build(value_obj) - kw_dict = {key: value} - return kw_dict - - def build_List(self, o): - return map(self.build, o.getChildren()) - - def build_Const(self, o): - return o.value - - def build_Dict(self, o): - d = {} - i = iter(map(self.build, o.getChildren())) - for el in i: - d[el] = i.next() - return d - - def build_Tuple(self, o): - return tuple(self.build_List(o)) - - def build_Name(self, o): - name = o.name - if name == 'None': - return None - if name == 'True': - return True - if name == 'False': - return False - - # See if the Name is a package or module. If it is, import it. - try: - return modules(name) - except ImportError: - pass - - # See if the Name is in builtins. - try: - return getattr(builtins, name) - except AttributeError: - pass - - raise TypeError("unrepr could not resolve the name %s" % repr(name)) - - def build_Add(self, o): - left, right = map(self.build, o.getChildren()) - return left + right - - def build_Mul(self, o): - left, right = map(self.build, o.getChildren()) - return left * right - - def build_Getattr(self, o): - parent = self.build(o.expr) - return getattr(parent, o.attrname) - - def build_NoneType(self, o): - return None - - def build_UnarySub(self, o): - return -self.build(o.getChildren()[0]) - - def build_UnaryAdd(self, o): - return self.build(o.getChildren()[0]) - - -class _Builder3: - - def build(self, o): - m = getattr(self, 'build_' + o.__class__.__name__, None) - if m is None: - raise TypeError("unrepr does not recognize %s" % - repr(o.__class__.__name__)) - return m(o) - - def astnode(self, s): - """Return a Python3 ast Node compiled from a string.""" - try: - import ast - except ImportError: - # Fallback to eval when ast package is not available, - # e.g. IronPython 1.0. - return eval(s) - - p = ast.parse("__tempvalue__ = " + s) - return p.body[0].value - - def build_Subscript(self, o): - return self.build(o.value)[self.build(o.slice)] - - def build_Index(self, o): - return self.build(o.value) - - def build_Call(self, o): - callee = self.build(o.func) - - if o.args is None: - args = () - else: - args = tuple([self.build(a) for a in o.args]) - - if o.starargs is None: - starargs = () - else: - starargs = self.build(o.starargs) - - if o.kwargs is None: - kwargs = {} - else: - kwargs = self.build(o.kwargs) - - return callee(*(args + starargs), **kwargs) - - def build_List(self, o): - return list(map(self.build, o.elts)) - - def build_Str(self, o): - return o.s - - def build_Num(self, o): - return o.n - - def build_Dict(self, o): - return dict([(self.build(k), self.build(v)) - for k, v in zip(o.keys, o.values)]) - - def build_Tuple(self, o): - return tuple(self.build_List(o)) - - def build_Name(self, o): - name = o.id - if name == 'None': - return None - if name == 'True': - return True - if name == 'False': - return False - - # See if the Name is a package or module. If it is, import it. - try: - return modules(name) - except ImportError: - pass - - # See if the Name is in builtins. - try: - import builtins - return getattr(builtins, name) - except AttributeError: - pass - - raise TypeError("unrepr could not resolve the name %s" % repr(name)) - - def build_NameConstant(self, o): - return o.value - - def build_UnaryOp(self, o): - op, operand = map(self.build, [o.op, o.operand]) - return op(operand) - - def build_BinOp(self, o): - left, op, right = map(self.build, [o.left, o.op, o.right]) - return op(left, right) - - def build_Add(self, o): - return _operator.add - - def build_Mult(self, o): - return _operator.mul - - def build_USub(self, o): - return _operator.neg - - def build_Attribute(self, o): - parent = self.build(o.value) - return getattr(parent, o.attr) - - def build_NoneType(self, o): - return None - - -def unrepr(s): - """Return a Python object compiled from a string.""" - if not s: - return s - if sys.version_info < (3, 0): - b = _Builder2() - else: - b = _Builder3() - obj = b.astnode(s) - return b.build(obj) - - -def modules(modulePath): - """Load a module and retrieve a reference to that module.""" - __import__(modulePath) - return sys.modules[modulePath] - - -def attributes(full_attribute_name): - """Load a module and retrieve an attribute of that module.""" - - # Parse out the path, module, and attribute - last_dot = full_attribute_name.rfind(".") - attr_name = full_attribute_name[last_dot + 1:] - mod_path = full_attribute_name[:last_dot] - - mod = modules(mod_path) - # Let an AttributeError propagate outward. - try: - attr = getattr(mod, attr_name) - except AttributeError: - raise AttributeError("'%s' object has no attribute '%s'" - % (mod_path, attr_name)) - - # Return a reference to the attribute. - return attr diff --git a/lib/cherrypy/lib/sessions.py b/lib/cherrypy/lib/sessions.py deleted file mode 100644 index 37556363..00000000 --- a/lib/cherrypy/lib/sessions.py +++ /dev/null @@ -1,968 +0,0 @@ -"""Session implementation for CherryPy. - -You need to edit your config file to use sessions. Here's an example:: - - [/] - tools.sessions.on = True - tools.sessions.storage_type = "file" - tools.sessions.storage_path = "/home/site/sessions" - tools.sessions.timeout = 60 - -This sets the session to be stored in files in the directory -/home/site/sessions, and the session timeout to 60 minutes. If you omit -``storage_type`` the sessions will be saved in RAM. -``tools.sessions.on`` is the only required line for working sessions, -the rest are optional. - -By default, the session ID is passed in a cookie, so the client's browser must -have cookies enabled for your site. - -To set data for the current session, use -``cherrypy.session['fieldname'] = 'fieldvalue'``; -to get data use ``cherrypy.session.get('fieldname')``. - -================ -Locking sessions -================ - -By default, the ``'locking'`` mode of sessions is ``'implicit'``, which means -the session is locked early and unlocked late. Be mindful of this default mode -for any requests that take a long time to process (streaming responses, -expensive calculations, database lookups, API calls, etc), as other concurrent -requests that also utilize sessions will hang until the session is unlocked. - -If you want to control when the session data is locked and unlocked, -set ``tools.sessions.locking = 'explicit'``. Then call -``cherrypy.session.acquire_lock()`` and ``cherrypy.session.release_lock()``. -Regardless of which mode you use, the session is guaranteed to be unlocked when -the request is complete. - -================= -Expiring Sessions -================= - -You can force a session to expire with :func:`cherrypy.lib.sessions.expire`. -Simply call that function at the point you want the session to expire, and it -will cause the session cookie to expire client-side. - -=========================== -Session Fixation Protection -=========================== - -If CherryPy receives, via a request cookie, a session id that it does not -recognize, it will reject that id and create a new one to return in the -response cookie. This `helps prevent session fixation attacks -`_. -However, CherryPy "recognizes" a session id by looking up the saved session -data for that id. Therefore, if you never save any session data, -**you will get a new session id for every request**. - -================ -Sharing Sessions -================ - -If you run multiple instances of CherryPy (for example via mod_python behind -Apache prefork), you most likely cannot use the RAM session backend, since each -instance of CherryPy will have its own memory space. Use a different backend -instead, and verify that all instances are pointing at the same file or db -location. Alternately, you might try a load balancer which makes sessions -"sticky". Google is your friend, there. - -================ -Expiration Dates -================ - -The response cookie will possess an expiration date to inform the client at -which point to stop sending the cookie back in requests. If the server time -and client time differ, expect sessions to be unreliable. **Make sure the -system time of your server is accurate**. - -CherryPy defaults to a 60-minute session timeout, which also applies to the -cookie which is sent to the client. Unfortunately, some versions of Safari -("4 public beta" on Windows XP at least) appear to have a bug in their parsing -of the GMT expiration date--they appear to interpret the date as one hour in -the past. Sixty minutes minus one hour is pretty close to zero, so you may -experience this bug as a new session id for every request, unless the requests -are less than one second apart. To fix, try increasing the session.timeout. - -On the other extreme, some users report Firefox sending cookies after their -expiration date, although this was on a system with an inaccurate system time. -Maybe FF doesn't trust system time. -""" -import sys -import datetime -import os -import time -import threading -import types - -import cherrypy -from cherrypy._cpcompat import copyitems, pickle, random20, unicodestr -from cherrypy.lib import httputil -from cherrypy.lib import lockfile -from cherrypy.lib import locking -from cherrypy.lib import is_iterator - -missing = object() - - -class Session(object): - - """A CherryPy dict-like Session object (one per request).""" - - _id = None - - id_observers = None - "A list of callbacks to which to pass new id's." - - def _get_id(self): - return self._id - - def _set_id(self, value): - self._id = value - for o in self.id_observers: - o(value) - id = property(_get_id, _set_id, doc="The current session ID.") - - timeout = 60 - "Number of minutes after which to delete session data." - - locked = False - """ - If True, this session instance has exclusive read/write access - to session data.""" - - loaded = False - """ - If True, data has been retrieved from storage. This should happen - automatically on the first attempt to access session data.""" - - clean_thread = None - "Class-level Monitor which calls self.clean_up." - - clean_freq = 5 - "The poll rate for expired session cleanup in minutes." - - originalid = None - "The session id passed by the client. May be missing or unsafe." - - missing = False - "True if the session requested by the client did not exist." - - regenerated = False - """ - True if the application called session.regenerate(). This is not set by - internal calls to regenerate the session id.""" - - debug = False - "If True, log debug information." - - # --------------------- Session management methods --------------------- # - - def __init__(self, id=None, **kwargs): - self.id_observers = [] - self._data = {} - - for k, v in kwargs.items(): - setattr(self, k, v) - - self.originalid = id - self.missing = False - if id is None: - if self.debug: - cherrypy.log('No id given; making a new one', 'TOOLS.SESSIONS') - self._regenerate() - else: - self.id = id - if self._exists(): - if self.debug: - cherrypy.log('Set id to %s.' % id, 'TOOLS.SESSIONS') - else: - if self.debug: - cherrypy.log('Expired or malicious session %r; ' - 'making a new one' % id, 'TOOLS.SESSIONS') - # Expired or malicious session. Make a new one. - # See https://bitbucket.org/cherrypy/cherrypy/issue/709. - self.id = None - self.missing = True - self._regenerate() - - def now(self): - """Generate the session specific concept of 'now'. - - Other session providers can override this to use alternative, - possibly timezone aware, versions of 'now'. - """ - return datetime.datetime.now() - - def regenerate(self): - """Replace the current session (with a new id).""" - self.regenerated = True - self._regenerate() - - def _regenerate(self): - if self.id is not None: - if self.debug: - cherrypy.log( - 'Deleting the existing session %r before ' - 'regeneration.' % self.id, - 'TOOLS.SESSIONS') - self.delete() - - old_session_was_locked = self.locked - if old_session_was_locked: - self.release_lock() - if self.debug: - cherrypy.log('Old lock released.', 'TOOLS.SESSIONS') - - self.id = None - while self.id is None: - self.id = self.generate_id() - # Assert that the generated id is not already stored. - if self._exists(): - self.id = None - if self.debug: - cherrypy.log('Set id to generated %s.' % self.id, - 'TOOLS.SESSIONS') - - if old_session_was_locked: - self.acquire_lock() - if self.debug: - cherrypy.log('Regenerated lock acquired.', 'TOOLS.SESSIONS') - - def clean_up(self): - """Clean up expired sessions.""" - pass - - def generate_id(self): - """Return a new session id.""" - return random20() - - def save(self): - """Save session data.""" - try: - # If session data has never been loaded then it's never been - # accessed: no need to save it - if self.loaded: - t = datetime.timedelta(seconds=self.timeout * 60) - expiration_time = self.now() + t - if self.debug: - cherrypy.log('Saving session %r with expiry %s' % - (self.id, expiration_time), - 'TOOLS.SESSIONS') - self._save(expiration_time) - else: - if self.debug: - cherrypy.log( - 'Skipping save of session %r (no session loaded).' % - self.id, 'TOOLS.SESSIONS') - finally: - if self.locked: - # Always release the lock if the user didn't release it - self.release_lock() - if self.debug: - cherrypy.log('Lock released after save.', 'TOOLS.SESSIONS') - - def load(self): - """Copy stored session data into this session instance.""" - data = self._load() - # data is either None or a tuple (session_data, expiration_time) - if data is None or data[1] < self.now(): - if self.debug: - cherrypy.log('Expired session %r, flushing data.' % self.id, - 'TOOLS.SESSIONS') - self._data = {} - else: - if self.debug: - cherrypy.log('Data loaded for session %r.' % self.id, - 'TOOLS.SESSIONS') - self._data = data[0] - self.loaded = True - - # Stick the clean_thread in the class, not the instance. - # The instances are created and destroyed per-request. - cls = self.__class__ - if self.clean_freq and not cls.clean_thread: - # clean_up is an instancemethod and not a classmethod, - # so that tool config can be accessed inside the method. - t = cherrypy.process.plugins.Monitor( - cherrypy.engine, self.clean_up, self.clean_freq * 60, - name='Session cleanup') - t.subscribe() - cls.clean_thread = t - t.start() - if self.debug: - cherrypy.log('Started cleanup thread.', 'TOOLS.SESSIONS') - - def delete(self): - """Delete stored session data.""" - self._delete() - if self.debug: - cherrypy.log('Deleted session %s.' % self.id, - 'TOOLS.SESSIONS') - - # -------------------- Application accessor methods -------------------- # - - def __getitem__(self, key): - if not self.loaded: - self.load() - return self._data[key] - - def __setitem__(self, key, value): - if not self.loaded: - self.load() - self._data[key] = value - - def __delitem__(self, key): - if not self.loaded: - self.load() - del self._data[key] - - def pop(self, key, default=missing): - """Remove the specified key and return the corresponding value. - If key is not found, default is returned if given, - otherwise KeyError is raised. - """ - if not self.loaded: - self.load() - if default is missing: - return self._data.pop(key) - else: - return self._data.pop(key, default) - - def __contains__(self, key): - if not self.loaded: - self.load() - return key in self._data - - if hasattr({}, 'has_key'): - def has_key(self, key): - """D.has_key(k) -> True if D has a key k, else False.""" - if not self.loaded: - self.load() - return key in self._data - - def get(self, key, default=None): - """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.""" - if not self.loaded: - self.load() - return self._data.get(key, default) - - def update(self, d): - """D.update(E) -> None. Update D from E: for k in E: D[k] = E[k].""" - if not self.loaded: - self.load() - self._data.update(d) - - def setdefault(self, key, default=None): - """D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D.""" - if not self.loaded: - self.load() - return self._data.setdefault(key, default) - - def clear(self): - """D.clear() -> None. Remove all items from D.""" - if not self.loaded: - self.load() - self._data.clear() - - def keys(self): - """D.keys() -> list of D's keys.""" - if not self.loaded: - self.load() - return self._data.keys() - - def items(self): - """D.items() -> list of D's (key, value) pairs, as 2-tuples.""" - if not self.loaded: - self.load() - return self._data.items() - - def values(self): - """D.values() -> list of D's values.""" - if not self.loaded: - self.load() - return self._data.values() - - -class RamSession(Session): - - # Class-level objects. Don't rebind these! - cache = {} - locks = {} - - def clean_up(self): - """Clean up expired sessions.""" - - now = self.now() - for _id, (data, expiration_time) in copyitems(self.cache): - if expiration_time <= now: - try: - del self.cache[_id] - except KeyError: - pass - try: - if self.locks[_id].acquire(blocking=False): - lock = self.locks.pop(_id) - lock.release() - except KeyError: - pass - - # added to remove obsolete lock objects - for _id in list(self.locks): - if _id not in self.cache and self.locks[_id].acquire(blocking=False): - lock = self.locks.pop(_id) - lock.release() - - def _exists(self): - return self.id in self.cache - - def _load(self): - return self.cache.get(self.id) - - def _save(self, expiration_time): - self.cache[self.id] = (self._data, expiration_time) - - def _delete(self): - self.cache.pop(self.id, None) - - def acquire_lock(self): - """Acquire an exclusive lock on the currently-loaded session data.""" - self.locked = True - self.locks.setdefault(self.id, threading.RLock()).acquire() - - def release_lock(self): - """Release the lock on the currently-loaded session data.""" - self.locks[self.id].release() - self.locked = False - - def __len__(self): - """Return the number of active sessions.""" - return len(self.cache) - - -class FileSession(Session): - - """Implementation of the File backend for sessions - - storage_path - The folder where session data will be saved. Each session - will be saved as pickle.dump(data, expiration_time) in its own file; - the filename will be self.SESSION_PREFIX + self.id. - - lock_timeout - A timedelta or numeric seconds indicating how long - to block acquiring a lock. If None (default), acquiring a lock - will block indefinitely. - """ - - SESSION_PREFIX = 'session-' - LOCK_SUFFIX = '.lock' - pickle_protocol = pickle.HIGHEST_PROTOCOL - - def __init__(self, id=None, **kwargs): - # The 'storage_path' arg is required for file-based sessions. - kwargs['storage_path'] = os.path.abspath(kwargs['storage_path']) - kwargs.setdefault('lock_timeout', None) - - Session.__init__(self, id=id, **kwargs) - - # validate self.lock_timeout - if isinstance(self.lock_timeout, (int, float)): - self.lock_timeout = datetime.timedelta(seconds=self.lock_timeout) - if not isinstance(self.lock_timeout, (datetime.timedelta, type(None))): - raise ValueError("Lock timeout must be numeric seconds or " - "a timedelta instance.") - - def setup(cls, **kwargs): - """Set up the storage system for file-based sessions. - - This should only be called once per process; this will be done - automatically when using sessions.init (as the built-in Tool does). - """ - # The 'storage_path' arg is required for file-based sessions. - kwargs['storage_path'] = os.path.abspath(kwargs['storage_path']) - - for k, v in kwargs.items(): - setattr(cls, k, v) - setup = classmethod(setup) - - def _get_file_path(self): - f = os.path.join(self.storage_path, self.SESSION_PREFIX + self.id) - if not os.path.abspath(f).startswith(self.storage_path): - raise cherrypy.HTTPError(400, "Invalid session id in cookie.") - return f - - def _exists(self): - path = self._get_file_path() - return os.path.exists(path) - - def _load(self, path=None): - assert self.locked, ("The session load without being locked. " - "Check your tools' priority levels.") - if path is None: - path = self._get_file_path() - try: - f = open(path, "rb") - try: - return pickle.load(f) - finally: - f.close() - except (IOError, EOFError): - e = sys.exc_info()[1] - if self.debug: - cherrypy.log("Error loading the session pickle: %s" % - e, 'TOOLS.SESSIONS') - return None - - def _save(self, expiration_time): - assert self.locked, ("The session was saved without being locked. " - "Check your tools' priority levels.") - f = open(self._get_file_path(), "wb") - try: - pickle.dump((self._data, expiration_time), f, self.pickle_protocol) - finally: - f.close() - - def _delete(self): - assert self.locked, ("The session deletion without being locked. " - "Check your tools' priority levels.") - try: - os.unlink(self._get_file_path()) - except OSError: - pass - - def acquire_lock(self, path=None): - """Acquire an exclusive lock on the currently-loaded session data.""" - if path is None: - path = self._get_file_path() - path += self.LOCK_SUFFIX - checker = locking.LockChecker(self.id, self.lock_timeout) - while not checker.expired(): - try: - self.lock = lockfile.LockFile(path) - except lockfile.LockError: - time.sleep(0.1) - else: - break - self.locked = True - if self.debug: - cherrypy.log('Lock acquired.', 'TOOLS.SESSIONS') - - def release_lock(self, path=None): - """Release the lock on the currently-loaded session data.""" - self.lock.release() - self.lock.remove() - self.locked = False - - def clean_up(self): - """Clean up expired sessions.""" - now = self.now() - # Iterate over all session files in self.storage_path - for fname in os.listdir(self.storage_path): - if (fname.startswith(self.SESSION_PREFIX) - and not fname.endswith(self.LOCK_SUFFIX)): - # We have a session file: lock and load it and check - # if it's expired. If it fails, nevermind. - path = os.path.join(self.storage_path, fname) - self.acquire_lock(path) - if self.debug: - # This is a bit of a hack, since we're calling clean_up - # on the first instance rather than the entire class, - # so depending on whether you have "debug" set on the - # path of the first session called, this may not run. - cherrypy.log('Cleanup lock acquired.', 'TOOLS.SESSIONS') - - try: - contents = self._load(path) - # _load returns None on IOError - if contents is not None: - data, expiration_time = contents - if expiration_time < now: - # Session expired: deleting it - os.unlink(path) - finally: - self.release_lock(path) - - def __len__(self): - """Return the number of active sessions.""" - return len([fname for fname in os.listdir(self.storage_path) - if (fname.startswith(self.SESSION_PREFIX) - and not fname.endswith(self.LOCK_SUFFIX))]) - - -class PostgresqlSession(Session): - - """ Implementation of the PostgreSQL backend for sessions. It assumes - a table like this:: - - create table session ( - id varchar(40), - data text, - expiration_time timestamp - ) - - You must provide your own get_db function. - """ - - pickle_protocol = pickle.HIGHEST_PROTOCOL - - def __init__(self, id=None, **kwargs): - Session.__init__(self, id, **kwargs) - self.cursor = self.db.cursor() - - def setup(cls, **kwargs): - """Set up the storage system for Postgres-based sessions. - - This should only be called once per process; this will be done - automatically when using sessions.init (as the built-in Tool does). - """ - for k, v in kwargs.items(): - setattr(cls, k, v) - - self.db = self.get_db() - setup = classmethod(setup) - - def __del__(self): - if self.cursor: - self.cursor.close() - self.db.commit() - - def _exists(self): - # Select session data from table - self.cursor.execute('select data, expiration_time from session ' - 'where id=%s', (self.id,)) - rows = self.cursor.fetchall() - return bool(rows) - - def _load(self): - # Select session data from table - self.cursor.execute('select data, expiration_time from session ' - 'where id=%s', (self.id,)) - rows = self.cursor.fetchall() - if not rows: - return None - - pickled_data, expiration_time = rows[0] - data = pickle.loads(pickled_data) - return data, expiration_time - - def _save(self, expiration_time): - pickled_data = pickle.dumps(self._data, self.pickle_protocol) - self.cursor.execute('update session set data = %s, ' - 'expiration_time = %s where id = %s', - (pickled_data, expiration_time, self.id)) - - def _delete(self): - self.cursor.execute('delete from session where id=%s', (self.id,)) - - def acquire_lock(self): - """Acquire an exclusive lock on the currently-loaded session data.""" - # We use the "for update" clause to lock the row - self.locked = True - self.cursor.execute('select id from session where id=%s for update', - (self.id,)) - if self.debug: - cherrypy.log('Lock acquired.', 'TOOLS.SESSIONS') - - def release_lock(self): - """Release the lock on the currently-loaded session data.""" - # We just close the cursor and that will remove the lock - # introduced by the "for update" clause - self.cursor.close() - self.locked = False - - def clean_up(self): - """Clean up expired sessions.""" - self.cursor.execute('delete from session where expiration_time < %s', - (self.now(),)) - - -class MemcachedSession(Session): - - # The most popular memcached client for Python isn't thread-safe. - # Wrap all .get and .set operations in a single lock. - mc_lock = threading.RLock() - - # This is a seperate set of locks per session id. - locks = {} - - servers = ['127.0.0.1:11211'] - - def setup(cls, **kwargs): - """Set up the storage system for memcached-based sessions. - - This should only be called once per process; this will be done - automatically when using sessions.init (as the built-in Tool does). - """ - for k, v in kwargs.items(): - setattr(cls, k, v) - - import memcache - cls.cache = memcache.Client(cls.servers) - setup = classmethod(setup) - - def _get_id(self): - return self._id - - def _set_id(self, value): - # This encode() call is where we differ from the superclass. - # Memcache keys MUST be byte strings, not unicode. - if isinstance(value, unicodestr): - value = value.encode('utf-8') - - self._id = value - for o in self.id_observers: - o(value) - id = property(_get_id, _set_id, doc="The current session ID.") - - def _exists(self): - self.mc_lock.acquire() - try: - return bool(self.cache.get(self.id)) - finally: - self.mc_lock.release() - - def _load(self): - self.mc_lock.acquire() - try: - return self.cache.get(self.id) - finally: - self.mc_lock.release() - - def _save(self, expiration_time): - # Send the expiration time as "Unix time" (seconds since 1/1/1970) - td = int(time.mktime(expiration_time.timetuple())) - self.mc_lock.acquire() - try: - if not self.cache.set(self.id, (self._data, expiration_time), td): - raise AssertionError( - "Session data for id %r not set." % self.id) - finally: - self.mc_lock.release() - - def _delete(self): - self.cache.delete(self.id) - - def acquire_lock(self): - """Acquire an exclusive lock on the currently-loaded session data.""" - self.locked = True - self.locks.setdefault(self.id, threading.RLock()).acquire() - if self.debug: - cherrypy.log('Lock acquired.', 'TOOLS.SESSIONS') - - def release_lock(self): - """Release the lock on the currently-loaded session data.""" - self.locks[self.id].release() - self.locked = False - - def __len__(self): - """Return the number of active sessions.""" - raise NotImplementedError - - -# Hook functions (for CherryPy tools) - -def save(): - """Save any changed session data.""" - - if not hasattr(cherrypy.serving, "session"): - return - request = cherrypy.serving.request - response = cherrypy.serving.response - - # Guard against running twice - if hasattr(request, "_sessionsaved"): - return - request._sessionsaved = True - - if response.stream: - # If the body is being streamed, we have to save the data - # *after* the response has been written out - request.hooks.attach('on_end_request', cherrypy.session.save) - else: - # If the body is not being streamed, we save the data now - # (so we can release the lock). - if is_iterator(response.body): - response.collapse_body() - cherrypy.session.save() -save.failsafe = True - - -def close(): - """Close the session object for this request.""" - sess = getattr(cherrypy.serving, "session", None) - if getattr(sess, "locked", False): - # If the session is still locked we release the lock - sess.release_lock() - if sess.debug: - cherrypy.log('Lock released on close.', 'TOOLS.SESSIONS') -close.failsafe = True -close.priority = 90 - - -def init(storage_type='ram', path=None, path_header=None, name='session_id', - timeout=60, domain=None, secure=False, clean_freq=5, - persistent=True, httponly=False, debug=False, **kwargs): - """Initialize session object (using cookies). - - storage_type - One of 'ram', 'file', 'postgresql', 'memcached'. This will be - used to look up the corresponding class in cherrypy.lib.sessions - globals. For example, 'file' will use the FileSession class. - - path - The 'path' value to stick in the response cookie metadata. - - path_header - If 'path' is None (the default), then the response - cookie 'path' will be pulled from request.headers[path_header]. - - name - The name of the cookie. - - timeout - The expiration timeout (in minutes) for the stored session data. - If 'persistent' is True (the default), this is also the timeout - for the cookie. - - domain - The cookie domain. - - secure - If False (the default) the cookie 'secure' value will not - be set. If True, the cookie 'secure' value will be set (to 1). - - clean_freq (minutes) - The poll rate for expired session cleanup. - - persistent - If True (the default), the 'timeout' argument will be used - to expire the cookie. If False, the cookie will not have an expiry, - and the cookie will be a "session cookie" which expires when the - browser is closed. - - httponly - If False (the default) the cookie 'httponly' value will not be set. - If True, the cookie 'httponly' value will be set (to 1). - - Any additional kwargs will be bound to the new Session instance, - and may be specific to the storage type. See the subclass of Session - you're using for more information. - """ - - request = cherrypy.serving.request - - # Guard against running twice - if hasattr(request, "_session_init_flag"): - return - request._session_init_flag = True - - # Check if request came with a session ID - id = None - if name in request.cookie: - id = request.cookie[name].value - if debug: - cherrypy.log('ID obtained from request.cookie: %r' % id, - 'TOOLS.SESSIONS') - - # Find the storage class and call setup (first time only). - storage_class = storage_type.title() + 'Session' - storage_class = globals()[storage_class] - if not hasattr(cherrypy, "session"): - if hasattr(storage_class, "setup"): - storage_class.setup(**kwargs) - - # Create and attach a new Session instance to cherrypy.serving. - # It will possess a reference to (and lock, and lazily load) - # the requested session data. - kwargs['timeout'] = timeout - kwargs['clean_freq'] = clean_freq - cherrypy.serving.session = sess = storage_class(id, **kwargs) - sess.debug = debug - - def update_cookie(id): - """Update the cookie every time the session id changes.""" - cherrypy.serving.response.cookie[name] = id - sess.id_observers.append(update_cookie) - - # Create cherrypy.session which will proxy to cherrypy.serving.session - if not hasattr(cherrypy, "session"): - cherrypy.session = cherrypy._ThreadLocalProxy('session') - - if persistent: - cookie_timeout = timeout - else: - # See http://support.microsoft.com/kb/223799/EN-US/ - # and http://support.mozilla.com/en-US/kb/Cookies - cookie_timeout = None - set_response_cookie(path=path, path_header=path_header, name=name, - timeout=cookie_timeout, domain=domain, secure=secure, - httponly=httponly) - - -def set_response_cookie(path=None, path_header=None, name='session_id', - timeout=60, domain=None, secure=False, httponly=False): - """Set a response cookie for the client. - - path - the 'path' value to stick in the response cookie metadata. - - path_header - if 'path' is None (the default), then the response - cookie 'path' will be pulled from request.headers[path_header]. - - name - the name of the cookie. - - timeout - the expiration timeout for the cookie. If 0 or other boolean - False, no 'expires' param will be set, and the cookie will be a - "session cookie" which expires when the browser is closed. - - domain - the cookie domain. - - secure - if False (the default) the cookie 'secure' value will not - be set. If True, the cookie 'secure' value will be set (to 1). - - httponly - If False (the default) the cookie 'httponly' value will not be set. - If True, the cookie 'httponly' value will be set (to 1). - - """ - # Set response cookie - cookie = cherrypy.serving.response.cookie - cookie[name] = cherrypy.serving.session.id - cookie[name]['path'] = ( - path or - cherrypy.serving.request.headers.get(path_header) or - '/' - ) - - # We'd like to use the "max-age" param as indicated in - # http://www.faqs.org/rfcs/rfc2109.html but IE doesn't - # save it to disk and the session is lost if people close - # the browser. So we have to use the old "expires" ... sigh ... -## cookie[name]['max-age'] = timeout * 60 - if timeout: - e = time.time() + (timeout * 60) - cookie[name]['expires'] = httputil.HTTPDate(e) - if domain is not None: - cookie[name]['domain'] = domain - if secure: - cookie[name]['secure'] = 1 - if httponly: - if not cookie[name].isReservedKey('httponly'): - raise ValueError("The httponly cookie token is not supported.") - cookie[name]['httponly'] = 1 - - -def expire(): - """Expire the current session cookie.""" - name = cherrypy.serving.request.config.get( - 'tools.sessions.name', 'session_id') - one_year = 60 * 60 * 24 * 365 - e = time.time() - one_year - cherrypy.serving.response.cookie[name]['expires'] = httputil.HTTPDate(e) diff --git a/lib/cherrypy/lib/static.py b/lib/cherrypy/lib/static.py deleted file mode 100644 index a630dae6..00000000 --- a/lib/cherrypy/lib/static.py +++ /dev/null @@ -1,378 +0,0 @@ -import os -import re -import stat -import mimetypes - -try: - from io import UnsupportedOperation -except ImportError: - UnsupportedOperation = object() - -import cherrypy -from cherrypy._cpcompat import ntob, unquote -from cherrypy.lib import cptools, httputil, file_generator_limited - - -mimetypes.init() -mimetypes.types_map['.dwg'] = 'image/x-dwg' -mimetypes.types_map['.ico'] = 'image/x-icon' -mimetypes.types_map['.bz2'] = 'application/x-bzip2' -mimetypes.types_map['.gz'] = 'application/x-gzip' - - -def serve_file(path, content_type=None, disposition=None, name=None, - debug=False): - """Set status, headers, and body in order to serve the given path. - - The Content-Type header will be set to the content_type arg, if provided. - If not provided, the Content-Type will be guessed by the file extension - of the 'path' argument. - - If disposition is not None, the Content-Disposition header will be set - to "; filename=". If name is None, it will be set - to the basename of path. If disposition is None, no Content-Disposition - header will be written. - """ - - response = cherrypy.serving.response - - # If path is relative, users should fix it by making path absolute. - # That is, CherryPy should not guess where the application root is. - # It certainly should *not* use cwd (since CP may be invoked from a - # variety of paths). If using tools.staticdir, you can make your relative - # paths become absolute by supplying a value for "tools.staticdir.root". - if not os.path.isabs(path): - msg = "'%s' is not an absolute path." % path - if debug: - cherrypy.log(msg, 'TOOLS.STATICFILE') - raise ValueError(msg) - - try: - st = os.stat(path) - except OSError: - if debug: - cherrypy.log('os.stat(%r) failed' % path, 'TOOLS.STATIC') - raise cherrypy.NotFound() - - # Check if path is a directory. - if stat.S_ISDIR(st.st_mode): - # Let the caller deal with it as they like. - if debug: - cherrypy.log('%r is a directory' % path, 'TOOLS.STATIC') - raise cherrypy.NotFound() - - # Set the Last-Modified response header, so that - # modified-since validation code can work. - response.headers['Last-Modified'] = httputil.HTTPDate(st.st_mtime) - cptools.validate_since() - - if content_type is None: - # Set content-type based on filename extension - ext = "" - i = path.rfind('.') - if i != -1: - ext = path[i:].lower() - content_type = mimetypes.types_map.get(ext, None) - if content_type is not None: - response.headers['Content-Type'] = content_type - if debug: - cherrypy.log('Content-Type: %r' % content_type, 'TOOLS.STATIC') - - cd = None - if disposition is not None: - if name is None: - name = os.path.basename(path) - cd = '%s; filename="%s"' % (disposition, name) - response.headers["Content-Disposition"] = cd - if debug: - cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC') - - # Set Content-Length and use an iterable (file object) - # this way CP won't load the whole file in memory - content_length = st.st_size - fileobj = open(path, 'rb') - return _serve_fileobj(fileobj, content_type, content_length, debug=debug) - - -def serve_fileobj(fileobj, content_type=None, disposition=None, name=None, - debug=False): - """Set status, headers, and body in order to serve the given file object. - - The Content-Type header will be set to the content_type arg, if provided. - - If disposition is not None, the Content-Disposition header will be set - to "; filename=". If name is None, 'filename' will - not be set. If disposition is None, no Content-Disposition header will - be written. - - CAUTION: If the request contains a 'Range' header, one or more seek()s will - be performed on the file object. This may cause undesired behavior if - the file object is not seekable. It could also produce undesired results - if the caller set the read position of the file object prior to calling - serve_fileobj(), expecting that the data would be served starting from that - position. - """ - - response = cherrypy.serving.response - - try: - st = os.fstat(fileobj.fileno()) - except AttributeError: - if debug: - cherrypy.log('os has no fstat attribute', 'TOOLS.STATIC') - content_length = None - except UnsupportedOperation: - content_length = None - else: - # Set the Last-Modified response header, so that - # modified-since validation code can work. - response.headers['Last-Modified'] = httputil.HTTPDate(st.st_mtime) - cptools.validate_since() - content_length = st.st_size - - if content_type is not None: - response.headers['Content-Type'] = content_type - if debug: - cherrypy.log('Content-Type: %r' % content_type, 'TOOLS.STATIC') - - cd = None - if disposition is not None: - if name is None: - cd = disposition - else: - cd = '%s; filename="%s"' % (disposition, name) - response.headers["Content-Disposition"] = cd - if debug: - cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC') - - return _serve_fileobj(fileobj, content_type, content_length, debug=debug) - - -def _serve_fileobj(fileobj, content_type, content_length, debug=False): - """Internal. Set response.body to the given file object, perhaps ranged.""" - response = cherrypy.serving.response - - # HTTP/1.0 didn't have Range/Accept-Ranges headers, or the 206 code - request = cherrypy.serving.request - if request.protocol >= (1, 1): - response.headers["Accept-Ranges"] = "bytes" - r = httputil.get_ranges(request.headers.get('Range'), content_length) - if r == []: - response.headers['Content-Range'] = "bytes */%s" % content_length - message = ("Invalid Range (first-byte-pos greater than " - "Content-Length)") - if debug: - cherrypy.log(message, 'TOOLS.STATIC') - raise cherrypy.HTTPError(416, message) - - if r: - if len(r) == 1: - # Return a single-part response. - start, stop = r[0] - if stop > content_length: - stop = content_length - r_len = stop - start - if debug: - cherrypy.log( - 'Single part; start: %r, stop: %r' % (start, stop), - 'TOOLS.STATIC') - response.status = "206 Partial Content" - response.headers['Content-Range'] = ( - "bytes %s-%s/%s" % (start, stop - 1, content_length)) - response.headers['Content-Length'] = r_len - fileobj.seek(start) - response.body = file_generator_limited(fileobj, r_len) - else: - # Return a multipart/byteranges response. - response.status = "206 Partial Content" - try: - # Python 3 - from email.generator import _make_boundary as make_boundary - except ImportError: - # Python 2 - from mimetools import choose_boundary as make_boundary - boundary = make_boundary() - ct = "multipart/byteranges; boundary=%s" % boundary - response.headers['Content-Type'] = ct - if "Content-Length" in response.headers: - # Delete Content-Length header so finalize() recalcs it. - del response.headers["Content-Length"] - - def file_ranges(): - # Apache compatibility: - yield ntob("\r\n") - - for start, stop in r: - if debug: - cherrypy.log( - 'Multipart; start: %r, stop: %r' % ( - start, stop), - 'TOOLS.STATIC') - yield ntob("--" + boundary, 'ascii') - yield ntob("\r\nContent-type: %s" % content_type, - 'ascii') - yield ntob( - "\r\nContent-range: bytes %s-%s/%s\r\n\r\n" % ( - start, stop - 1, content_length), - 'ascii') - fileobj.seek(start) - gen = file_generator_limited(fileobj, stop - start) - for chunk in gen: - yield chunk - yield ntob("\r\n") - # Final boundary - yield ntob("--" + boundary + "--", 'ascii') - - # Apache compatibility: - yield ntob("\r\n") - response.body = file_ranges() - return response.body - else: - if debug: - cherrypy.log('No byteranges requested', 'TOOLS.STATIC') - - # Set Content-Length and use an iterable (file object) - # this way CP won't load the whole file in memory - response.headers['Content-Length'] = content_length - response.body = fileobj - return response.body - - -def serve_download(path, name=None): - """Serve 'path' as an application/x-download attachment.""" - # This is such a common idiom I felt it deserved its own wrapper. - return serve_file(path, "application/x-download", "attachment", name) - - -def _attempt(filename, content_types, debug=False): - if debug: - cherrypy.log('Attempting %r (content_types %r)' % - (filename, content_types), 'TOOLS.STATICDIR') - try: - # you can set the content types for a - # complete directory per extension - content_type = None - if content_types: - r, ext = os.path.splitext(filename) - content_type = content_types.get(ext[1:], None) - serve_file(filename, content_type=content_type, debug=debug) - return True - except cherrypy.NotFound: - # If we didn't find the static file, continue handling the - # request. We might find a dynamic handler instead. - if debug: - cherrypy.log('NotFound', 'TOOLS.STATICFILE') - return False - - -def staticdir(section, dir, root="", match="", content_types=None, index="", - debug=False): - """Serve a static resource from the given (root +) dir. - - match - If given, request.path_info will be searched for the given - regular expression before attempting to serve static content. - - content_types - If given, it should be a Python dictionary of - {file-extension: content-type} pairs, where 'file-extension' is - a string (e.g. "gif") and 'content-type' is the value to write - out in the Content-Type response header (e.g. "image/gif"). - - index - If provided, it should be the (relative) name of a file to - serve for directory requests. For example, if the dir argument is - '/home/me', the Request-URI is 'myapp', and the index arg is - 'index.html', the file '/home/me/myapp/index.html' will be sought. - """ - request = cherrypy.serving.request - if request.method not in ('GET', 'HEAD'): - if debug: - cherrypy.log('request.method not GET or HEAD', 'TOOLS.STATICDIR') - return False - - if match and not re.search(match, request.path_info): - if debug: - cherrypy.log('request.path_info %r does not match pattern %r' % - (request.path_info, match), 'TOOLS.STATICDIR') - return False - - # Allow the use of '~' to refer to a user's home directory. - dir = os.path.expanduser(dir) - - # If dir is relative, make absolute using "root". - if not os.path.isabs(dir): - if not root: - msg = "Static dir requires an absolute dir (or root)." - if debug: - cherrypy.log(msg, 'TOOLS.STATICDIR') - raise ValueError(msg) - dir = os.path.join(root, dir) - - # Determine where we are in the object tree relative to 'section' - # (where the static tool was defined). - if section == 'global': - section = "/" - section = section.rstrip(r"\/") - branch = request.path_info[len(section) + 1:] - branch = unquote(branch.lstrip(r"\/")) - - # If branch is "", filename will end in a slash - filename = os.path.join(dir, branch) - if debug: - cherrypy.log('Checking file %r to fulfill %r' % - (filename, request.path_info), 'TOOLS.STATICDIR') - - # There's a chance that the branch pulled from the URL might - # have ".." or similar uplevel attacks in it. Check that the final - # filename is a child of dir. - if not os.path.normpath(filename).startswith(os.path.normpath(dir)): - raise cherrypy.HTTPError(403) # Forbidden - - handled = _attempt(filename, content_types) - if not handled: - # Check for an index file if a folder was requested. - if index: - handled = _attempt(os.path.join(filename, index), content_types) - if handled: - request.is_index = filename[-1] in (r"\/") - return handled - - -def staticfile(filename, root=None, match="", content_types=None, debug=False): - """Serve a static resource from the given (root +) filename. - - match - If given, request.path_info will be searched for the given - regular expression before attempting to serve static content. - - content_types - If given, it should be a Python dictionary of - {file-extension: content-type} pairs, where 'file-extension' is - a string (e.g. "gif") and 'content-type' is the value to write - out in the Content-Type response header (e.g. "image/gif"). - - """ - request = cherrypy.serving.request - if request.method not in ('GET', 'HEAD'): - if debug: - cherrypy.log('request.method not GET or HEAD', 'TOOLS.STATICFILE') - return False - - if match and not re.search(match, request.path_info): - if debug: - cherrypy.log('request.path_info %r does not match pattern %r' % - (request.path_info, match), 'TOOLS.STATICFILE') - return False - - # If filename is relative, make absolute using "root". - if not os.path.isabs(filename): - if not root: - msg = "Static tool requires an absolute filename (got '%s')." % ( - filename,) - if debug: - cherrypy.log(msg, 'TOOLS.STATICFILE') - raise ValueError(msg) - filename = os.path.join(root, filename) - - return _attempt(filename, content_types, debug=debug) diff --git a/lib/cherrypy/lib/xmlrpcutil.py b/lib/cherrypy/lib/xmlrpcutil.py deleted file mode 100644 index 9fc9564f..00000000 --- a/lib/cherrypy/lib/xmlrpcutil.py +++ /dev/null @@ -1,57 +0,0 @@ -import sys - -import cherrypy -from cherrypy._cpcompat import ntob - - -def get_xmlrpclib(): - try: - import xmlrpc.client as x - except ImportError: - import xmlrpclib as x - return x - - -def process_body(): - """Return (params, method) from request body.""" - try: - return get_xmlrpclib().loads(cherrypy.request.body.read()) - except Exception: - return ('ERROR PARAMS', ), 'ERRORMETHOD' - - -def patched_path(path): - """Return 'path', doctored for RPC.""" - if not path.endswith('/'): - path += '/' - if path.startswith('/RPC2/'): - # strip the first /rpc2 - path = path[5:] - return path - - -def _set_response(body): - # The XML-RPC spec (http://www.xmlrpc.com/spec) says: - # "Unless there's a lower-level error, always return 200 OK." - # Since Python's xmlrpclib interprets a non-200 response - # as a "Protocol Error", we'll just return 200 every time. - response = cherrypy.response - response.status = '200 OK' - response.body = ntob(body, 'utf-8') - response.headers['Content-Type'] = 'text/xml' - response.headers['Content-Length'] = len(body) - - -def respond(body, encoding='utf-8', allow_none=0): - xmlrpclib = get_xmlrpclib() - if not isinstance(body, xmlrpclib.Fault): - body = (body,) - _set_response(xmlrpclib.dumps(body, methodresponse=1, - encoding=encoding, - allow_none=allow_none)) - - -def on_error(*args, **kwargs): - body = str(sys.exc_info()[1]) - xmlrpclib = get_xmlrpclib() - _set_response(xmlrpclib.dumps(xmlrpclib.Fault(1, body))) diff --git a/lib/cherrypy/process/__init__.py b/lib/cherrypy/process/__init__.py deleted file mode 100644 index f15b1237..00000000 --- a/lib/cherrypy/process/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -"""Site container for an HTTP server. - -A Web Site Process Bus object is used to connect applications, servers, -and frameworks with site-wide services such as daemonization, process -reload, signal handling, drop privileges, PID file management, logging -for all of these, and many more. - -The 'plugins' module defines a few abstract and concrete services for -use with the bus. Some use tool-specific channels; see the documentation -for each class. -""" - -from cherrypy.process.wspbus import bus -from cherrypy.process import plugins, servers diff --git a/lib/cherrypy/process/plugins.py b/lib/cherrypy/process/plugins.py deleted file mode 100644 index c787ba92..00000000 --- a/lib/cherrypy/process/plugins.py +++ /dev/null @@ -1,717 +0,0 @@ -"""Site services for use with a Web Site Process Bus.""" - -import os -import re -import signal as _signal -import sys -import time -import threading - -from cherrypy._cpcompat import basestring, get_daemon, get_thread_ident -from cherrypy._cpcompat import ntob, set, Timer, SetDaemonProperty - -# _module__file__base is used by Autoreload to make -# absolute any filenames retrieved from sys.modules which are not -# already absolute paths. This is to work around Python's quirk -# of importing the startup script and using a relative filename -# for it in sys.modules. -# -# Autoreload examines sys.modules afresh every time it runs. If an application -# changes the current directory by executing os.chdir(), then the next time -# Autoreload runs, it will not be able to find any filenames which are -# not absolute paths, because the current directory is not the same as when the -# module was first imported. Autoreload will then wrongly conclude the file -# has "changed", and initiate the shutdown/re-exec sequence. -# See ticket #917. -# For this workaround to have a decent probability of success, this module -# needs to be imported as early as possible, before the app has much chance -# to change the working directory. -_module__file__base = os.getcwd() - - -class SimplePlugin(object): - - """Plugin base class which auto-subscribes methods for known channels.""" - - bus = None - """A :class:`Bus `, usually cherrypy.engine. - """ - - def __init__(self, bus): - self.bus = bus - - def subscribe(self): - """Register this object as a (multi-channel) listener on the bus.""" - for channel in self.bus.listeners: - # Subscribe self.start, self.exit, etc. if present. - method = getattr(self, channel, None) - if method is not None: - self.bus.subscribe(channel, method) - - def unsubscribe(self): - """Unregister this object as a listener on the bus.""" - for channel in self.bus.listeners: - # Unsubscribe self.start, self.exit, etc. if present. - method = getattr(self, channel, None) - if method is not None: - self.bus.unsubscribe(channel, method) - - -class SignalHandler(object): - - """Register bus channels (and listeners) for system signals. - - You can modify what signals your application listens for, and what it does - when it receives signals, by modifying :attr:`SignalHandler.handlers`, - a dict of {signal name: callback} pairs. The default set is:: - - handlers = {'SIGTERM': self.bus.exit, - 'SIGHUP': self.handle_SIGHUP, - 'SIGUSR1': self.bus.graceful, - } - - The :func:`SignalHandler.handle_SIGHUP`` method calls - :func:`bus.restart()` - if the process is daemonized, but - :func:`bus.exit()` - if the process is attached to a TTY. This is because Unix window - managers tend to send SIGHUP to terminal windows when the user closes them. - - Feel free to add signals which are not available on every platform. - The :class:`SignalHandler` will ignore errors raised from attempting - to register handlers for unknown signals. - """ - - handlers = {} - """A map from signal names (e.g. 'SIGTERM') to handlers (e.g. bus.exit).""" - - signals = {} - """A map from signal numbers to names.""" - - for k, v in vars(_signal).items(): - if k.startswith('SIG') and not k.startswith('SIG_'): - signals[v] = k - del k, v - - def __init__(self, bus): - self.bus = bus - # Set default handlers - self.handlers = {'SIGTERM': self.bus.exit, - 'SIGHUP': self.handle_SIGHUP, - 'SIGUSR1': self.bus.graceful, - } - - if sys.platform[:4] == 'java': - del self.handlers['SIGUSR1'] - self.handlers['SIGUSR2'] = self.bus.graceful - self.bus.log("SIGUSR1 cannot be set on the JVM platform. " - "Using SIGUSR2 instead.") - self.handlers['SIGINT'] = self._jython_SIGINT_handler - - self._previous_handlers = {} - - def _jython_SIGINT_handler(self, signum=None, frame=None): - # See http://bugs.jython.org/issue1313 - self.bus.log('Keyboard Interrupt: shutting down bus') - self.bus.exit() - - def subscribe(self): - """Subscribe self.handlers to signals.""" - for sig, func in self.handlers.items(): - try: - self.set_handler(sig, func) - except ValueError: - pass - - def unsubscribe(self): - """Unsubscribe self.handlers from signals.""" - for signum, handler in self._previous_handlers.items(): - signame = self.signals[signum] - - if handler is None: - self.bus.log("Restoring %s handler to SIG_DFL." % signame) - handler = _signal.SIG_DFL - else: - self.bus.log("Restoring %s handler %r." % (signame, handler)) - - try: - our_handler = _signal.signal(signum, handler) - if our_handler is None: - self.bus.log("Restored old %s handler %r, but our " - "handler was not registered." % - (signame, handler), level=30) - except ValueError: - self.bus.log("Unable to restore %s handler %r." % - (signame, handler), level=40, traceback=True) - - def set_handler(self, signal, listener=None): - """Subscribe a handler for the given signal (number or name). - - If the optional 'listener' argument is provided, it will be - subscribed as a listener for the given signal's channel. - - If the given signal name or number is not available on the current - platform, ValueError is raised. - """ - if isinstance(signal, basestring): - signum = getattr(_signal, signal, None) - if signum is None: - raise ValueError("No such signal: %r" % signal) - signame = signal - else: - try: - signame = self.signals[signal] - except KeyError: - raise ValueError("No such signal: %r" % signal) - signum = signal - - prev = _signal.signal(signum, self._handle_signal) - self._previous_handlers[signum] = prev - - if listener is not None: - self.bus.log("Listening for %s." % signame) - self.bus.subscribe(signame, listener) - - def _handle_signal(self, signum=None, frame=None): - """Python signal handler (self.set_handler subscribes it for you).""" - signame = self.signals[signum] - self.bus.log("Caught signal %s." % signame) - self.bus.publish(signame) - - def handle_SIGHUP(self): - """Restart if daemonized, else exit.""" - if os.isatty(sys.stdin.fileno()): - # not daemonized (may be foreground or background) - self.bus.log("SIGHUP caught but not daemonized. Exiting.") - self.bus.exit() - else: - self.bus.log("SIGHUP caught while daemonized. Restarting.") - self.bus.restart() - - -try: - import pwd - import grp -except ImportError: - pwd, grp = None, None - - -class DropPrivileges(SimplePlugin): - - """Drop privileges. uid/gid arguments not available on Windows. - - Special thanks to `Gavin Baker `_ - """ - - def __init__(self, bus, umask=None, uid=None, gid=None): - SimplePlugin.__init__(self, bus) - self.finalized = False - self.uid = uid - self.gid = gid - self.umask = umask - - def _get_uid(self): - return self._uid - - def _set_uid(self, val): - if val is not None: - if pwd is None: - self.bus.log("pwd module not available; ignoring uid.", - level=30) - val = None - elif isinstance(val, basestring): - val = pwd.getpwnam(val)[2] - self._uid = val - uid = property(_get_uid, _set_uid, - doc="The uid under which to run. Availability: Unix.") - - def _get_gid(self): - return self._gid - - def _set_gid(self, val): - if val is not None: - if grp is None: - self.bus.log("grp module not available; ignoring gid.", - level=30) - val = None - elif isinstance(val, basestring): - val = grp.getgrnam(val)[2] - self._gid = val - gid = property(_get_gid, _set_gid, - doc="The gid under which to run. Availability: Unix.") - - def _get_umask(self): - return self._umask - - def _set_umask(self, val): - if val is not None: - try: - os.umask - except AttributeError: - self.bus.log("umask function not available; ignoring umask.", - level=30) - val = None - self._umask = val - umask = property( - _get_umask, - _set_umask, - doc="""The default permission mode for newly created files and - directories. - - Usually expressed in octal format, for example, ``0644``. - Availability: Unix, Windows. - """) - - def start(self): - # uid/gid - def current_ids(): - """Return the current (uid, gid) if available.""" - name, group = None, None - if pwd: - name = pwd.getpwuid(os.getuid())[0] - if grp: - group = grp.getgrgid(os.getgid())[0] - return name, group - - if self.finalized: - if not (self.uid is None and self.gid is None): - self.bus.log('Already running as uid: %r gid: %r' % - current_ids()) - else: - if self.uid is None and self.gid is None: - if pwd or grp: - self.bus.log('uid/gid not set', level=30) - else: - self.bus.log('Started as uid: %r gid: %r' % current_ids()) - if self.gid is not None: - os.setgid(self.gid) - os.setgroups([]) - if self.uid is not None: - os.setuid(self.uid) - self.bus.log('Running as uid: %r gid: %r' % current_ids()) - - # umask - if self.finalized: - if self.umask is not None: - self.bus.log('umask already set to: %03o' % self.umask) - else: - if self.umask is None: - self.bus.log('umask not set', level=30) - else: - old_umask = os.umask(self.umask) - self.bus.log('umask old: %03o, new: %03o' % - (old_umask, self.umask)) - - self.finalized = True - # This is slightly higher than the priority for server.start - # in order to facilitate the most common use: starting on a low - # port (which requires root) and then dropping to another user. - start.priority = 77 - - -class Daemonizer(SimplePlugin): - - """Daemonize the running script. - - Use this with a Web Site Process Bus via:: - - Daemonizer(bus).subscribe() - - When this component finishes, the process is completely decoupled from - the parent environment. Please note that when this component is used, - the return code from the parent process will still be 0 if a startup - error occurs in the forked children. Errors in the initial daemonizing - process still return proper exit codes. Therefore, if you use this - plugin to daemonize, don't use the return code as an accurate indicator - of whether the process fully started. In fact, that return code only - indicates if the process succesfully finished the first fork. - """ - - def __init__(self, bus, stdin='/dev/null', stdout='/dev/null', - stderr='/dev/null'): - SimplePlugin.__init__(self, bus) - self.stdin = stdin - self.stdout = stdout - self.stderr = stderr - self.finalized = False - - def start(self): - if self.finalized: - self.bus.log('Already deamonized.') - - # forking has issues with threads: - # http://www.opengroup.org/onlinepubs/000095399/functions/fork.html - # "The general problem with making fork() work in a multi-threaded - # world is what to do with all of the threads..." - # So we check for active threads: - if threading.activeCount() != 1: - self.bus.log('There are %r active threads. ' - 'Daemonizing now may cause strange failures.' % - threading.enumerate(), level=30) - - # See http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16 - # (or http://www.faqs.org/faqs/unix-faq/programmer/faq/ section 1.7) - # and http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66012 - - # Finish up with the current stdout/stderr - sys.stdout.flush() - sys.stderr.flush() - - # Do first fork. - try: - pid = os.fork() - if pid == 0: - # This is the child process. Continue. - pass - else: - # This is the first parent. Exit, now that we've forked. - self.bus.log('Forking once.') - os._exit(0) - except OSError: - # Python raises OSError rather than returning negative numbers. - exc = sys.exc_info()[1] - sys.exit("%s: fork #1 failed: (%d) %s\n" - % (sys.argv[0], exc.errno, exc.strerror)) - - os.setsid() - - # Do second fork - try: - pid = os.fork() - if pid > 0: - self.bus.log('Forking twice.') - os._exit(0) # Exit second parent - except OSError: - exc = sys.exc_info()[1] - sys.exit("%s: fork #2 failed: (%d) %s\n" - % (sys.argv[0], exc.errno, exc.strerror)) - - os.chdir("/") - os.umask(0) - - si = open(self.stdin, "r") - so = open(self.stdout, "a+") - se = open(self.stderr, "a+") - - # os.dup2(fd, fd2) will close fd2 if necessary, - # so we don't explicitly close stdin/out/err. - # See http://docs.python.org/lib/os-fd-ops.html - os.dup2(si.fileno(), sys.stdin.fileno()) - os.dup2(so.fileno(), sys.stdout.fileno()) - os.dup2(se.fileno(), sys.stderr.fileno()) - - self.bus.log('Daemonized to PID: %s' % os.getpid()) - self.finalized = True - start.priority = 65 - - -class PIDFile(SimplePlugin): - - """Maintain a PID file via a WSPBus.""" - - def __init__(self, bus, pidfile): - SimplePlugin.__init__(self, bus) - self.pidfile = pidfile - self.finalized = False - - def start(self): - pid = os.getpid() - if self.finalized: - self.bus.log('PID %r already written to %r.' % (pid, self.pidfile)) - else: - open(self.pidfile, "wb").write(ntob("%s\n" % pid, 'utf8')) - self.bus.log('PID %r written to %r.' % (pid, self.pidfile)) - self.finalized = True - start.priority = 70 - - def exit(self): - try: - os.remove(self.pidfile) - self.bus.log('PID file removed: %r.' % self.pidfile) - except (KeyboardInterrupt, SystemExit): - raise - except: - pass - - -class PerpetualTimer(Timer): - - """A responsive subclass of threading.Timer whose run() method repeats. - - Use this timer only when you really need a very interruptible timer; - this checks its 'finished' condition up to 20 times a second, which can - results in pretty high CPU usage - """ - - def __init__(self, *args, **kwargs): - "Override parent constructor to allow 'bus' to be provided." - self.bus = kwargs.pop('bus', None) - super(PerpetualTimer, self).__init__(*args, **kwargs) - - def run(self): - while True: - self.finished.wait(self.interval) - if self.finished.isSet(): - return - try: - self.function(*self.args, **self.kwargs) - except Exception: - if self.bus: - self.bus.log( - "Error in perpetual timer thread function %r." % - self.function, level=40, traceback=True) - # Quit on first error to avoid massive logs. - raise - - -class BackgroundTask(SetDaemonProperty, threading.Thread): - - """A subclass of threading.Thread whose run() method repeats. - - Use this class for most repeating tasks. It uses time.sleep() to wait - for each interval, which isn't very responsive; that is, even if you call - self.cancel(), you'll have to wait until the sleep() call finishes before - the thread stops. To compensate, it defaults to being daemonic, which means - it won't delay stopping the whole process. - """ - - def __init__(self, interval, function, args=[], kwargs={}, bus=None): - threading.Thread.__init__(self) - self.interval = interval - self.function = function - self.args = args - self.kwargs = kwargs - self.running = False - self.bus = bus - - # default to daemonic - self.daemon = True - - def cancel(self): - self.running = False - - def run(self): - self.running = True - while self.running: - time.sleep(self.interval) - if not self.running: - return - try: - self.function(*self.args, **self.kwargs) - except Exception: - if self.bus: - self.bus.log("Error in background task thread function %r." - % self.function, level=40, traceback=True) - # Quit on first error to avoid massive logs. - raise - - -class Monitor(SimplePlugin): - - """WSPBus listener to periodically run a callback in its own thread.""" - - callback = None - """The function to call at intervals.""" - - frequency = 60 - """The time in seconds between callback runs.""" - - thread = None - """A :class:`BackgroundTask` - thread. - """ - - def __init__(self, bus, callback, frequency=60, name=None): - SimplePlugin.__init__(self, bus) - self.callback = callback - self.frequency = frequency - self.thread = None - self.name = name - - def start(self): - """Start our callback in its own background thread.""" - if self.frequency > 0: - threadname = self.name or self.__class__.__name__ - if self.thread is None: - self.thread = BackgroundTask(self.frequency, self.callback, - bus=self.bus) - self.thread.setName(threadname) - self.thread.start() - self.bus.log("Started monitor thread %r." % threadname) - else: - self.bus.log("Monitor thread %r already started." % threadname) - start.priority = 70 - - def stop(self): - """Stop our callback's background task thread.""" - if self.thread is None: - self.bus.log("No thread running for %s." % - self.name or self.__class__.__name__) - else: - if self.thread is not threading.currentThread(): - name = self.thread.getName() - self.thread.cancel() - if not get_daemon(self.thread): - self.bus.log("Joining %r" % name) - self.thread.join() - self.bus.log("Stopped thread %r." % name) - self.thread = None - - def graceful(self): - """Stop the callback's background task thread and restart it.""" - self.stop() - self.start() - - -class Autoreloader(Monitor): - - """Monitor which re-executes the process when files change. - - This :ref:`plugin` restarts the process (via :func:`os.execv`) - if any of the files it monitors change (or is deleted). By default, the - autoreloader monitors all imported modules; you can add to the - set by adding to ``autoreload.files``:: - - cherrypy.engine.autoreload.files.add(myFile) - - If there are imported files you do *not* wish to monitor, you can - adjust the ``match`` attribute, a regular expression. For example, - to stop monitoring cherrypy itself:: - - cherrypy.engine.autoreload.match = r'^(?!cherrypy).+' - - Like all :class:`Monitor` plugins, - the autoreload plugin takes a ``frequency`` argument. The default is - 1 second; that is, the autoreloader will examine files once each second. - """ - - files = None - """The set of files to poll for modifications.""" - - frequency = 1 - """The interval in seconds at which to poll for modified files.""" - - match = '.*' - """A regular expression by which to match filenames.""" - - def __init__(self, bus, frequency=1, match='.*'): - self.mtimes = {} - self.files = set() - self.match = match - Monitor.__init__(self, bus, self.run, frequency) - - def start(self): - """Start our own background task thread for self.run.""" - if self.thread is None: - self.mtimes = {} - Monitor.start(self) - start.priority = 70 - - def sysfiles(self): - """Return a Set of sys.modules filenames to monitor.""" - files = set() - for k, m in list(sys.modules.items()): - if re.match(self.match, k): - if ( - hasattr(m, '__loader__') and - hasattr(m.__loader__, 'archive') - ): - f = m.__loader__.archive - else: - f = getattr(m, '__file__', None) - if f is not None and not os.path.isabs(f): - # ensure absolute paths so a os.chdir() in the app - # doesn't break me - f = os.path.normpath( - os.path.join(_module__file__base, f)) - files.add(f) - return files - - def run(self): - """Reload the process if registered files have been modified.""" - for filename in self.sysfiles() | self.files: - if filename: - if filename.endswith('.pyc'): - filename = filename[:-1] - - oldtime = self.mtimes.get(filename, 0) - if oldtime is None: - # Module with no .py file. Skip it. - continue - - try: - mtime = os.stat(filename).st_mtime - except OSError: - # Either a module with no .py file, or it's been deleted. - mtime = None - - if filename not in self.mtimes: - # If a module has no .py file, this will be None. - self.mtimes[filename] = mtime - else: - if mtime is None or mtime > oldtime: - # The file has been deleted or modified. - self.bus.log("Restarting because %s changed." % - filename) - self.thread.cancel() - self.bus.log("Stopped thread %r." % - self.thread.getName()) - self.bus.restart() - return - - -class ThreadManager(SimplePlugin): - - """Manager for HTTP request threads. - - If you have control over thread creation and destruction, publish to - the 'acquire_thread' and 'release_thread' channels (for each thread). - This will register/unregister the current thread and publish to - 'start_thread' and 'stop_thread' listeners in the bus as needed. - - If threads are created and destroyed by code you do not control - (e.g., Apache), then, at the beginning of every HTTP request, - publish to 'acquire_thread' only. You should not publish to - 'release_thread' in this case, since you do not know whether - the thread will be re-used or not. The bus will call - 'stop_thread' listeners for you when it stops. - """ - - threads = None - """A map of {thread ident: index number} pairs.""" - - def __init__(self, bus): - self.threads = {} - SimplePlugin.__init__(self, bus) - self.bus.listeners.setdefault('acquire_thread', set()) - self.bus.listeners.setdefault('start_thread', set()) - self.bus.listeners.setdefault('release_thread', set()) - self.bus.listeners.setdefault('stop_thread', set()) - - def acquire_thread(self): - """Run 'start_thread' listeners for the current thread. - - If the current thread has already been seen, any 'start_thread' - listeners will not be run again. - """ - thread_ident = get_thread_ident() - if thread_ident not in self.threads: - # We can't just use get_ident as the thread ID - # because some platforms reuse thread ID's. - i = len(self.threads) + 1 - self.threads[thread_ident] = i - self.bus.publish('start_thread', i) - - def release_thread(self): - """Release the current thread and run 'stop_thread' listeners.""" - thread_ident = get_thread_ident() - i = self.threads.pop(thread_ident, None) - if i is not None: - self.bus.publish('stop_thread', i) - - def stop(self): - """Release all threads and run all 'stop_thread' listeners.""" - for thread_ident, i in self.threads.items(): - self.bus.publish('stop_thread', i) - self.threads.clear() - graceful = stop diff --git a/lib/cherrypy/process/servers.py b/lib/cherrypy/process/servers.py deleted file mode 100644 index fef37f77..00000000 --- a/lib/cherrypy/process/servers.py +++ /dev/null @@ -1,465 +0,0 @@ -""" -Starting in CherryPy 3.1, cherrypy.server is implemented as an -:ref:`Engine Plugin`. It's an instance of -:class:`cherrypy._cpserver.Server`, which is a subclass of -:class:`cherrypy.process.servers.ServerAdapter`. The ``ServerAdapter`` class -is designed to control other servers, as well. - -Multiple servers/ports -====================== - -If you need to start more than one HTTP server (to serve on multiple ports, or -protocols, etc.), you can manually register each one and then start them all -with engine.start:: - - s1 = ServerAdapter(cherrypy.engine, MyWSGIServer(host='0.0.0.0', port=80)) - s2 = ServerAdapter(cherrypy.engine, - another.HTTPServer(host='127.0.0.1', - SSL=True)) - s1.subscribe() - s2.subscribe() - cherrypy.engine.start() - -.. index:: SCGI - -FastCGI/SCGI -============ - -There are also Flup\ **F**\ CGIServer and Flup\ **S**\ CGIServer classes in -:mod:`cherrypy.process.servers`. To start an fcgi server, for example, -wrap an instance of it in a ServerAdapter:: - - addr = ('0.0.0.0', 4000) - f = servers.FlupFCGIServer(application=cherrypy.tree, bindAddress=addr) - s = servers.ServerAdapter(cherrypy.engine, httpserver=f, bind_addr=addr) - s.subscribe() - -The :doc:`cherryd` startup script will do the above for -you via its `-f` flag. -Note that you need to download and install `flup `_ -yourself, whether you use ``cherryd`` or not. - -.. _fastcgi: -.. index:: FastCGI - -FastCGI -------- - -A very simple setup lets your cherry run with FastCGI. -You just need the flup library, -plus a running Apache server (with ``mod_fastcgi``) or lighttpd server. - -CherryPy code -^^^^^^^^^^^^^ - -hello.py:: - - #!/usr/bin/python - import cherrypy - - class HelloWorld: - \"""Sample request handler class.\""" - def index(self): - return "Hello world!" - index.exposed = True - - cherrypy.tree.mount(HelloWorld()) - # CherryPy autoreload must be disabled for the flup server to work - cherrypy.config.update({'engine.autoreload.on':False}) - -Then run :doc:`/deployguide/cherryd` with the '-f' arg:: - - cherryd -c -d -f -i hello.py - -Apache -^^^^^^ - -At the top level in httpd.conf:: - - FastCgiIpcDir /tmp - FastCgiServer /path/to/cherry.fcgi -idle-timeout 120 -processes 4 - -And inside the relevant VirtualHost section:: - - # FastCGI config - AddHandler fastcgi-script .fcgi - ScriptAliasMatch (.*$) /path/to/cherry.fcgi$1 - -Lighttpd -^^^^^^^^ - -For `Lighttpd `_ you can follow these -instructions. Within ``lighttpd.conf`` make sure ``mod_fastcgi`` is -active within ``server.modules``. Then, within your ``$HTTP["host"]`` -directive, configure your fastcgi script like the following:: - - $HTTP["url"] =~ "" { - fastcgi.server = ( - "/" => ( - "script.fcgi" => ( - "bin-path" => "/path/to/your/script.fcgi", - "socket" => "/tmp/script.sock", - "check-local" => "disable", - "disable-time" => 1, - "min-procs" => 1, - "max-procs" => 1, # adjust as needed - ), - ), - ) - } # end of $HTTP["url"] =~ "^/" - -Please see `Lighttpd FastCGI Docs -`_ for -an explanation of the possible configuration options. -""" - -import sys -import time -import warnings - - -class ServerAdapter(object): - - """Adapter for an HTTP server. - - If you need to start more than one HTTP server (to serve on multiple - ports, or protocols, etc.), you can manually register each one and then - start them all with bus.start: - - s1 = ServerAdapter(bus, MyWSGIServer(host='0.0.0.0', port=80)) - s2 = ServerAdapter(bus, another.HTTPServer(host='127.0.0.1', SSL=True)) - s1.subscribe() - s2.subscribe() - bus.start() - """ - - def __init__(self, bus, httpserver=None, bind_addr=None): - self.bus = bus - self.httpserver = httpserver - self.bind_addr = bind_addr - self.interrupt = None - self.running = False - - def subscribe(self): - self.bus.subscribe('start', self.start) - self.bus.subscribe('stop', self.stop) - - def unsubscribe(self): - self.bus.unsubscribe('start', self.start) - self.bus.unsubscribe('stop', self.stop) - - def start(self): - """Start the HTTP server.""" - if self.bind_addr is None: - on_what = "unknown interface (dynamic?)" - elif isinstance(self.bind_addr, tuple): - on_what = self._get_base() - else: - on_what = "socket file: %s" % self.bind_addr - - if self.running: - self.bus.log("Already serving on %s" % on_what) - return - - self.interrupt = None - if not self.httpserver: - raise ValueError("No HTTP server has been created.") - - # Start the httpserver in a new thread. - if isinstance(self.bind_addr, tuple): - wait_for_free_port(*self.bind_addr) - - import threading - t = threading.Thread(target=self._start_http_thread) - t.setName("HTTPServer " + t.getName()) - t.start() - - self.wait() - self.running = True - self.bus.log("Serving on %s" % on_what) - start.priority = 75 - - def _get_base(self): - if not self.httpserver: - return '' - host, port = self.bind_addr - if getattr(self.httpserver, 'ssl_certificate', None): - scheme = "https" - if port != 443: - host += ":%s" % port - else: - scheme = "http" - if port != 80: - host += ":%s" % port - - return "%s://%s" % (scheme, host) - - def _start_http_thread(self): - """HTTP servers MUST be running in new threads, so that the - main thread persists to receive KeyboardInterrupt's. If an - exception is raised in the httpserver's thread then it's - trapped here, and the bus (and therefore our httpserver) - are shut down. - """ - try: - self.httpserver.start() - except KeyboardInterrupt: - self.bus.log(" hit: shutting down HTTP server") - self.interrupt = sys.exc_info()[1] - self.bus.exit() - except SystemExit: - self.bus.log("SystemExit raised: shutting down HTTP server") - self.interrupt = sys.exc_info()[1] - self.bus.exit() - raise - except: - self.interrupt = sys.exc_info()[1] - self.bus.log("Error in HTTP server: shutting down", - traceback=True, level=40) - self.bus.exit() - raise - - def wait(self): - """Wait until the HTTP server is ready to receive requests.""" - while not getattr(self.httpserver, "ready", False): - if self.interrupt: - raise self.interrupt - time.sleep(.1) - - # Wait for port to be occupied - if isinstance(self.bind_addr, tuple): - host, port = self.bind_addr - wait_for_occupied_port(host, port) - - def stop(self): - """Stop the HTTP server.""" - if self.running: - # stop() MUST block until the server is *truly* stopped. - self.httpserver.stop() - # Wait for the socket to be truly freed. - if isinstance(self.bind_addr, tuple): - wait_for_free_port(*self.bind_addr) - self.running = False - self.bus.log("HTTP Server %s shut down" % self.httpserver) - else: - self.bus.log("HTTP Server %s already shut down" % self.httpserver) - stop.priority = 25 - - def restart(self): - """Restart the HTTP server.""" - self.stop() - self.start() - - -class FlupCGIServer(object): - - """Adapter for a flup.server.cgi.WSGIServer.""" - - def __init__(self, *args, **kwargs): - self.args = args - self.kwargs = kwargs - self.ready = False - - def start(self): - """Start the CGI server.""" - # We have to instantiate the server class here because its __init__ - # starts a threadpool. If we do it too early, daemonize won't work. - from flup.server.cgi import WSGIServer - - self.cgiserver = WSGIServer(*self.args, **self.kwargs) - self.ready = True - self.cgiserver.run() - - def stop(self): - """Stop the HTTP server.""" - self.ready = False - - -class FlupFCGIServer(object): - - """Adapter for a flup.server.fcgi.WSGIServer.""" - - def __init__(self, *args, **kwargs): - if kwargs.get('bindAddress', None) is None: - import socket - if not hasattr(socket, 'fromfd'): - raise ValueError( - 'Dynamic FCGI server not available on this platform. ' - 'You must use a static or external one by providing a ' - 'legal bindAddress.') - self.args = args - self.kwargs = kwargs - self.ready = False - - def start(self): - """Start the FCGI server.""" - # We have to instantiate the server class here because its __init__ - # starts a threadpool. If we do it too early, daemonize won't work. - from flup.server.fcgi import WSGIServer - self.fcgiserver = WSGIServer(*self.args, **self.kwargs) - # TODO: report this bug upstream to flup. - # If we don't set _oldSIGs on Windows, we get: - # File "C:\Python24\Lib\site-packages\flup\server\threadedserver.py", - # line 108, in run - # self._restoreSignalHandlers() - # File "C:\Python24\Lib\site-packages\flup\server\threadedserver.py", - # line 156, in _restoreSignalHandlers - # for signum,handler in self._oldSIGs: - # AttributeError: 'WSGIServer' object has no attribute '_oldSIGs' - self.fcgiserver._installSignalHandlers = lambda: None - self.fcgiserver._oldSIGs = [] - self.ready = True - self.fcgiserver.run() - - def stop(self): - """Stop the HTTP server.""" - # Forcibly stop the fcgi server main event loop. - self.fcgiserver._keepGoing = False - # Force all worker threads to die off. - self.fcgiserver._threadPool.maxSpare = ( - self.fcgiserver._threadPool._idleCount) - self.ready = False - - -class FlupSCGIServer(object): - - """Adapter for a flup.server.scgi.WSGIServer.""" - - def __init__(self, *args, **kwargs): - self.args = args - self.kwargs = kwargs - self.ready = False - - def start(self): - """Start the SCGI server.""" - # We have to instantiate the server class here because its __init__ - # starts a threadpool. If we do it too early, daemonize won't work. - from flup.server.scgi import WSGIServer - self.scgiserver = WSGIServer(*self.args, **self.kwargs) - # TODO: report this bug upstream to flup. - # If we don't set _oldSIGs on Windows, we get: - # File "C:\Python24\Lib\site-packages\flup\server\threadedserver.py", - # line 108, in run - # self._restoreSignalHandlers() - # File "C:\Python24\Lib\site-packages\flup\server\threadedserver.py", - # line 156, in _restoreSignalHandlers - # for signum,handler in self._oldSIGs: - # AttributeError: 'WSGIServer' object has no attribute '_oldSIGs' - self.scgiserver._installSignalHandlers = lambda: None - self.scgiserver._oldSIGs = [] - self.ready = True - self.scgiserver.run() - - def stop(self): - """Stop the HTTP server.""" - self.ready = False - # Forcibly stop the scgi server main event loop. - self.scgiserver._keepGoing = False - # Force all worker threads to die off. - self.scgiserver._threadPool.maxSpare = 0 - - -def client_host(server_host): - """Return the host on which a client can connect to the given listener.""" - if server_host == '0.0.0.0': - # 0.0.0.0 is INADDR_ANY, which should answer on localhost. - return '127.0.0.1' - if server_host in ('::', '::0', '::0.0.0.0'): - # :: is IN6ADDR_ANY, which should answer on localhost. - # ::0 and ::0.0.0.0 are non-canonical but common - # ways to write IN6ADDR_ANY. - return '::1' - return server_host - - -def check_port(host, port, timeout=1.0): - """Raise an error if the given port is not free on the given host.""" - if not host: - raise ValueError("Host values of '' or None are not allowed.") - host = client_host(host) - port = int(port) - - import socket - - # AF_INET or AF_INET6 socket - # Get the correct address family for our host (allows IPv6 addresses) - try: - info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, - socket.SOCK_STREAM) - except socket.gaierror: - if ':' in host: - info = [( - socket.AF_INET6, socket.SOCK_STREAM, 0, "", (host, port, 0, 0) - )] - else: - info = [(socket.AF_INET, socket.SOCK_STREAM, 0, "", (host, port))] - - for res in info: - af, socktype, proto, canonname, sa = res - s = None - try: - s = socket.socket(af, socktype, proto) - # See http://groups.google.com/group/cherrypy-users/ - # browse_frm/thread/bbfe5eb39c904fe0 - s.settimeout(timeout) - s.connect((host, port)) - s.close() - except socket.error: - if s: - s.close() - else: - raise IOError("Port %s is in use on %s; perhaps the previous " - "httpserver did not shut down properly." % - (repr(port), repr(host))) - - -# Feel free to increase these defaults on slow systems: -free_port_timeout = 0.1 -occupied_port_timeout = 1.0 - - -def wait_for_free_port(host, port, timeout=None): - """Wait for the specified port to become free (drop requests).""" - if not host: - raise ValueError("Host values of '' or None are not allowed.") - if timeout is None: - timeout = free_port_timeout - - for trial in range(50): - try: - # we are expecting a free port, so reduce the timeout - check_port(host, port, timeout=timeout) - except IOError: - # Give the old server thread time to free the port. - time.sleep(timeout) - else: - return - - raise IOError("Port %r not free on %r" % (port, host)) - - -def wait_for_occupied_port(host, port, timeout=None): - """Wait for the specified port to become active (receive requests).""" - if not host: - raise ValueError("Host values of '' or None are not allowed.") - if timeout is None: - timeout = occupied_port_timeout - - for trial in range(50): - try: - check_port(host, port, timeout=timeout) - except IOError: - # port is occupied - return - else: - time.sleep(timeout) - - if host == client_host(host): - raise IOError("Port %r not bound on %r" % (port, host)) - - # On systems where a loopback interface is not available and the - # server is bound to all interfaces, it's difficult to determine - # whether the server is in fact occupying the port. In this case, - # just issue a warning and move on. See issue #1100. - msg = "Unable to verify that the server is bound on %r" % port - warnings.warn(msg) diff --git a/lib/cherrypy/process/win32.py b/lib/cherrypy/process/win32.py deleted file mode 100644 index 4afd3f14..00000000 --- a/lib/cherrypy/process/win32.py +++ /dev/null @@ -1,180 +0,0 @@ -"""Windows service. Requires pywin32.""" - -import os -import win32api -import win32con -import win32event -import win32service -import win32serviceutil - -from cherrypy.process import wspbus, plugins - - -class ConsoleCtrlHandler(plugins.SimplePlugin): - - """A WSPBus plugin for handling Win32 console events (like Ctrl-C).""" - - def __init__(self, bus): - self.is_set = False - plugins.SimplePlugin.__init__(self, bus) - - def start(self): - if self.is_set: - self.bus.log('Handler for console events already set.', level=40) - return - - result = win32api.SetConsoleCtrlHandler(self.handle, 1) - if result == 0: - self.bus.log('Could not SetConsoleCtrlHandler (error %r)' % - win32api.GetLastError(), level=40) - else: - self.bus.log('Set handler for console events.', level=40) - self.is_set = True - - def stop(self): - if not self.is_set: - self.bus.log('Handler for console events already off.', level=40) - return - - try: - result = win32api.SetConsoleCtrlHandler(self.handle, 0) - except ValueError: - # "ValueError: The object has not been registered" - result = 1 - - if result == 0: - self.bus.log('Could not remove SetConsoleCtrlHandler (error %r)' % - win32api.GetLastError(), level=40) - else: - self.bus.log('Removed handler for console events.', level=40) - self.is_set = False - - def handle(self, event): - """Handle console control events (like Ctrl-C).""" - if event in (win32con.CTRL_C_EVENT, win32con.CTRL_LOGOFF_EVENT, - win32con.CTRL_BREAK_EVENT, win32con.CTRL_SHUTDOWN_EVENT, - win32con.CTRL_CLOSE_EVENT): - self.bus.log('Console event %s: shutting down bus' % event) - - # Remove self immediately so repeated Ctrl-C doesn't re-call it. - try: - self.stop() - except ValueError: - pass - - self.bus.exit() - # 'First to return True stops the calls' - return 1 - return 0 - - -class Win32Bus(wspbus.Bus): - - """A Web Site Process Bus implementation for Win32. - - Instead of time.sleep, this bus blocks using native win32event objects. - """ - - def __init__(self): - self.events = {} - wspbus.Bus.__init__(self) - - def _get_state_event(self, state): - """Return a win32event for the given state (creating it if needed).""" - try: - return self.events[state] - except KeyError: - event = win32event.CreateEvent(None, 0, 0, - "WSPBus %s Event (pid=%r)" % - (state.name, os.getpid())) - self.events[state] = event - return event - - def _get_state(self): - return self._state - - def _set_state(self, value): - self._state = value - event = self._get_state_event(value) - win32event.PulseEvent(event) - state = property(_get_state, _set_state) - - def wait(self, state, interval=0.1, channel=None): - """Wait for the given state(s), KeyboardInterrupt or SystemExit. - - Since this class uses native win32event objects, the interval - argument is ignored. - """ - if isinstance(state, (tuple, list)): - # Don't wait for an event that beat us to the punch ;) - if self.state not in state: - events = tuple([self._get_state_event(s) for s in state]) - win32event.WaitForMultipleObjects( - events, 0, win32event.INFINITE) - else: - # Don't wait for an event that beat us to the punch ;) - if self.state != state: - event = self._get_state_event(state) - win32event.WaitForSingleObject(event, win32event.INFINITE) - - -class _ControlCodes(dict): - - """Control codes used to "signal" a service via ControlService. - - User-defined control codes are in the range 128-255. We generally use - the standard Python value for the Linux signal and add 128. Example: - - >>> signal.SIGUSR1 - 10 - control_codes['graceful'] = 128 + 10 - """ - - def key_for(self, obj): - """For the given value, return its corresponding key.""" - for key, val in self.items(): - if val is obj: - return key - raise ValueError("The given object could not be found: %r" % obj) - -control_codes = _ControlCodes({'graceful': 138}) - - -def signal_child(service, command): - if command == 'stop': - win32serviceutil.StopService(service) - elif command == 'restart': - win32serviceutil.RestartService(service) - else: - win32serviceutil.ControlService(service, control_codes[command]) - - -class PyWebService(win32serviceutil.ServiceFramework): - - """Python Web Service.""" - - _svc_name_ = "Python Web Service" - _svc_display_name_ = "Python Web Service" - _svc_deps_ = None # sequence of service names on which this depends - _exe_name_ = "pywebsvc" - _exe_args_ = None # Default to no arguments - - # Only exists on Windows 2000 or later, ignored on windows NT - _svc_description_ = "Python Web Service" - - def SvcDoRun(self): - from cherrypy import process - process.bus.start() - process.bus.block() - - def SvcStop(self): - from cherrypy import process - self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING) - process.bus.exit() - - def SvcOther(self, control): - process.bus.publish(control_codes.key_for(control)) - - -if __name__ == '__main__': - win32serviceutil.HandleCommandLine(PyWebService) diff --git a/lib/cherrypy/process/wspbus.py b/lib/cherrypy/process/wspbus.py deleted file mode 100644 index 5409d038..00000000 --- a/lib/cherrypy/process/wspbus.py +++ /dev/null @@ -1,448 +0,0 @@ -"""An implementation of the Web Site Process Bus. - -This module is completely standalone, depending only on the stdlib. - -Web Site Process Bus --------------------- - -A Bus object is used to contain and manage site-wide behavior: -daemonization, HTTP server start/stop, process reload, signal handling, -drop privileges, PID file management, logging for all of these, -and many more. - -In addition, a Bus object provides a place for each web framework -to register code that runs in response to site-wide events (like -process start and stop), or which controls or otherwise interacts with -the site-wide components mentioned above. For example, a framework which -uses file-based templates would add known template filenames to an -autoreload component. - -Ideally, a Bus object will be flexible enough to be useful in a variety -of invocation scenarios: - - 1. The deployer starts a site from the command line via a - framework-neutral deployment script; applications from multiple frameworks - are mixed in a single site. Command-line arguments and configuration - files are used to define site-wide components such as the HTTP server, - WSGI component graph, autoreload behavior, signal handling, etc. - 2. The deployer starts a site via some other process, such as Apache; - applications from multiple frameworks are mixed in a single site. - Autoreload and signal handling (from Python at least) are disabled. - 3. The deployer starts a site via a framework-specific mechanism; - for example, when running tests, exploring tutorials, or deploying - single applications from a single framework. The framework controls - which site-wide components are enabled as it sees fit. - -The Bus object in this package uses topic-based publish-subscribe -messaging to accomplish all this. A few topic channels are built in -('start', 'stop', 'exit', 'graceful', 'log', and 'main'). Frameworks and -site containers are free to define their own. If a message is sent to a -channel that has not been defined or has no listeners, there is no effect. - -In general, there should only ever be a single Bus object per process. -Frameworks and site containers share a single Bus object by publishing -messages and subscribing listeners. - -The Bus object works as a finite state machine which models the current -state of the process. Bus methods move it from one state to another; -those methods then publish to subscribed listeners on the channel for -the new state.:: - - O - | - V - STOPPING --> STOPPED --> EXITING -> X - A A | - | \___ | - | \ | - | V V - STARTED <-- STARTING - -""" - -import atexit -import os -import sys -import threading -import time -import traceback as _traceback -import warnings - -from cherrypy._cpcompat import set - -# Here I save the value of os.getcwd(), which, if I am imported early enough, -# will be the directory from which the startup script was run. This is needed -# by _do_execv(), to change back to the original directory before execv()ing a -# new process. This is a defense against the application having changed the -# current working directory (which could make sys.executable "not found" if -# sys.executable is a relative-path, and/or cause other problems). -_startup_cwd = os.getcwd() - - -class ChannelFailures(Exception): - - """Exception raised when errors occur in a listener during Bus.publish(). - """ - delimiter = '\n' - - def __init__(self, *args, **kwargs): - # Don't use 'super' here; Exceptions are old-style in Py2.4 - # See https://bitbucket.org/cherrypy/cherrypy/issue/959 - Exception.__init__(self, *args, **kwargs) - self._exceptions = list() - - def handle_exception(self): - """Append the current exception to self.""" - self._exceptions.append(sys.exc_info()[1]) - - def get_instances(self): - """Return a list of seen exception instances.""" - return self._exceptions[:] - - def __str__(self): - exception_strings = map(repr, self.get_instances()) - return self.delimiter.join(exception_strings) - - __repr__ = __str__ - - def __bool__(self): - return bool(self._exceptions) - __nonzero__ = __bool__ - -# Use a flag to indicate the state of the bus. - - -class _StateEnum(object): - - class State(object): - name = None - - def __repr__(self): - return "states.%s" % self.name - - def __setattr__(self, key, value): - if isinstance(value, self.State): - value.name = key - object.__setattr__(self, key, value) -states = _StateEnum() -states.STOPPED = states.State() -states.STARTING = states.State() -states.STARTED = states.State() -states.STOPPING = states.State() -states.EXITING = states.State() - - -try: - import fcntl -except ImportError: - max_files = 0 -else: - try: - max_files = os.sysconf('SC_OPEN_MAX') - except AttributeError: - max_files = 1024 - - -class Bus(object): - - """Process state-machine and messenger for HTTP site deployment. - - All listeners for a given channel are guaranteed to be called even - if others at the same channel fail. Each failure is logged, but - execution proceeds on to the next listener. The only way to stop all - processing from inside a listener is to raise SystemExit and stop the - whole server. - """ - - states = states - state = states.STOPPED - execv = False - max_cloexec_files = max_files - - def __init__(self): - self.execv = False - self.state = states.STOPPED - self.listeners = dict( - [(channel, set()) for channel - in ('start', 'stop', 'exit', 'graceful', 'log', 'main')]) - self._priorities = {} - - def subscribe(self, channel, callback, priority=None): - """Add the given callback at the given channel (if not present).""" - if channel not in self.listeners: - self.listeners[channel] = set() - self.listeners[channel].add(callback) - - if priority is None: - priority = getattr(callback, 'priority', 50) - self._priorities[(channel, callback)] = priority - - def unsubscribe(self, channel, callback): - """Discard the given callback (if present).""" - listeners = self.listeners.get(channel) - if listeners and callback in listeners: - listeners.discard(callback) - del self._priorities[(channel, callback)] - - def publish(self, channel, *args, **kwargs): - """Return output of all subscribers for the given channel.""" - if channel not in self.listeners: - return [] - - exc = ChannelFailures() - output = [] - - items = [(self._priorities[(channel, listener)], listener) - for listener in self.listeners[channel]] - try: - items.sort(key=lambda item: item[0]) - except TypeError: - # Python 2.3 had no 'key' arg, but that doesn't matter - # since it could sort dissimilar types just fine. - items.sort() - for priority, listener in items: - try: - output.append(listener(*args, **kwargs)) - except KeyboardInterrupt: - raise - except SystemExit: - e = sys.exc_info()[1] - # If we have previous errors ensure the exit code is non-zero - if exc and e.code == 0: - e.code = 1 - raise - except: - exc.handle_exception() - if channel == 'log': - # Assume any further messages to 'log' will fail. - pass - else: - self.log("Error in %r listener %r" % (channel, listener), - level=40, traceback=True) - if exc: - raise exc - return output - - def _clean_exit(self): - """An atexit handler which asserts the Bus is not running.""" - if self.state != states.EXITING: - warnings.warn( - "The main thread is exiting, but the Bus is in the %r state; " - "shutting it down automatically now. You must either call " - "bus.block() after start(), or call bus.exit() before the " - "main thread exits." % self.state, RuntimeWarning) - self.exit() - - def start(self): - """Start all services.""" - atexit.register(self._clean_exit) - - self.state = states.STARTING - self.log('Bus STARTING') - try: - self.publish('start') - self.state = states.STARTED - self.log('Bus STARTED') - except (KeyboardInterrupt, SystemExit): - raise - except: - self.log("Shutting down due to error in start listener:", - level=40, traceback=True) - e_info = sys.exc_info()[1] - try: - self.exit() - except: - # Any stop/exit errors will be logged inside publish(). - pass - # Re-raise the original error - raise e_info - - def exit(self): - """Stop all services and prepare to exit the process.""" - exitstate = self.state - try: - self.stop() - - self.state = states.EXITING - self.log('Bus EXITING') - self.publish('exit') - # This isn't strictly necessary, but it's better than seeing - # "Waiting for child threads to terminate..." and then nothing. - self.log('Bus EXITED') - except: - # This method is often called asynchronously (whether thread, - # signal handler, console handler, or atexit handler), so we - # can't just let exceptions propagate out unhandled. - # Assume it's been logged and just die. - os._exit(70) # EX_SOFTWARE - - if exitstate == states.STARTING: - # exit() was called before start() finished, possibly due to - # Ctrl-C because a start listener got stuck. In this case, - # we could get stuck in a loop where Ctrl-C never exits the - # process, so we just call os.exit here. - os._exit(70) # EX_SOFTWARE - - def restart(self): - """Restart the process (may close connections). - - This method does not restart the process from the calling thread; - instead, it stops the bus and asks the main thread to call execv. - """ - self.execv = True - self.exit() - - def graceful(self): - """Advise all services to reload.""" - self.log('Bus graceful') - self.publish('graceful') - - def block(self, interval=0.1): - """Wait for the EXITING state, KeyboardInterrupt or SystemExit. - - This function is intended to be called only by the main thread. - After waiting for the EXITING state, it also waits for all threads - to terminate, and then calls os.execv if self.execv is True. This - design allows another thread to call bus.restart, yet have the main - thread perform the actual execv call (required on some platforms). - """ - try: - self.wait(states.EXITING, interval=interval, channel='main') - except (KeyboardInterrupt, IOError): - # The time.sleep call might raise - # "IOError: [Errno 4] Interrupted function call" on KBInt. - self.log('Keyboard Interrupt: shutting down bus') - self.exit() - except SystemExit: - self.log('SystemExit raised: shutting down bus') - self.exit() - raise - - # Waiting for ALL child threads to finish is necessary on OS X. - # See https://bitbucket.org/cherrypy/cherrypy/issue/581. - # It's also good to let them all shut down before allowing - # the main thread to call atexit handlers. - # See https://bitbucket.org/cherrypy/cherrypy/issue/751. - self.log("Waiting for child threads to terminate...") - for t in threading.enumerate(): - # Validate the we're not trying to join the MainThread - # that will cause a deadlock and the case exist when - # implemented as a windows service and in any other case - # that another thread executes cherrypy.engine.exit() - if ( - t != threading.currentThread() and - t.isAlive() and - not isinstance(t, threading._MainThread) - ): - # Note that any dummy (external) threads are always daemonic. - if hasattr(threading.Thread, "daemon"): - # Python 2.6+ - d = t.daemon - else: - d = t.isDaemon() - if not d: - self.log("Waiting for thread %s." % t.getName()) - t.join() - - if self.execv: - self._do_execv() - - def wait(self, state, interval=0.1, channel=None): - """Poll for the given state(s) at intervals; publish to channel.""" - if isinstance(state, (tuple, list)): - states = state - else: - states = [state] - - def _wait(): - while self.state not in states: - time.sleep(interval) - self.publish(channel) - - # From http://psyco.sourceforge.net/psycoguide/bugs.html: - # "The compiled machine code does not include the regular polling - # done by Python, meaning that a KeyboardInterrupt will not be - # detected before execution comes back to the regular Python - # interpreter. Your program cannot be interrupted if caught - # into an infinite Psyco-compiled loop." - try: - sys.modules['psyco'].cannotcompile(_wait) - except (KeyError, AttributeError): - pass - - _wait() - - def _do_execv(self): - """Re-execute the current process. - - This must be called from the main thread, because certain platforms - (OS X) don't allow execv to be called in a child thread very well. - """ - args = sys.argv[:] - self.log('Re-spawning %s' % ' '.join(args)) - - if sys.platform[:4] == 'java': - from _systemrestart import SystemRestart - raise SystemRestart - else: - args.insert(0, sys.executable) - if sys.platform == 'win32': - args = ['"%s"' % arg for arg in args] - - os.chdir(_startup_cwd) - if self.max_cloexec_files: - self._set_cloexec() - os.execv(sys.executable, args) - - def _set_cloexec(self): - """Set the CLOEXEC flag on all open files (except stdin/out/err). - - If self.max_cloexec_files is an integer (the default), then on - platforms which support it, it represents the max open files setting - for the operating system. This function will be called just before - the process is restarted via os.execv() to prevent open files - from persisting into the new process. - - Set self.max_cloexec_files to 0 to disable this behavior. - """ - for fd in range(3, self.max_cloexec_files): # skip stdin/out/err - try: - flags = fcntl.fcntl(fd, fcntl.F_GETFD) - except IOError: - continue - fcntl.fcntl(fd, fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC) - - def stop(self): - """Stop all services.""" - self.state = states.STOPPING - self.log('Bus STOPPING') - self.publish('stop') - self.state = states.STOPPED - self.log('Bus STOPPED') - - def start_with_callback(self, func, args=None, kwargs=None): - """Start 'func' in a new thread T, then start self (and return T).""" - if args is None: - args = () - if kwargs is None: - kwargs = {} - args = (func,) + args - - def _callback(func, *a, **kw): - self.wait(states.STARTED) - func(*a, **kw) - t = threading.Thread(target=_callback, args=args, kwargs=kwargs) - t.setName('Bus Callback ' + t.getName()) - t.start() - - self.start() - - return t - - def log(self, msg="", level=20, traceback=False): - """Log the given message. Append the last traceback if requested.""" - if traceback: - msg += "\n" + "".join(_traceback.format_exception(*sys.exc_info())) - self.publish('log', msg, level) - -bus = Bus() diff --git a/lib/cherrypy/scaffold/__init__.py b/lib/cherrypy/scaffold/__init__.py deleted file mode 100644 index 50de34bb..00000000 --- a/lib/cherrypy/scaffold/__init__.py +++ /dev/null @@ -1,61 +0,0 @@ -""", a CherryPy application. - -Use this as a base for creating new CherryPy applications. When you want -to make a new app, copy and paste this folder to some other location -(maybe site-packages) and rename it to the name of your project, -then tweak as desired. - -Even before any tweaking, this should serve a few demonstration pages. -Change to this directory and run: - - ../cherryd -c site.conf - -""" - -import cherrypy -from cherrypy import tools, url - -import os -local_dir = os.path.join(os.getcwd(), os.path.dirname(__file__)) - - -class Root: - - _cp_config = {'tools.log_tracebacks.on': True, - } - - def index(self): - return """ -Try some other path, -or a default path.
-Or, just look at the pretty picture:
- -""" % (url("other"), url("else"), - url("files/made_with_cherrypy_small.png")) - index.exposed = True - - def default(self, *args, **kwargs): - return "args: %s kwargs: %s" % (args, kwargs) - default.exposed = True - - def other(self, a=2, b='bananas', c=None): - cherrypy.response.headers['Content-Type'] = 'text/plain' - if c is None: - return "Have %d %s." % (int(a), b) - else: - return "Have %d %s, %s." % (int(a), b, c) - other.exposed = True - - files = cherrypy.tools.staticdir.handler( - section="/files", - dir=os.path.join(local_dir, "static"), - # Ignore .php files, etc. - match=r'\.(css|gif|html?|ico|jpe?g|js|png|swf|xml)$', - ) - - -root = Root() - -# Uncomment the following to use your own favicon instead of CP's default. -#favicon_path = os.path.join(local_dir, "favicon.ico") -#root.favicon_ico = tools.staticfile.handler(filename=favicon_path) diff --git a/lib/cherrypy/scaffold/apache-fcgi.conf b/lib/cherrypy/scaffold/apache-fcgi.conf deleted file mode 100644 index 922398ea..00000000 --- a/lib/cherrypy/scaffold/apache-fcgi.conf +++ /dev/null @@ -1,22 +0,0 @@ -# Apache2 server conf file for using CherryPy with mod_fcgid. - -# This doesn't have to be "C:/", but it has to be a directory somewhere, and -# MUST match the directory used in the FastCgiExternalServer directive, below. -DocumentRoot "C:/" - -ServerName 127.0.0.1 -Listen 80 -LoadModule fastcgi_module modules/mod_fastcgi.dll -LoadModule rewrite_module modules/mod_rewrite.so - -Options ExecCGI -SetHandler fastcgi-script -RewriteEngine On -# Send requests for any URI to our fastcgi handler. -RewriteRule ^(.*)$ /fastcgi.pyc [L] - -# The FastCgiExternalServer directive defines filename as an external FastCGI application. -# If filename does not begin with a slash (/) then it is assumed to be relative to the ServerRoot. -# The filename does not have to exist in the local filesystem. URIs that Apache resolves to this -# filename will be handled by this external FastCGI application. -FastCgiExternalServer "C:/fastcgi.pyc" -host 127.0.0.1:8088 \ No newline at end of file diff --git a/lib/cherrypy/scaffold/example.conf b/lib/cherrypy/scaffold/example.conf deleted file mode 100644 index 93a6e53c..00000000 --- a/lib/cherrypy/scaffold/example.conf +++ /dev/null @@ -1,3 +0,0 @@ -[/] -log.error_file: "error.log" -log.access_file: "access.log" \ No newline at end of file diff --git a/lib/cherrypy/scaffold/site.conf b/lib/cherrypy/scaffold/site.conf deleted file mode 100644 index 6ed38983..00000000 --- a/lib/cherrypy/scaffold/site.conf +++ /dev/null @@ -1,14 +0,0 @@ -[global] -# Uncomment this when you're done developing -#environment: "production" - -server.socket_host: "0.0.0.0" -server.socket_port: 8088 - -# Uncomment the following lines to run on HTTPS at the same time -#server.2.socket_host: "0.0.0.0" -#server.2.socket_port: 8433 -#server.2.ssl_certificate: '../test/test.pem' -#server.2.ssl_private_key: '../test/test.pem' - -tree.myapp: cherrypy.Application(scaffold.root, "/", "example.conf") diff --git a/lib/cherrypy/scaffold/static/made_with_cherrypy_small.png b/lib/cherrypy/scaffold/static/made_with_cherrypy_small.png deleted file mode 100644 index c3aafeed952190f5da9982bb359aa75b107ff079..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7455 zcmV+)9pK`LP)7N6iYPr5@U(R*fj>!*b#f9NC#;mT|kg1D2jq&Lj>tf5D-D6cTjpq zM2*V(&+dUE$T@H@a&NdlKF>Vo`?k!^&c5I5?CvbS4*K`nzw94S`&vnU?rYUm<*)VZ zKlrsb-hAs{CSiv-Eoy)P>)-P4@xvMfTz0~N?aQ%e@i^>WG#2rZLH`!v(s_J^DycTIW{%{Z8$3ex2 zJwN{Ye*4vGhvaBGVAYbvdG-)hRoQS0(8QrrbKv5!75RlXDZQFg?b9mUNpTedsvcPE z_x}BCVY&M9FZ=uK??QBtq&g@;?Xw5}_|tgBz^VnF-bd}Q_mf%$u73`!+D8PcbeaJ}k)Q2^zs@ef5tl{CNA!}w`+bR9j(v1($D9<&6fHUBMQR|~V-NCgX(7O9Ij&o-L`i}^O5*hpVzB~2X3{8((H8QI zbKQnO0*w+O1=c7^$*0W=Wfpxy5a z>?c5&jOoMQ5B|^-Y)UIj(nWEcHmqiRk5jA0JZ?7ur09lAJ&>Q>> z+?D3QdBbGvdhcxl)@Gg`=eh+tgT5v}F2)tb*}QJ1zd|EfZ#t}lTK7-L3LW6-F-{w? z?TyzF>mny;gVL2g|B%49hx0f%suzJulgq#D_1B?FQ=?k9t|+23FRl5|=>0j&Cicfk zl2@zm7rZa`NzRt^Q=F_v{&Hm71MAttR6L^9!Ox5ULvXRt~1X1Y=?`_ zHi;-c(ON7oaczi8ugSH={Y?5QTR~9{YoJbpS(&EG>tzM(#g4b$K>kft{$8H6AA3LQ zgcSGfP56ddN<)9>w>&-OWwrC{I zZT_hdPuAAH&p$@e<*hv3QD!XcmyW`zKgaX;go|5V zU5}@EY0iVqFGF})_MQ{0h#hagiGnD#7WGp>JhJXsw=uBDBtEH%L~xioU;Rbh(kCEz z5?vEX!I3&R%S&7v;?f@#o+70&#Y#@`=QnXHRbQtxUK2ateG8=tn!+?@ z1yZhVa==1&Tg6lCWrZ?{>bp%jeTEyM9#T3Tx6%vG`oP;3AK{u`RtkH z34o<8^NKnifrk-N>p<8?#`5>m z3eWh*-ZXvf;br~w=EI2msS|&U7S;_hUka0Pz4?22iYAzh370kr^RKEft2$ixUKk?) zcSHG_lOu&boYJC}w;o>FV&xUaz>=oG#CQ_|z@=vCMwA{LJ!D%&X#~cW__i+pBC2yt zB?1*wZ1@prX!ib0SU!Um9n29fx~*I{Xc~M#lHBAt?ftkDO)(?juxunb!#$u?SGP1# z4JKoIL!;NH)1J!Loe^?q8RwJYu?1>0{V|`+e(6ZcABBD-o>q~j zM%GG}R)qWbdxqaO1ews$mGc_1YWt9Qd84pyd5S8c99AI2d@-_vcF?MC8wzg8H{u<2 zd?jsHETF0F58B3HgWOa`CR0wx&PI7@B}=PYl@@ivcqWJKAzP8``Qf(O;TcZifo`#KT;ti6;eC!m)T=ovB4x0T%g2v~V8 zuJG?agahhD3K}sMF=~o9t~njip5(|TIG{T9I35+8p#gRlNuD7JIC^EZ#AHvvtITt3 z#H*&@G@?UH;p(J^1G;;#Rc&_>OlcWTepf1e@$HP#!gs^pY%hEbcfiMRC%kmF;+pmrTvL;SxAtZPm~BDC`OPSb zC*STB_ANP-78);OvGOlmhF|aRM1rawWB~)DaDD6)M9y7=m=$YrM|LaXckDohi3%!i z+LBdsYDE5FDmZd!rNngHeH|VaJm--^kr79&m9hNyMfm2MZ}0~B1PnO!G;78f0uWg) zYPTrW4&M#v!ShZZQ)nW~i?Z=|0@=fSdpsluzr9dqj0pmV{|Min197Q)Kb-DH8@J!~ zh70|A^yp89VJJezPKKY{LKG)kzOsI0#u%ZbfTA8+_}9pJe~^}efI-8X1(jc~?+{E@ zE8>@Vh+Qj-BqcRu7@Koq&v#@uquAGkEXXM#Mc>ww7*q^^u0C6ZbyIJdW532u^ytwY zojSb>nLp;k-c}vH?q;}p-T>x?+u-<@hOm5T=246sKClj0dmG8oP^R|&d^g?-0i(y_ zQn$VcKW%`lTPIMM?2N3N4!C(r2X2dI;Ti#qjaQhWG<~QiUx_ZVVl7ZonA{Ss8VCB_ zUL)HqsF=m_?>7XY_k(mZ#lbvl{nPj*t~N&M7zD61Ep) zBqr}*d0ww5)dsqnJMh_OpP_%hUWf^E;FheO@AF>@@2G35BKs)Dj2_H^H~PYKbbd|H9Ns!#S`npV9!(y z)#TYDKI|aQI%+~sdpi{Otc9NDMw~yXgyJ-tMs*63tr6m>L+y~WoNb5XNE4htr3ow3 zy|6zlk1((Oyw3G&`{9089jtujZ8KavtqE&Wc^tErhu>ul+zCC1a9>?S2N~c&E=|WT z^fNEX3K4#Kh$LeDWBGYWmhhi41(&<^Lsqbn$iBp#RpN`Uv)%imEX58f5hjQt9=8MO zIOnc_PeWp|-tyN+w4W|2GDPbq>ox;cy5zvhv9oUxRNP{bQy)5E8gBi*5=n}xDE9KD zAj2o9eewjw-oBKkhG@!*S-BK7d4CBhC`?HrP%-eOCLBP1q6Jn+&%^7qcci>`755Pu z@)P%5nQe<1KaJqPGSgp6)97|gnm8VvJ9oz0Z@-QA-g^(-x_yhq^T*RbA8N$&L9QJ< z&#Oaj<0>lC4R5{m7We1a@8_R?jsX_X z18_Iqyh%S@$nyR??I#SA8jP|``cO%)xd|5VpZXKr2v}*p1|oG6>!FF@;bU;R-#}Dm zI^dSoZroU~6oEg_LQarwlRCF9Ya;5;b?}%r69v%}?AKuY>Yp=Wj9X{1l2kP%0+wIz zK?oT&fjjPZWVazzTc1ydjM(#?P1|_h^)ZuZ)pb}9iAYqKZVpS=Ww1E6q**#dQ4*zD zmh>g7&;azun>4WT5tc$e_58&BR#KaDW{&2-8YVS_2J~CVuKyi}jCVp`YXe4&7{K!w zV2$=KZ^SpmQxnUUF2K9*z6)iA4Y(a1 zf|=n~1YX%kykF;Kj+m`!(pPn*74%WO#X-m{Dr>tS*#f>(CgTbNmXBVOI!s1?TLlCT z83pG#)A=-bFY*Ac^&12qG7#ZfE1T2_Cg25pHw>qzOrlxFQKZh(qYYaJtoWVs5&?@% zmw|&v@aZsW(Q?GDm!lP-JSX-XwU{>JCQs*ODq@Yvv{M8kBZ@?H57^csupF+eLiIgs z*qY1nDe$W=yP`VR{yCpIUiCZ~Uru%HR1hfZfwgqebQGnZqC8t+net3)3>(s&58|8| zQj}-I^Gedpp}6}`4k`m}IsUaaGWhewH-?R}1{E zs}t#588mucNhZ8Qy&JO$-{?jej|k z#;jLlnhSwtXU@WZo5s2;?);TopOM3Qp@zb4S()peDi|?bid&%4OajLX^)jRN@$ttW z@&OLKqQvvd(#Z3*gwZ zKfFzqNPl+1ddY5z@a{DL9^LyR-BVeZo?ym^%XL6va|joGQ7G zm8`K}Vqh`EGvEXa9LDGHkkJ!4v1evVS)O0tL8#6$ZxUM_Kt~cL6c8wm!XsFqzXjA3 z7I9$p>GcH-hU78p6`7_DfVEC!Z1-WKmE6Y}HM~#VKwIFLr5yLehD-G!M_n0b?3LhX zCl3dkz4*&&4-T8|#K;j+eDE)x+`;))W}1;dH;kv9wA~~&j{J`$6L=XDy_I6i6eb&? zcMrkFuCD5QVL2<~P2{hA1T{rgmwhCHxc%Z3V|e{Mm5-SZjf2mSA8>WRP&gCN?7sV+ zLTOeaEws*>H;DCTGJDUSC^${ou|UW#4JN=d}9YVJPSUGmbS_Q27 zl_Ccb`CXjoxjy~Ij0tq;Yu1D+vrJ%rSw<|dvJ-W99euTh99ZwsLGQgd1ChK$@h{IX zZUC&cBJ0_it)@M(PM9!uAWaVzyo{FeGVY`^U#4r$9w z?#}YOPGzPE=FJ(y!Qf~sD^|w!tEO?E&O}$H*?wp$&gHy@522tv(o<2gA!L@%#{2Jg zLVAp4qq@0qdhq>u8a&DJTveCFb$b;=xa>!2=pj_)oZui7r5~0Sb3~T=SyzS)p|j6} zv;s7Nx8i!7BMX<}Z`7=e=~3D!YYniU9lo)A&l&C{Uqm@#2oDC|ntazRBjDuMC+js%6Y&M9MQFiqV z0kCwM??+0620r?*6E7R?C67=Kc?=si6f(chpuuM0)z4%0@5a&$Gj%GT(&8`elUP5D zMarbo%>^IP$!GkiVf1IH4kameZ(sP7dsLv z3Z{s}?1N${l1|wVz5py2Puf_mt5Za=7RQPrsu-IT7gpu|_4y>LO$6k`cE*FTGy*{rz-GCMV0{_X;X1$m?hWJx@ z3NK@(B}9UBL2Dx%1bgN_pw;X);RFZZKOu8olRk`1}&0L z;O0ga>v|}QAAerYOm~rRYhd0lQn*K#0b1f)nyibd6NmD8R)#Vn%OBKU&iAw?jG++T zEK(%38SS?hohXox)zgswg(3u4OnCEsC`;ADte*wII&pZN$nupL29RDdNdT<FKwtAXK9T!ME$zNHmmfG}C`!ZGg@cyk%d0Zp&%b+=}T^N0_%3fr|c=LTe78 zS=m$_gO>)DU!U)A*O~6iWirJ}PNAXOVB@-k?!DPG`nbRktG?jr-7x@%tKCZ5sL~I!5Lp&i-g+vf_-t|Y*tMgR zwi^gAgZQeg9%O9Q$OeFs+`8Eum_>;iw7cGirnPrqpuxg_o653j^%PnazJoEN2Jkz1 z4a*eKSE;&pKEC>*t8f~wZ{zjXX~ibRrcWbYC-06bZMOF2ZP}UqDYC7SWKEiy2lFzz zs`T+lpc_YPX}(cAYx)mOG0Q*Ukex223t9@{8Ea#5P88OV?S+vnvmJo zz_}B9csmzPDd1)R-6za!T&E<}l=G^fZ4Pn$DQ!WX7wesOSc^|S=?WvAm121^xl8G^ zpfufz+Lk<)zhPT3?i5b_<3xDMON-V+-NloVBz1&bkwaE$s6^IEq{V7j^I>it*#1`B z)?Kt2D%AvJ?ARt*LkD-G&mk>bo=(tN+%*gN?Vy*{L3ye=4Cx$??VUvjwyhq04{wIY z&>s-v%5D&eZmT#!71t>yJ|^5*VoB4IL>Yx^R zS@frg=zifnx(L87542Ux_5U*8GSZuy(`L+?ISo=Y#a8lZy5)->tu4c1;)5=1Lx}7K zoE$2JGTJ14kw+E6dP)j;s@#_~Hx5HR}E9U+>i=Q~?Ypf*Q?S19?3v}59&qEX|Cc6pV6szUBO9q)y z3cQ_+$UR^&?Ki#TaLsugLcGmTQI^{(OI2U^l>)1tDck3`ml=-47+1vHIDu%2{Olm{ zI*1H9im6k^afh8POlHlTfw)_j+eBwq|CAwzT?%d#Cx3MMO!}Wc+=T7Kgq=WaBwe2) zU+Q5^I0?`Ps8)FgG)WiZcB= (3, 0): - def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): - return wsgiserver.CP_makefile(sock, mode, bufsize) - else: - def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): - return wsgiserver.CP_fileobject(sock, mode, bufsize) diff --git a/lib/cherrypy/wsgiserver/ssl_pyopenssl.py b/lib/cherrypy/wsgiserver/ssl_pyopenssl.py deleted file mode 100644 index f8f2dafe..00000000 --- a/lib/cherrypy/wsgiserver/ssl_pyopenssl.py +++ /dev/null @@ -1,253 +0,0 @@ -"""A library for integrating pyOpenSSL with CherryPy. - -The OpenSSL module must be importable for SSL functionality. -You can obtain it from `here `_. - -To use this module, set CherryPyWSGIServer.ssl_adapter to an instance of -SSLAdapter. There are two ways to use SSL: - -Method One ----------- - - * ``ssl_adapter.context``: an instance of SSL.Context. - -If this is not None, it is assumed to be an SSL.Context instance, -and will be passed to SSL.Connection on bind(). The developer is -responsible for forming a valid Context object. This approach is -to be preferred for more flexibility, e.g. if the cert and key are -streams instead of files, or need decryption, or SSL.SSLv3_METHOD -is desired instead of the default SSL.SSLv23_METHOD, etc. Consult -the pyOpenSSL documentation for complete options. - -Method Two (shortcut) ---------------------- - - * ``ssl_adapter.certificate``: the filename of the server SSL certificate. - * ``ssl_adapter.private_key``: the filename of the server's private key file. - -Both are None by default. If ssl_adapter.context is None, but .private_key -and .certificate are both given and valid, they will be read, and the -context will be automatically created from them. -""" - -import socket -import threading -import time - -from cherrypy import wsgiserver - -try: - from OpenSSL import SSL - from OpenSSL import crypto -except ImportError: - SSL = None - - -class SSL_fileobject(wsgiserver.CP_fileobject): - - """SSL file object attached to a socket object.""" - - ssl_timeout = 3 - ssl_retry = .01 - - def _safe_call(self, is_reader, call, *args, **kwargs): - """Wrap the given call with SSL error-trapping. - - is_reader: if False EOF errors will be raised. If True, EOF errors - will return "" (to emulate normal sockets). - """ - start = time.time() - while True: - try: - return call(*args, **kwargs) - except SSL.WantReadError: - # Sleep and try again. This is dangerous, because it means - # the rest of the stack has no way of differentiating - # between a "new handshake" error and "client dropped". - # Note this isn't an endless loop: there's a timeout below. - time.sleep(self.ssl_retry) - except SSL.WantWriteError: - time.sleep(self.ssl_retry) - except SSL.SysCallError, e: - if is_reader and e.args == (-1, 'Unexpected EOF'): - return "" - - errnum = e.args[0] - if is_reader and errnum in wsgiserver.socket_errors_to_ignore: - return "" - raise socket.error(errnum) - except SSL.Error, e: - if is_reader and e.args == (-1, 'Unexpected EOF'): - return "" - - thirdarg = None - try: - thirdarg = e.args[0][0][2] - except IndexError: - pass - - if thirdarg == 'http request': - # The client is talking HTTP to an HTTPS server. - raise wsgiserver.NoSSLError() - - raise wsgiserver.FatalSSLAlert(*e.args) - except: - raise - - if time.time() - start > self.ssl_timeout: - raise socket.timeout("timed out") - - def recv(self, size): - return self._safe_call(True, super(SSL_fileobject, self).recv, size) - - def sendall(self, *args, **kwargs): - return self._safe_call(False, super(SSL_fileobject, self).sendall, - *args, **kwargs) - - def send(self, *args, **kwargs): - return self._safe_call(False, super(SSL_fileobject, self).send, - *args, **kwargs) - - -class SSLConnection: - - """A thread-safe wrapper for an SSL.Connection. - - ``*args``: the arguments to create the wrapped ``SSL.Connection(*args)``. - """ - - def __init__(self, *args): - self._ssl_conn = SSL.Connection(*args) - self._lock = threading.RLock() - - for f in ('get_context', 'pending', 'send', 'write', 'recv', 'read', - 'renegotiate', 'bind', 'listen', 'connect', 'accept', - 'setblocking', 'fileno', 'close', 'get_cipher_list', - 'getpeername', 'getsockname', 'getsockopt', 'setsockopt', - 'makefile', 'get_app_data', 'set_app_data', 'state_string', - 'sock_shutdown', 'get_peer_certificate', 'want_read', - 'want_write', 'set_connect_state', 'set_accept_state', - 'connect_ex', 'sendall', 'settimeout', 'gettimeout'): - exec("""def %s(self, *args): - self._lock.acquire() - try: - return self._ssl_conn.%s(*args) - finally: - self._lock.release() -""" % (f, f)) - - def shutdown(self, *args): - self._lock.acquire() - try: - # pyOpenSSL.socket.shutdown takes no args - return self._ssl_conn.shutdown() - finally: - self._lock.release() - - -class pyOpenSSLAdapter(wsgiserver.SSLAdapter): - - """A wrapper for integrating pyOpenSSL with CherryPy.""" - - context = None - """An instance of SSL.Context.""" - - certificate = None - """The filename of the server SSL certificate.""" - - private_key = None - """The filename of the server's private key file.""" - - certificate_chain = None - """Optional. The filename of CA's intermediate certificate bundle. - - This is needed for cheaper "chained root" SSL certificates, and should be - left as None if not required.""" - - def __init__(self, certificate, private_key, certificate_chain=None): - if SSL is None: - raise ImportError("You must install pyOpenSSL to use HTTPS.") - - self.context = None - self.certificate = certificate - self.private_key = private_key - self.certificate_chain = certificate_chain - self._environ = None - - def bind(self, sock): - """Wrap and return the given socket.""" - if self.context is None: - self.context = self.get_context() - conn = SSLConnection(self.context, sock) - self._environ = self.get_environ() - return conn - - def wrap(self, sock): - """Wrap and return the given socket, plus WSGI environ entries.""" - return sock, self._environ.copy() - - def get_context(self): - """Return an SSL.Context from self attributes.""" - # See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/442473 - c = SSL.Context(SSL.SSLv23_METHOD) - c.use_privatekey_file(self.private_key) - if self.certificate_chain: - c.load_verify_locations(self.certificate_chain) - c.use_certificate_file(self.certificate) - return c - - def get_environ(self): - """Return WSGI environ entries to be merged into each request.""" - ssl_environ = { - "HTTPS": "on", - # pyOpenSSL doesn't provide access to any of these AFAICT - # 'SSL_PROTOCOL': 'SSLv2', - # SSL_CIPHER string The cipher specification name - # SSL_VERSION_INTERFACE string The mod_ssl program version - # SSL_VERSION_LIBRARY string The OpenSSL program version - } - - if self.certificate: - # Server certificate attributes - cert = open(self.certificate, 'rb').read() - cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert) - ssl_environ.update({ - 'SSL_SERVER_M_VERSION': cert.get_version(), - 'SSL_SERVER_M_SERIAL': cert.get_serial_number(), - # 'SSL_SERVER_V_START': - # Validity of server's certificate (start time), - # 'SSL_SERVER_V_END': - # Validity of server's certificate (end time), - }) - - for prefix, dn in [("I", cert.get_issuer()), - ("S", cert.get_subject())]: - # X509Name objects don't seem to have a way to get the - # complete DN string. Use str() and slice it instead, - # because str(dn) == "" - dnstr = str(dn)[18:-2] - - wsgikey = 'SSL_SERVER_%s_DN' % prefix - ssl_environ[wsgikey] = dnstr - - # The DN should be of the form: /k1=v1/k2=v2, but we must allow - # for any value to contain slashes itself (in a URL). - while dnstr: - pos = dnstr.rfind("=") - dnstr, value = dnstr[:pos], dnstr[pos + 1:] - pos = dnstr.rfind("/") - dnstr, key = dnstr[:pos], dnstr[pos + 1:] - if key and value: - wsgikey = 'SSL_SERVER_%s_DN_%s' % (prefix, key) - ssl_environ[wsgikey] = value - - return ssl_environ - - def makefile(self, sock, mode='r', bufsize=-1): - if SSL and isinstance(sock, SSL.ConnectionType): - timeout = sock.gettimeout() - f = SSL_fileobject(sock, mode, bufsize) - f.ssl_timeout = timeout - return f - else: - return wsgiserver.CP_fileobject(sock, mode, bufsize) diff --git a/lib/cherrypy/wsgiserver/wsgiserver2.py b/lib/cherrypy/wsgiserver/wsgiserver2.py deleted file mode 100644 index b4919cf5..00000000 --- a/lib/cherrypy/wsgiserver/wsgiserver2.py +++ /dev/null @@ -1,2481 +0,0 @@ -"""A high-speed, production ready, thread pooled, generic HTTP server. - -Simplest example on how to use this module directly -(without using CherryPy's application machinery):: - - from cherrypy import wsgiserver - - def my_crazy_app(environ, start_response): - status = '200 OK' - response_headers = [('Content-type','text/plain')] - start_response(status, response_headers) - return ['Hello world!'] - - server = wsgiserver.CherryPyWSGIServer( - ('0.0.0.0', 8070), my_crazy_app, - server_name='www.cherrypy.example') - server.start() - -The CherryPy WSGI server can serve as many WSGI applications -as you want in one instance by using a WSGIPathInfoDispatcher:: - - d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app}) - server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d) - -Want SSL support? Just set server.ssl_adapter to an SSLAdapter instance. - -This won't call the CherryPy engine (application side) at all, only the -HTTP server, which is independent from the rest of CherryPy. Don't -let the name "CherryPyWSGIServer" throw you; the name merely reflects -its origin, not its coupling. - -For those of you wanting to understand internals of this module, here's the -basic call flow. The server's listening thread runs a very tight loop, -sticking incoming connections onto a Queue:: - - server = CherryPyWSGIServer(...) - server.start() - while True: - tick() - # This blocks until a request comes in: - child = socket.accept() - conn = HTTPConnection(child, ...) - server.requests.put(conn) - -Worker threads are kept in a pool and poll the Queue, popping off and then -handling each connection in turn. Each connection can consist of an arbitrary -number of requests and their responses, so we run a nested loop:: - - while True: - conn = server.requests.get() - conn.communicate() - -> while True: - req = HTTPRequest(...) - req.parse_request() - -> # Read the Request-Line, e.g. "GET /page HTTP/1.1" - req.rfile.readline() - read_headers(req.rfile, req.inheaders) - req.respond() - -> response = app(...) - try: - for chunk in response: - if chunk: - req.write(chunk) - finally: - if hasattr(response, "close"): - response.close() - if req.close_connection: - return -""" - -__all__ = ['HTTPRequest', 'HTTPConnection', 'HTTPServer', - 'SizeCheckWrapper', 'KnownLengthRFile', 'ChunkedRFile', - 'CP_fileobject', - 'MaxSizeExceeded', 'NoSSLError', 'FatalSSLAlert', - 'WorkerThread', 'ThreadPool', 'SSLAdapter', - 'CherryPyWSGIServer', - 'Gateway', 'WSGIGateway', 'WSGIGateway_10', 'WSGIGateway_u0', - 'WSGIPathInfoDispatcher', 'get_ssl_adapter_class'] - -import os -try: - import queue -except: - import Queue as queue -import re -import rfc822 -import socket -import sys -if 'win' in sys.platform and hasattr(socket, "AF_INET6"): - if not hasattr(socket, 'IPPROTO_IPV6'): - socket.IPPROTO_IPV6 = 41 - if not hasattr(socket, 'IPV6_V6ONLY'): - socket.IPV6_V6ONLY = 27 -try: - import cStringIO as StringIO -except ImportError: - import StringIO -DEFAULT_BUFFER_SIZE = -1 - - -class FauxSocket(object): - - """Faux socket with the minimal interface required by pypy""" - - def _reuse(self): - pass - -_fileobject_uses_str_type = isinstance( - socket._fileobject(FauxSocket())._rbuf, basestring) -del FauxSocket # this class is not longer required for anything. - -import threading -import time -import traceback - - -def format_exc(limit=None): - """Like print_exc() but return a string. Backport for Python 2.3.""" - try: - etype, value, tb = sys.exc_info() - return ''.join(traceback.format_exception(etype, value, tb, limit)) - finally: - etype = value = tb = None - -import operator - -from urllib import unquote -import warnings - -if sys.version_info >= (3, 0): - bytestr = bytes - unicodestr = str - basestring = (bytes, str) - - def ntob(n, encoding='ISO-8859-1'): - """Return the given native string as a byte string in the given - encoding. - """ - # In Python 3, the native string type is unicode - return n.encode(encoding) -else: - bytestr = str - unicodestr = unicode - basestring = basestring - - def ntob(n, encoding='ISO-8859-1'): - """Return the given native string as a byte string in the given - encoding. - """ - # In Python 2, the native string type is bytes. Assume it's already - # in the given encoding, which for ISO-8859-1 is almost always what - # was intended. - return n - -LF = ntob('\n') -CRLF = ntob('\r\n') -TAB = ntob('\t') -SPACE = ntob(' ') -COLON = ntob(':') -SEMICOLON = ntob(';') -EMPTY = ntob('') -NUMBER_SIGN = ntob('#') -QUESTION_MARK = ntob('?') -ASTERISK = ntob('*') -FORWARD_SLASH = ntob('/') -quoted_slash = re.compile(ntob("(?i)%2F")) - -import errno - - -def plat_specific_errors(*errnames): - """Return error numbers for all errors in errnames on this platform. - - The 'errno' module contains different global constants depending on - the specific platform (OS). This function will return the list of - numeric values for a given list of potential names. - """ - errno_names = dir(errno) - nums = [getattr(errno, k) for k in errnames if k in errno_names] - # de-dupe the list - return list(dict.fromkeys(nums).keys()) - -socket_error_eintr = plat_specific_errors("EINTR", "WSAEINTR") - -socket_errors_to_ignore = plat_specific_errors( - "EPIPE", - "EBADF", "WSAEBADF", - "ENOTSOCK", "WSAENOTSOCK", - "ETIMEDOUT", "WSAETIMEDOUT", - "ECONNREFUSED", "WSAECONNREFUSED", - "ECONNRESET", "WSAECONNRESET", - "ECONNABORTED", "WSAECONNABORTED", - "ENETRESET", "WSAENETRESET", - "EHOSTDOWN", "EHOSTUNREACH", -) -socket_errors_to_ignore.append("timed out") -socket_errors_to_ignore.append("The read operation timed out") - -socket_errors_nonblocking = plat_specific_errors( - 'EAGAIN', 'EWOULDBLOCK', 'WSAEWOULDBLOCK') - -comma_separated_headers = [ - ntob(h) for h in - ['Accept', 'Accept-Charset', 'Accept-Encoding', - 'Accept-Language', 'Accept-Ranges', 'Allow', 'Cache-Control', - 'Connection', 'Content-Encoding', 'Content-Language', 'Expect', - 'If-Match', 'If-None-Match', 'Pragma', 'Proxy-Authenticate', 'TE', - 'Trailer', 'Transfer-Encoding', 'Upgrade', 'Vary', 'Via', 'Warning', - 'WWW-Authenticate'] -] - - -import logging -if not hasattr(logging, 'statistics'): - logging.statistics = {} - - -def read_headers(rfile, hdict=None): - """Read headers from the given stream into the given header dict. - - If hdict is None, a new header dict is created. Returns the populated - header dict. - - Headers which are repeated are folded together using a comma if their - specification so dictates. - - This function raises ValueError when the read bytes violate the HTTP spec. - You should probably return "400 Bad Request" if this happens. - """ - if hdict is None: - hdict = {} - - while True: - line = rfile.readline() - if not line: - # No more data--illegal end of headers - raise ValueError("Illegal end of headers.") - - if line == CRLF: - # Normal end of headers - break - if not line.endswith(CRLF): - raise ValueError("HTTP requires CRLF terminators") - - if line[0] in (SPACE, TAB): - # It's a continuation line. - v = line.strip() - else: - try: - k, v = line.split(COLON, 1) - except ValueError: - raise ValueError("Illegal header line.") - # TODO: what about TE and WWW-Authenticate? - k = k.strip().title() - v = v.strip() - hname = k - - if k in comma_separated_headers: - existing = hdict.get(hname) - if existing: - v = ", ".join((existing, v)) - hdict[hname] = v - - return hdict - - -class MaxSizeExceeded(Exception): - pass - - -class SizeCheckWrapper(object): - - """Wraps a file-like object, raising MaxSizeExceeded if too large.""" - - def __init__(self, rfile, maxlen): - self.rfile = rfile - self.maxlen = maxlen - self.bytes_read = 0 - - def _check_length(self): - if self.maxlen and self.bytes_read > self.maxlen: - raise MaxSizeExceeded() - - def read(self, size=None): - data = self.rfile.read(size) - self.bytes_read += len(data) - self._check_length() - return data - - def readline(self, size=None): - if size is not None: - data = self.rfile.readline(size) - self.bytes_read += len(data) - self._check_length() - return data - - # User didn't specify a size ... - # We read the line in chunks to make sure it's not a 100MB line ! - res = [] - while True: - data = self.rfile.readline(256) - self.bytes_read += len(data) - self._check_length() - res.append(data) - # See https://bitbucket.org/cherrypy/cherrypy/issue/421 - if len(data) < 256 or data[-1:] == LF: - return EMPTY.join(res) - - def readlines(self, sizehint=0): - # Shamelessly stolen from StringIO - total = 0 - lines = [] - line = self.readline() - while line: - lines.append(line) - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline() - return lines - - def close(self): - self.rfile.close() - - def __iter__(self): - return self - - def __next__(self): - data = next(self.rfile) - self.bytes_read += len(data) - self._check_length() - return data - - def next(self): - data = self.rfile.next() - self.bytes_read += len(data) - self._check_length() - return data - - -class KnownLengthRFile(object): - - """Wraps a file-like object, returning an empty string when exhausted.""" - - def __init__(self, rfile, content_length): - self.rfile = rfile - self.remaining = content_length - - def read(self, size=None): - if self.remaining == 0: - return '' - if size is None: - size = self.remaining - else: - size = min(size, self.remaining) - - data = self.rfile.read(size) - self.remaining -= len(data) - return data - - def readline(self, size=None): - if self.remaining == 0: - return '' - if size is None: - size = self.remaining - else: - size = min(size, self.remaining) - - data = self.rfile.readline(size) - self.remaining -= len(data) - return data - - def readlines(self, sizehint=0): - # Shamelessly stolen from StringIO - total = 0 - lines = [] - line = self.readline(sizehint) - while line: - lines.append(line) - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline(sizehint) - return lines - - def close(self): - self.rfile.close() - - def __iter__(self): - return self - - def __next__(self): - data = next(self.rfile) - self.remaining -= len(data) - return data - - -class ChunkedRFile(object): - - """Wraps a file-like object, returning an empty string when exhausted. - - This class is intended to provide a conforming wsgi.input value for - request entities that have been encoded with the 'chunked' transfer - encoding. - """ - - def __init__(self, rfile, maxlen, bufsize=8192): - self.rfile = rfile - self.maxlen = maxlen - self.bytes_read = 0 - self.buffer = EMPTY - self.bufsize = bufsize - self.closed = False - - def _fetch(self): - if self.closed: - return - - line = self.rfile.readline() - self.bytes_read += len(line) - - if self.maxlen and self.bytes_read > self.maxlen: - raise MaxSizeExceeded("Request Entity Too Large", self.maxlen) - - line = line.strip().split(SEMICOLON, 1) - - try: - chunk_size = line.pop(0) - chunk_size = int(chunk_size, 16) - except ValueError: - raise ValueError("Bad chunked transfer size: " + repr(chunk_size)) - - if chunk_size <= 0: - self.closed = True - return - -## if line: chunk_extension = line[0] - - if self.maxlen and self.bytes_read + chunk_size > self.maxlen: - raise IOError("Request Entity Too Large") - - chunk = self.rfile.read(chunk_size) - self.bytes_read += len(chunk) - self.buffer += chunk - - crlf = self.rfile.read(2) - if crlf != CRLF: - raise ValueError( - "Bad chunked transfer coding (expected '\\r\\n', " - "got " + repr(crlf) + ")") - - def read(self, size=None): - data = EMPTY - while True: - if size and len(data) >= size: - return data - - if not self.buffer: - self._fetch() - if not self.buffer: - # EOF - return data - - if size: - remaining = size - len(data) - data += self.buffer[:remaining] - self.buffer = self.buffer[remaining:] - else: - data += self.buffer - - def readline(self, size=None): - data = EMPTY - while True: - if size and len(data) >= size: - return data - - if not self.buffer: - self._fetch() - if not self.buffer: - # EOF - return data - - newline_pos = self.buffer.find(LF) - if size: - if newline_pos == -1: - remaining = size - len(data) - data += self.buffer[:remaining] - self.buffer = self.buffer[remaining:] - else: - remaining = min(size - len(data), newline_pos) - data += self.buffer[:remaining] - self.buffer = self.buffer[remaining:] - else: - if newline_pos == -1: - data += self.buffer - else: - data += self.buffer[:newline_pos] - self.buffer = self.buffer[newline_pos:] - - def readlines(self, sizehint=0): - # Shamelessly stolen from StringIO - total = 0 - lines = [] - line = self.readline(sizehint) - while line: - lines.append(line) - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline(sizehint) - return lines - - def read_trailer_lines(self): - if not self.closed: - raise ValueError( - "Cannot read trailers until the request body has been read.") - - while True: - line = self.rfile.readline() - if not line: - # No more data--illegal end of headers - raise ValueError("Illegal end of headers.") - - self.bytes_read += len(line) - if self.maxlen and self.bytes_read > self.maxlen: - raise IOError("Request Entity Too Large") - - if line == CRLF: - # Normal end of headers - break - if not line.endswith(CRLF): - raise ValueError("HTTP requires CRLF terminators") - - yield line - - def close(self): - self.rfile.close() - - def __iter__(self): - # Shamelessly stolen from StringIO - total = 0 - line = self.readline(sizehint) - while line: - yield line - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline(sizehint) - - -class HTTPRequest(object): - - """An HTTP Request (and response). - - A single HTTP connection may consist of multiple request/response pairs. - """ - - server = None - """The HTTPServer object which is receiving this request.""" - - conn = None - """The HTTPConnection object on which this request connected.""" - - inheaders = {} - """A dict of request headers.""" - - outheaders = [] - """A list of header tuples to write in the response.""" - - ready = False - """When True, the request has been parsed and is ready to begin generating - the response. When False, signals the calling Connection that the response - should not be generated and the connection should close.""" - - close_connection = False - """Signals the calling Connection that the request should close. This does - not imply an error! The client and/or server may each request that the - connection be closed.""" - - chunked_write = False - """If True, output will be encoded with the "chunked" transfer-coding. - - This value is set automatically inside send_headers.""" - - def __init__(self, server, conn): - self.server = server - self.conn = conn - - self.ready = False - self.started_request = False - self.scheme = ntob("http") - if self.server.ssl_adapter is not None: - self.scheme = ntob("https") - # Use the lowest-common protocol in case read_request_line errors. - self.response_protocol = 'HTTP/1.0' - self.inheaders = {} - - self.status = "" - self.outheaders = [] - self.sent_headers = False - self.close_connection = self.__class__.close_connection - self.chunked_read = False - self.chunked_write = self.__class__.chunked_write - - def parse_request(self): - """Parse the next HTTP request start-line and message-headers.""" - self.rfile = SizeCheckWrapper(self.conn.rfile, - self.server.max_request_header_size) - try: - success = self.read_request_line() - except MaxSizeExceeded: - self.simple_response( - "414 Request-URI Too Long", - "The Request-URI sent with the request exceeds the maximum " - "allowed bytes.") - return - else: - if not success: - return - - try: - success = self.read_request_headers() - except MaxSizeExceeded: - self.simple_response( - "413 Request Entity Too Large", - "The headers sent with the request exceed the maximum " - "allowed bytes.") - return - else: - if not success: - return - - self.ready = True - - def read_request_line(self): - # HTTP/1.1 connections are persistent by default. If a client - # requests a page, then idles (leaves the connection open), - # then rfile.readline() will raise socket.error("timed out"). - # Note that it does this based on the value given to settimeout(), - # and doesn't need the client to request or acknowledge the close - # (although your TCP stack might suffer for it: cf Apache's history - # with FIN_WAIT_2). - request_line = self.rfile.readline() - - # Set started_request to True so communicate() knows to send 408 - # from here on out. - self.started_request = True - if not request_line: - return False - - if request_line == CRLF: - # RFC 2616 sec 4.1: "...if the server is reading the protocol - # stream at the beginning of a message and receives a CRLF - # first, it should ignore the CRLF." - # But only ignore one leading line! else we enable a DoS. - request_line = self.rfile.readline() - if not request_line: - return False - - if not request_line.endswith(CRLF): - self.simple_response( - "400 Bad Request", "HTTP requires CRLF terminators") - return False - - try: - method, uri, req_protocol = request_line.strip().split(SPACE, 2) - rp = int(req_protocol[5]), int(req_protocol[7]) - except (ValueError, IndexError): - self.simple_response("400 Bad Request", "Malformed Request-Line") - return False - - self.uri = uri - self.method = method - - # uri may be an abs_path (including "http://host.domain.tld"); - scheme, authority, path = self.parse_request_uri(uri) - if NUMBER_SIGN in path: - self.simple_response("400 Bad Request", - "Illegal #fragment in Request-URI.") - return False - - if scheme: - self.scheme = scheme - - qs = EMPTY - if QUESTION_MARK in path: - path, qs = path.split(QUESTION_MARK, 1) - - # Unquote the path+params (e.g. "/this%20path" -> "/this path"). - # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 - # - # But note that "...a URI must be separated into its components - # before the escaped characters within those components can be - # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2 - # Therefore, "/this%2Fpath" becomes "/this%2Fpath", not "/this/path". - try: - atoms = [unquote(x) for x in quoted_slash.split(path)] - except ValueError: - ex = sys.exc_info()[1] - self.simple_response("400 Bad Request", ex.args[0]) - return False - path = "%2F".join(atoms) - self.path = path - - # Note that, like wsgiref and most other HTTP servers, - # we "% HEX HEX"-unquote the path but not the query string. - self.qs = qs - - # Compare request and server HTTP protocol versions, in case our - # server does not support the requested protocol. Limit our output - # to min(req, server). We want the following output: - # request server actual written supported response - # protocol protocol response protocol feature set - # a 1.0 1.0 1.0 1.0 - # b 1.0 1.1 1.1 1.0 - # c 1.1 1.0 1.0 1.0 - # d 1.1 1.1 1.1 1.1 - # Notice that, in (b), the response will be "HTTP/1.1" even though - # the client only understands 1.0. RFC 2616 10.5.6 says we should - # only return 505 if the _major_ version is different. - sp = int(self.server.protocol[5]), int(self.server.protocol[7]) - - if sp[0] != rp[0]: - self.simple_response("505 HTTP Version Not Supported") - return False - - self.request_protocol = req_protocol - self.response_protocol = "HTTP/%s.%s" % min(rp, sp) - - return True - - def read_request_headers(self): - """Read self.rfile into self.inheaders. Return success.""" - - # then all the http headers - try: - read_headers(self.rfile, self.inheaders) - except ValueError: - ex = sys.exc_info()[1] - self.simple_response("400 Bad Request", ex.args[0]) - return False - - mrbs = self.server.max_request_body_size - if mrbs and int(self.inheaders.get("Content-Length", 0)) > mrbs: - self.simple_response( - "413 Request Entity Too Large", - "The entity sent with the request exceeds the maximum " - "allowed bytes.") - return False - - # Persistent connection support - if self.response_protocol == "HTTP/1.1": - # Both server and client are HTTP/1.1 - if self.inheaders.get("Connection", "") == "close": - self.close_connection = True - else: - # Either the server or client (or both) are HTTP/1.0 - if self.inheaders.get("Connection", "") != "Keep-Alive": - self.close_connection = True - - # Transfer-Encoding support - te = None - if self.response_protocol == "HTTP/1.1": - te = self.inheaders.get("Transfer-Encoding") - if te: - te = [x.strip().lower() for x in te.split(",") if x.strip()] - - self.chunked_read = False - - if te: - for enc in te: - if enc == "chunked": - self.chunked_read = True - else: - # Note that, even if we see "chunked", we must reject - # if there is an extension we don't recognize. - self.simple_response("501 Unimplemented") - self.close_connection = True - return False - - # From PEP 333: - # "Servers and gateways that implement HTTP 1.1 must provide - # transparent support for HTTP 1.1's "expect/continue" mechanism. - # This may be done in any of several ways: - # 1. Respond to requests containing an Expect: 100-continue request - # with an immediate "100 Continue" response, and proceed normally. - # 2. Proceed with the request normally, but provide the application - # with a wsgi.input stream that will send the "100 Continue" - # response if/when the application first attempts to read from - # the input stream. The read request must then remain blocked - # until the client responds. - # 3. Wait until the client decides that the server does not support - # expect/continue, and sends the request body on its own. - # (This is suboptimal, and is not recommended.) - # - # We used to do 3, but are now doing 1. Maybe we'll do 2 someday, - # but it seems like it would be a big slowdown for such a rare case. - if self.inheaders.get("Expect", "") == "100-continue": - # Don't use simple_response here, because it emits headers - # we don't want. See - # https://bitbucket.org/cherrypy/cherrypy/issue/951 - msg = self.server.protocol + " 100 Continue\r\n\r\n" - try: - self.conn.wfile.sendall(msg) - except socket.error: - x = sys.exc_info()[1] - if x.args[0] not in socket_errors_to_ignore: - raise - return True - - def parse_request_uri(self, uri): - """Parse a Request-URI into (scheme, authority, path). - - Note that Request-URI's must be one of:: - - Request-URI = "*" | absoluteURI | abs_path | authority - - Therefore, a Request-URI which starts with a double forward-slash - cannot be a "net_path":: - - net_path = "//" authority [ abs_path ] - - Instead, it must be interpreted as an "abs_path" with an empty first - path segment:: - - abs_path = "/" path_segments - path_segments = segment *( "/" segment ) - segment = *pchar *( ";" param ) - param = *pchar - """ - if uri == ASTERISK: - return None, None, uri - - i = uri.find('://') - if i > 0 and QUESTION_MARK not in uri[:i]: - # An absoluteURI. - # If there's a scheme (and it must be http or https), then: - # http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query - # ]] - scheme, remainder = uri[:i].lower(), uri[i + 3:] - authority, path = remainder.split(FORWARD_SLASH, 1) - path = FORWARD_SLASH + path - return scheme, authority, path - - if uri.startswith(FORWARD_SLASH): - # An abs_path. - return None, None, uri - else: - # An authority. - return None, uri, None - - def respond(self): - """Call the gateway and write its iterable output.""" - mrbs = self.server.max_request_body_size - if self.chunked_read: - self.rfile = ChunkedRFile(self.conn.rfile, mrbs) - else: - cl = int(self.inheaders.get("Content-Length", 0)) - if mrbs and mrbs < cl: - if not self.sent_headers: - self.simple_response( - "413 Request Entity Too Large", - "The entity sent with the request exceeds the maximum " - "allowed bytes.") - return - self.rfile = KnownLengthRFile(self.conn.rfile, cl) - - self.server.gateway(self).respond() - - if (self.ready and not self.sent_headers): - self.sent_headers = True - self.send_headers() - if self.chunked_write: - self.conn.wfile.sendall("0\r\n\r\n") - - def simple_response(self, status, msg=""): - """Write a simple response back to the client.""" - status = str(status) - buf = [self.server.protocol + SPACE + - status + CRLF, - "Content-Length: %s\r\n" % len(msg), - "Content-Type: text/plain\r\n"] - - if status[:3] in ("413", "414"): - # Request Entity Too Large / Request-URI Too Long - self.close_connection = True - if self.response_protocol == 'HTTP/1.1': - # This will not be true for 414, since read_request_line - # usually raises 414 before reading the whole line, and we - # therefore cannot know the proper response_protocol. - buf.append("Connection: close\r\n") - else: - # HTTP/1.0 had no 413/414 status nor Connection header. - # Emit 400 instead and trust the message body is enough. - status = "400 Bad Request" - - buf.append(CRLF) - if msg: - if isinstance(msg, unicodestr): - msg = msg.encode("ISO-8859-1") - buf.append(msg) - - try: - self.conn.wfile.sendall("".join(buf)) - except socket.error: - x = sys.exc_info()[1] - if x.args[0] not in socket_errors_to_ignore: - raise - - def write(self, chunk): - """Write unbuffered data to the client.""" - if self.chunked_write and chunk: - buf = [hex(len(chunk))[2:], CRLF, chunk, CRLF] - self.conn.wfile.sendall(EMPTY.join(buf)) - else: - self.conn.wfile.sendall(chunk) - - def send_headers(self): - """Assert, process, and send the HTTP response message-headers. - - You must set self.status, and self.outheaders before calling this. - """ - hkeys = [key.lower() for key, value in self.outheaders] - status = int(self.status[:3]) - - if status == 413: - # Request Entity Too Large. Close conn to avoid garbage. - self.close_connection = True - elif "content-length" not in hkeys: - # "All 1xx (informational), 204 (no content), - # and 304 (not modified) responses MUST NOT - # include a message-body." So no point chunking. - if status < 200 or status in (204, 205, 304): - pass - else: - if (self.response_protocol == 'HTTP/1.1' - and self.method != 'HEAD'): - # Use the chunked transfer-coding - self.chunked_write = True - self.outheaders.append(("Transfer-Encoding", "chunked")) - else: - # Closing the conn is the only way to determine len. - self.close_connection = True - - if "connection" not in hkeys: - if self.response_protocol == 'HTTP/1.1': - # Both server and client are HTTP/1.1 or better - if self.close_connection: - self.outheaders.append(("Connection", "close")) - else: - # Server and/or client are HTTP/1.0 - if not self.close_connection: - self.outheaders.append(("Connection", "Keep-Alive")) - - if (not self.close_connection) and (not self.chunked_read): - # Read any remaining request body data on the socket. - # "If an origin server receives a request that does not include an - # Expect request-header field with the "100-continue" expectation, - # the request includes a request body, and the server responds - # with a final status code before reading the entire request body - # from the transport connection, then the server SHOULD NOT close - # the transport connection until it has read the entire request, - # or until the client closes the connection. Otherwise, the client - # might not reliably receive the response message. However, this - # requirement is not be construed as preventing a server from - # defending itself against denial-of-service attacks, or from - # badly broken client implementations." - remaining = getattr(self.rfile, 'remaining', 0) - if remaining > 0: - self.rfile.read(remaining) - - if "date" not in hkeys: - self.outheaders.append(("Date", rfc822.formatdate())) - - if "server" not in hkeys: - self.outheaders.append(("Server", self.server.server_name)) - - buf = [self.server.protocol + SPACE + self.status + CRLF] - for k, v in self.outheaders: - buf.append(k + COLON + SPACE + v + CRLF) - buf.append(CRLF) - self.conn.wfile.sendall(EMPTY.join(buf)) - - -class NoSSLError(Exception): - - """Exception raised when a client speaks HTTP to an HTTPS socket.""" - pass - - -class FatalSSLAlert(Exception): - - """Exception raised when the SSL implementation signals a fatal alert.""" - pass - - -class CP_fileobject(socket._fileobject): - - """Faux file object attached to a socket object.""" - - def __init__(self, *args, **kwargs): - self.bytes_read = 0 - self.bytes_written = 0 - socket._fileobject.__init__(self, *args, **kwargs) - - def sendall(self, data): - """Sendall for non-blocking sockets.""" - while data: - try: - bytes_sent = self.send(data) - data = data[bytes_sent:] - except socket.error, e: - if e.args[0] not in socket_errors_nonblocking: - raise - - def send(self, data): - bytes_sent = self._sock.send(data) - self.bytes_written += bytes_sent - return bytes_sent - - def flush(self): - if self._wbuf: - buffer = "".join(self._wbuf) - self._wbuf = [] - self.sendall(buffer) - - def recv(self, size): - while True: - try: - data = self._sock.recv(size) - self.bytes_read += len(data) - return data - except socket.error, e: - if (e.args[0] not in socket_errors_nonblocking - and e.args[0] not in socket_error_eintr): - raise - - if not _fileobject_uses_str_type: - def read(self, size=-1): - # Use max, disallow tiny reads in a loop as they are very - # inefficient. - # We never leave read() with any leftover data from a new recv() - # call in our internal buffer. - rbufsize = max(self._rbufsize, self.default_bufsize) - # Our use of StringIO rather than lists of string objects returned - # by recv() minimizes memory usage and fragmentation that occurs - # when rbufsize is large compared to the typical return value of - # recv(). - buf = self._rbuf - buf.seek(0, 2) # seek end - if size < 0: - # Read until EOF - # reset _rbuf. we consume it via buf. - self._rbuf = StringIO.StringIO() - while True: - data = self.recv(rbufsize) - if not data: - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or EOF seen, whichever comes first - buf_len = buf.tell() - if buf_len >= size: - # Already have size bytes in our buffer? Extract and - # return. - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO.StringIO() - self._rbuf.write(buf.read()) - return rv - - # reset _rbuf. we consume it via buf. - self._rbuf = StringIO.StringIO() - while True: - left = size - buf_len - # recv() will malloc the amount of memory given as its - # parameter even though it often returns much less data - # than that. The returned data string is short lived - # as we copy it into a StringIO and free it. This avoids - # fragmentation issues on many platforms. - data = self.recv(left) - if not data: - break - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid buffer data copies when: - # - We have no data in our buffer. - # AND - # - Our call to recv returned exactly the - # number of bytes we were asked to read. - return data - if n == left: - buf.write(data) - del data # explicit free - break - assert n <= left, "recv(%d) returned %d bytes" % (left, n) - buf.write(data) - buf_len += n - del data # explicit free - #assert buf_len == buf.tell() - return buf.getvalue() - - def readline(self, size=-1): - buf = self._rbuf - buf.seek(0, 2) # seek end - if buf.tell() > 0: - # check if we already have it in our buffer - buf.seek(0) - bline = buf.readline(size) - if bline.endswith('\n') or len(bline) == size: - self._rbuf = StringIO.StringIO() - self._rbuf.write(buf.read()) - return bline - del bline - if size < 0: - # Read until \n or EOF, whichever comes first - if self._rbufsize <= 1: - # Speed up unbuffered case - buf.seek(0) - buffers = [buf.read()] - # reset _rbuf. we consume it via buf. - self._rbuf = StringIO.StringIO() - data = None - recv = self.recv - while data != "\n": - data = recv(1) - if not data: - break - buffers.append(data) - return "".join(buffers) - - buf.seek(0, 2) # seek end - # reset _rbuf. we consume it via buf. - self._rbuf = StringIO.StringIO() - while True: - data = self.recv(self._rbufsize) - if not data: - break - nl = data.find('\n') - if nl >= 0: - nl += 1 - buf.write(data[:nl]) - self._rbuf.write(data[nl:]) - del data - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or \n or EOF seen, whichever comes - # first - buf.seek(0, 2) # seek end - buf_len = buf.tell() - if buf_len >= size: - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO.StringIO() - self._rbuf.write(buf.read()) - return rv - # reset _rbuf. we consume it via buf. - self._rbuf = StringIO.StringIO() - while True: - data = self.recv(self._rbufsize) - if not data: - break - left = size - buf_len - # did we just receive a newline? - nl = data.find('\n', 0, left) - if nl >= 0: - nl += 1 - # save the excess data to _rbuf - self._rbuf.write(data[nl:]) - if buf_len: - buf.write(data[:nl]) - break - else: - # Shortcut. Avoid data copy through buf when - # returning a substring of our first recv(). - return data[:nl] - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid data copy through buf when - # returning exactly all of our first recv(). - return data - if n >= left: - buf.write(data[:left]) - self._rbuf.write(data[left:]) - break - buf.write(data) - buf_len += n - #assert buf_len == buf.tell() - return buf.getvalue() - else: - def read(self, size=-1): - if size < 0: - # Read until EOF - buffers = [self._rbuf] - self._rbuf = "" - if self._rbufsize <= 1: - recv_size = self.default_bufsize - else: - recv_size = self._rbufsize - - while True: - data = self.recv(recv_size) - if not data: - break - buffers.append(data) - return "".join(buffers) - else: - # Read until size bytes or EOF seen, whichever comes first - data = self._rbuf - buf_len = len(data) - if buf_len >= size: - self._rbuf = data[size:] - return data[:size] - buffers = [] - if data: - buffers.append(data) - self._rbuf = "" - while True: - left = size - buf_len - recv_size = max(self._rbufsize, left) - data = self.recv(recv_size) - if not data: - break - buffers.append(data) - n = len(data) - if n >= left: - self._rbuf = data[left:] - buffers[-1] = data[:left] - break - buf_len += n - return "".join(buffers) - - def readline(self, size=-1): - data = self._rbuf - if size < 0: - # Read until \n or EOF, whichever comes first - if self._rbufsize <= 1: - # Speed up unbuffered case - assert data == "" - buffers = [] - while data != "\n": - data = self.recv(1) - if not data: - break - buffers.append(data) - return "".join(buffers) - nl = data.find('\n') - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - return data[:nl] - buffers = [] - if data: - buffers.append(data) - self._rbuf = "" - while True: - data = self.recv(self._rbufsize) - if not data: - break - buffers.append(data) - nl = data.find('\n') - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - buffers[-1] = data[:nl] - break - return "".join(buffers) - else: - # Read until size bytes or \n or EOF seen, whichever comes - # first - nl = data.find('\n', 0, size) - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - return data[:nl] - buf_len = len(data) - if buf_len >= size: - self._rbuf = data[size:] - return data[:size] - buffers = [] - if data: - buffers.append(data) - self._rbuf = "" - while True: - data = self.recv(self._rbufsize) - if not data: - break - buffers.append(data) - left = size - buf_len - nl = data.find('\n', 0, left) - if nl >= 0: - nl += 1 - self._rbuf = data[nl:] - buffers[-1] = data[:nl] - break - n = len(data) - if n >= left: - self._rbuf = data[left:] - buffers[-1] = data[:left] - break - buf_len += n - return "".join(buffers) - - -class HTTPConnection(object): - - """An HTTP connection (active socket). - - server: the Server object which received this connection. - socket: the raw socket object (usually TCP) for this connection. - makefile: a fileobject class for reading from the socket. - """ - - remote_addr = None - remote_port = None - ssl_env = None - rbufsize = DEFAULT_BUFFER_SIZE - wbufsize = DEFAULT_BUFFER_SIZE - RequestHandlerClass = HTTPRequest - - def __init__(self, server, sock, makefile=CP_fileobject): - self.server = server - self.socket = sock - self.rfile = makefile(sock, "rb", self.rbufsize) - self.wfile = makefile(sock, "wb", self.wbufsize) - self.requests_seen = 0 - - def communicate(self): - """Read each request and respond appropriately.""" - request_seen = False - try: - while True: - # (re)set req to None so that if something goes wrong in - # the RequestHandlerClass constructor, the error doesn't - # get written to the previous request. - req = None - req = self.RequestHandlerClass(self.server, self) - - # This order of operations should guarantee correct pipelining. - req.parse_request() - if self.server.stats['Enabled']: - self.requests_seen += 1 - if not req.ready: - # Something went wrong in the parsing (and the server has - # probably already made a simple_response). Return and - # let the conn close. - return - - request_seen = True - req.respond() - if req.close_connection: - return - except socket.error: - e = sys.exc_info()[1] - errnum = e.args[0] - # sadly SSL sockets return a different (longer) time out string - if ( - errnum == 'timed out' or - errnum == 'The read operation timed out' - ): - # Don't error if we're between requests; only error - # if 1) no request has been started at all, or 2) we're - # in the middle of a request. - # See https://bitbucket.org/cherrypy/cherrypy/issue/853 - if (not request_seen) or (req and req.started_request): - # Don't bother writing the 408 if the response - # has already started being written. - if req and not req.sent_headers: - try: - req.simple_response("408 Request Timeout") - except FatalSSLAlert: - # Close the connection. - return - elif errnum not in socket_errors_to_ignore: - self.server.error_log("socket.error %s" % repr(errnum), - level=logging.WARNING, traceback=True) - if req and not req.sent_headers: - try: - req.simple_response("500 Internal Server Error") - except FatalSSLAlert: - # Close the connection. - return - return - except (KeyboardInterrupt, SystemExit): - raise - except FatalSSLAlert: - # Close the connection. - return - except NoSSLError: - if req and not req.sent_headers: - # Unwrap our wfile - self.wfile = CP_fileobject( - self.socket._sock, "wb", self.wbufsize) - req.simple_response( - "400 Bad Request", - "The client sent a plain HTTP request, but " - "this server only speaks HTTPS on this port.") - self.linger = True - except Exception: - e = sys.exc_info()[1] - self.server.error_log(repr(e), level=logging.ERROR, traceback=True) - if req and not req.sent_headers: - try: - req.simple_response("500 Internal Server Error") - except FatalSSLAlert: - # Close the connection. - return - - linger = False - - def close(self): - """Close the socket underlying this connection.""" - self.rfile.close() - - if not self.linger: - # Python's socket module does NOT call close on the kernel - # socket when you call socket.close(). We do so manually here - # because we want this server to send a FIN TCP segment - # immediately. Note this must be called *before* calling - # socket.close(), because the latter drops its reference to - # the kernel socket. - if hasattr(self.socket, '_sock'): - self.socket._sock.close() - self.socket.close() - else: - # On the other hand, sometimes we want to hang around for a bit - # to make sure the client has a chance to read our entire - # response. Skipping the close() calls here delays the FIN - # packet until the socket object is garbage-collected later. - # Someday, perhaps, we'll do the full lingering_close that - # Apache does, but not today. - pass - - -class TrueyZero(object): - - """An object which equals and does math like the integer 0 but evals True. - """ - - def __add__(self, other): - return other - - def __radd__(self, other): - return other -trueyzero = TrueyZero() - - -_SHUTDOWNREQUEST = None - - -class WorkerThread(threading.Thread): - - """Thread which continuously polls a Queue for Connection objects. - - Due to the timing issues of polling a Queue, a WorkerThread does not - check its own 'ready' flag after it has started. To stop the thread, - it is necessary to stick a _SHUTDOWNREQUEST object onto the Queue - (one for each running WorkerThread). - """ - - conn = None - """The current connection pulled off the Queue, or None.""" - - server = None - """The HTTP Server which spawned this thread, and which owns the - Queue and is placing active connections into it.""" - - ready = False - """A simple flag for the calling server to know when this thread - has begun polling the Queue.""" - - def __init__(self, server): - self.ready = False - self.server = server - - self.requests_seen = 0 - self.bytes_read = 0 - self.bytes_written = 0 - self.start_time = None - self.work_time = 0 - self.stats = { - 'Requests': lambda s: self.requests_seen + ( - (self.start_time is None) and - trueyzero or - self.conn.requests_seen - ), - 'Bytes Read': lambda s: self.bytes_read + ( - (self.start_time is None) and - trueyzero or - self.conn.rfile.bytes_read - ), - 'Bytes Written': lambda s: self.bytes_written + ( - (self.start_time is None) and - trueyzero or - self.conn.wfile.bytes_written - ), - 'Work Time': lambda s: self.work_time + ( - (self.start_time is None) and - trueyzero or - time.time() - self.start_time - ), - 'Read Throughput': lambda s: s['Bytes Read'](s) / ( - s['Work Time'](s) or 1e-6), - 'Write Throughput': lambda s: s['Bytes Written'](s) / ( - s['Work Time'](s) or 1e-6), - } - threading.Thread.__init__(self) - - def run(self): - self.server.stats['Worker Threads'][self.getName()] = self.stats - try: - self.ready = True - while True: - conn = self.server.requests.get() - if conn is _SHUTDOWNREQUEST: - return - - self.conn = conn - if self.server.stats['Enabled']: - self.start_time = time.time() - try: - conn.communicate() - finally: - conn.close() - if self.server.stats['Enabled']: - self.requests_seen += self.conn.requests_seen - self.bytes_read += self.conn.rfile.bytes_read - self.bytes_written += self.conn.wfile.bytes_written - self.work_time += time.time() - self.start_time - self.start_time = None - self.conn = None - except (KeyboardInterrupt, SystemExit): - exc = sys.exc_info()[1] - self.server.interrupt = exc - - -class ThreadPool(object): - - """A Request Queue for an HTTPServer which pools threads. - - ThreadPool objects must provide min, get(), put(obj), start() - and stop(timeout) attributes. - """ - - def __init__(self, server, min=10, max=-1, - accepted_queue_size=-1, accepted_queue_timeout=10): - self.server = server - self.min = min - self.max = max - self._threads = [] - self._queue = queue.Queue(maxsize=accepted_queue_size) - self._queue_put_timeout = accepted_queue_timeout - self.get = self._queue.get - - def start(self): - """Start the pool of threads.""" - for i in range(self.min): - self._threads.append(WorkerThread(self.server)) - for worker in self._threads: - worker.setName("CP Server " + worker.getName()) - worker.start() - for worker in self._threads: - while not worker.ready: - time.sleep(.1) - - def _get_idle(self): - """Number of worker threads which are idle. Read-only.""" - return len([t for t in self._threads if t.conn is None]) - idle = property(_get_idle, doc=_get_idle.__doc__) - - def put(self, obj): - self._queue.put(obj, block=True, timeout=self._queue_put_timeout) - if obj is _SHUTDOWNREQUEST: - return - - def grow(self, amount): - """Spawn new worker threads (not above self.max).""" - if self.max > 0: - budget = max(self.max - len(self._threads), 0) - else: - # self.max <= 0 indicates no maximum - budget = float('inf') - - n_new = min(amount, budget) - - workers = [self._spawn_worker() for i in range(n_new)] - while not self._all(operator.attrgetter('ready'), workers): - time.sleep(.1) - self._threads.extend(workers) - - def _spawn_worker(self): - worker = WorkerThread(self.server) - worker.setName("CP Server " + worker.getName()) - worker.start() - return worker - - def _all(func, items): - results = [func(item) for item in items] - return reduce(operator.and_, results, True) - _all = staticmethod(_all) - - def shrink(self, amount): - """Kill off worker threads (not below self.min).""" - # Grow/shrink the pool if necessary. - # Remove any dead threads from our list - for t in self._threads: - if not t.isAlive(): - self._threads.remove(t) - amount -= 1 - - # calculate the number of threads above the minimum - n_extra = max(len(self._threads) - self.min, 0) - - # don't remove more than amount - n_to_remove = min(amount, n_extra) - - # put shutdown requests on the queue equal to the number of threads - # to remove. As each request is processed by a worker, that worker - # will terminate and be culled from the list. - for n in range(n_to_remove): - self._queue.put(_SHUTDOWNREQUEST) - - def stop(self, timeout=5): - # Must shut down threads here so the code that calls - # this method can know when all threads are stopped. - for worker in self._threads: - self._queue.put(_SHUTDOWNREQUEST) - - # Don't join currentThread (when stop is called inside a request). - current = threading.currentThread() - if timeout and timeout >= 0: - endtime = time.time() + timeout - while self._threads: - worker = self._threads.pop() - if worker is not current and worker.isAlive(): - try: - if timeout is None or timeout < 0: - worker.join() - else: - remaining_time = endtime - time.time() - if remaining_time > 0: - worker.join(remaining_time) - if worker.isAlive(): - # We exhausted the timeout. - # Forcibly shut down the socket. - c = worker.conn - if c and not c.rfile.closed: - try: - c.socket.shutdown(socket.SHUT_RD) - except TypeError: - # pyOpenSSL sockets don't take an arg - c.socket.shutdown() - worker.join() - except (AssertionError, - # Ignore repeated Ctrl-C. - # See - # https://bitbucket.org/cherrypy/cherrypy/issue/691. - KeyboardInterrupt): - pass - - def _get_qsize(self): - return self._queue.qsize() - qsize = property(_get_qsize) - - -try: - import fcntl -except ImportError: - try: - from ctypes import windll, WinError - import ctypes.wintypes - _SetHandleInformation = windll.kernel32.SetHandleInformation - _SetHandleInformation.argtypes = [ - ctypes.wintypes.HANDLE, - ctypes.wintypes.DWORD, - ctypes.wintypes.DWORD, - ] - _SetHandleInformation.restype = ctypes.wintypes.BOOL - except ImportError: - def prevent_socket_inheritance(sock): - """Dummy function, since neither fcntl nor ctypes are available.""" - pass - else: - def prevent_socket_inheritance(sock): - """Mark the given socket fd as non-inheritable (Windows).""" - if not _SetHandleInformation(sock.fileno(), 1, 0): - raise WinError() -else: - def prevent_socket_inheritance(sock): - """Mark the given socket fd as non-inheritable (POSIX).""" - fd = sock.fileno() - old_flags = fcntl.fcntl(fd, fcntl.F_GETFD) - fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC) - - -class SSLAdapter(object): - - """Base class for SSL driver library adapters. - - Required methods: - - * ``wrap(sock) -> (wrapped socket, ssl environ dict)`` - * ``makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE) -> - socket file object`` - """ - - def __init__(self, certificate, private_key, certificate_chain=None): - self.certificate = certificate - self.private_key = private_key - self.certificate_chain = certificate_chain - - def wrap(self, sock): - raise NotImplemented - - def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): - raise NotImplemented - - -class HTTPServer(object): - - """An HTTP server.""" - - _bind_addr = "127.0.0.1" - _interrupt = None - - gateway = None - """A Gateway instance.""" - - minthreads = None - """The minimum number of worker threads to create (default 10).""" - - maxthreads = None - """The maximum number of worker threads to create (default -1 = no limit). - """ - - server_name = None - """The name of the server; defaults to socket.gethostname().""" - - protocol = "HTTP/1.1" - """The version string to write in the Status-Line of all HTTP responses. - - For example, "HTTP/1.1" is the default. This also limits the supported - features used in the response.""" - - request_queue_size = 5 - """The 'backlog' arg to socket.listen(); max queued connections - (default 5). - """ - - shutdown_timeout = 5 - """The total time, in seconds, to wait for worker threads to cleanly exit. - """ - - timeout = 10 - """The timeout in seconds for accepted connections (default 10).""" - - version = "CherryPy/3.6.0" - """A version string for the HTTPServer.""" - - software = None - """The value to set for the SERVER_SOFTWARE entry in the WSGI environ. - - If None, this defaults to ``'%s Server' % self.version``.""" - - ready = False - """An internal flag which marks whether the socket is accepting connections - """ - - max_request_header_size = 0 - """The maximum size, in bytes, for request headers, or 0 for no limit.""" - - max_request_body_size = 0 - """The maximum size, in bytes, for request bodies, or 0 for no limit.""" - - nodelay = True - """If True (the default since 3.1), sets the TCP_NODELAY socket option.""" - - ConnectionClass = HTTPConnection - """The class to use for handling HTTP connections.""" - - ssl_adapter = None - """An instance of SSLAdapter (or a subclass). - - You must have the corresponding SSL driver library installed.""" - - def __init__(self, bind_addr, gateway, minthreads=10, maxthreads=-1, - server_name=None): - self.bind_addr = bind_addr - self.gateway = gateway - - self.requests = ThreadPool(self, min=minthreads or 1, max=maxthreads) - - if not server_name: - server_name = socket.gethostname() - self.server_name = server_name - self.clear_stats() - - def clear_stats(self): - self._start_time = None - self._run_time = 0 - self.stats = { - 'Enabled': False, - 'Bind Address': lambda s: repr(self.bind_addr), - 'Run time': lambda s: (not s['Enabled']) and -1 or self.runtime(), - 'Accepts': 0, - 'Accepts/sec': lambda s: s['Accepts'] / self.runtime(), - 'Queue': lambda s: getattr(self.requests, "qsize", None), - 'Threads': lambda s: len(getattr(self.requests, "_threads", [])), - 'Threads Idle': lambda s: getattr(self.requests, "idle", None), - 'Socket Errors': 0, - 'Requests': lambda s: (not s['Enabled']) and -1 or sum( - [w['Requests'](w) for w in s['Worker Threads'].values()], 0), - 'Bytes Read': lambda s: (not s['Enabled']) and -1 or sum( - [w['Bytes Read'](w) for w in s['Worker Threads'].values()], 0), - 'Bytes Written': lambda s: (not s['Enabled']) and -1 or sum( - [w['Bytes Written'](w) for w in s['Worker Threads'].values()], - 0), - 'Work Time': lambda s: (not s['Enabled']) and -1 or sum( - [w['Work Time'](w) for w in s['Worker Threads'].values()], 0), - 'Read Throughput': lambda s: (not s['Enabled']) and -1 or sum( - [w['Bytes Read'](w) / (w['Work Time'](w) or 1e-6) - for w in s['Worker Threads'].values()], 0), - 'Write Throughput': lambda s: (not s['Enabled']) and -1 or sum( - [w['Bytes Written'](w) / (w['Work Time'](w) or 1e-6) - for w in s['Worker Threads'].values()], 0), - 'Worker Threads': {}, - } - logging.statistics["CherryPy HTTPServer %d" % id(self)] = self.stats - - def runtime(self): - if self._start_time is None: - return self._run_time - else: - return self._run_time + (time.time() - self._start_time) - - def __str__(self): - return "%s.%s(%r)" % (self.__module__, self.__class__.__name__, - self.bind_addr) - - def _get_bind_addr(self): - return self._bind_addr - - def _set_bind_addr(self, value): - if isinstance(value, tuple) and value[0] in ('', None): - # Despite the socket module docs, using '' does not - # allow AI_PASSIVE to work. Passing None instead - # returns '0.0.0.0' like we want. In other words: - # host AI_PASSIVE result - # '' Y 192.168.x.y - # '' N 192.168.x.y - # None Y 0.0.0.0 - # None N 127.0.0.1 - # But since you can get the same effect with an explicit - # '0.0.0.0', we deny both the empty string and None as values. - raise ValueError("Host values of '' or None are not allowed. " - "Use '0.0.0.0' (IPv4) or '::' (IPv6) instead " - "to listen on all active interfaces.") - self._bind_addr = value - bind_addr = property( - _get_bind_addr, - _set_bind_addr, - doc="""The interface on which to listen for connections. - - For TCP sockets, a (host, port) tuple. Host values may be any IPv4 - or IPv6 address, or any valid hostname. The string 'localhost' is a - synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). - The string '0.0.0.0' is a special IPv4 entry meaning "any active - interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for - IPv6. The empty string or None are not allowed. - - For UNIX sockets, supply the filename as a string.""") - - def start(self): - """Run the server forever.""" - # We don't have to trap KeyboardInterrupt or SystemExit here, - # because cherrpy.server already does so, calling self.stop() for us. - # If you're using this server with another framework, you should - # trap those exceptions in whatever code block calls start(). - self._interrupt = None - - if self.software is None: - self.software = "%s Server" % self.version - - # SSL backward compatibility - if (self.ssl_adapter is None and - getattr(self, 'ssl_certificate', None) and - getattr(self, 'ssl_private_key', None)): - warnings.warn( - "SSL attributes are deprecated in CherryPy 3.2, and will " - "be removed in CherryPy 3.3. Use an ssl_adapter attribute " - "instead.", - DeprecationWarning - ) - try: - from cherrypy.wsgiserver.ssl_pyopenssl import pyOpenSSLAdapter - except ImportError: - pass - else: - self.ssl_adapter = pyOpenSSLAdapter( - self.ssl_certificate, self.ssl_private_key, - getattr(self, 'ssl_certificate_chain', None)) - - # Select the appropriate socket - if isinstance(self.bind_addr, basestring): - # AF_UNIX socket - - # So we can reuse the socket... - try: - os.unlink(self.bind_addr) - except: - pass - - # So everyone can access the socket... - try: - os.chmod(self.bind_addr, 511) # 0777 - except: - pass - - info = [ - (socket.AF_UNIX, socket.SOCK_STREAM, 0, "", self.bind_addr)] - else: - # AF_INET or AF_INET6 socket - # Get the correct address family for our host (allows IPv6 - # addresses) - host, port = self.bind_addr - try: - info = socket.getaddrinfo( - host, port, socket.AF_UNSPEC, - socket.SOCK_STREAM, 0, socket.AI_PASSIVE) - except socket.gaierror: - if ':' in self.bind_addr[0]: - info = [(socket.AF_INET6, socket.SOCK_STREAM, - 0, "", self.bind_addr + (0, 0))] - else: - info = [(socket.AF_INET, socket.SOCK_STREAM, - 0, "", self.bind_addr)] - - self.socket = None - msg = "No socket could be created" - for res in info: - af, socktype, proto, canonname, sa = res - try: - self.bind(af, socktype, proto) - except socket.error, serr: - msg = "%s -- (%s: %s)" % (msg, sa, serr) - if self.socket: - self.socket.close() - self.socket = None - continue - break - if not self.socket: - raise socket.error(msg) - - # Timeout so KeyboardInterrupt can be caught on Win32 - self.socket.settimeout(1) - self.socket.listen(self.request_queue_size) - - # Create worker threads - self.requests.start() - - self.ready = True - self._start_time = time.time() - while self.ready: - try: - self.tick() - except (KeyboardInterrupt, SystemExit): - raise - except: - self.error_log("Error in HTTPServer.tick", level=logging.ERROR, - traceback=True) - - if self.interrupt: - while self.interrupt is True: - # Wait for self.stop() to complete. See _set_interrupt. - time.sleep(0.1) - if self.interrupt: - raise self.interrupt - - def error_log(self, msg="", level=20, traceback=False): - # Override this in subclasses as desired - sys.stderr.write(msg + '\n') - sys.stderr.flush() - if traceback: - tblines = format_exc() - sys.stderr.write(tblines) - sys.stderr.flush() - - def bind(self, family, type, proto=0): - """Create (or recreate) the actual socket object.""" - self.socket = socket.socket(family, type, proto) - prevent_socket_inheritance(self.socket) - self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - if self.nodelay and not isinstance(self.bind_addr, str): - self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) - - if self.ssl_adapter is not None: - self.socket = self.ssl_adapter.bind(self.socket) - - # If listening on the IPV6 any address ('::' = IN6ADDR_ANY), - # activate dual-stack. See - # https://bitbucket.org/cherrypy/cherrypy/issue/871. - if (hasattr(socket, 'AF_INET6') and family == socket.AF_INET6 - and self.bind_addr[0] in ('::', '::0', '::0.0.0.0')): - try: - self.socket.setsockopt( - socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) - except (AttributeError, socket.error): - # Apparently, the socket option is not available in - # this machine's TCP stack - pass - - self.socket.bind(self.bind_addr) - - def tick(self): - """Accept a new connection and put it on the Queue.""" - try: - s, addr = self.socket.accept() - if self.stats['Enabled']: - self.stats['Accepts'] += 1 - if not self.ready: - return - - prevent_socket_inheritance(s) - if hasattr(s, 'settimeout'): - s.settimeout(self.timeout) - - makefile = CP_fileobject - ssl_env = {} - # if ssl cert and key are set, we try to be a secure HTTP server - if self.ssl_adapter is not None: - try: - s, ssl_env = self.ssl_adapter.wrap(s) - except NoSSLError: - msg = ("The client sent a plain HTTP request, but " - "this server only speaks HTTPS on this port.") - buf = ["%s 400 Bad Request\r\n" % self.protocol, - "Content-Length: %s\r\n" % len(msg), - "Content-Type: text/plain\r\n\r\n", - msg] - - wfile = makefile(s._sock, "wb", DEFAULT_BUFFER_SIZE) - try: - wfile.sendall("".join(buf)) - except socket.error: - x = sys.exc_info()[1] - if x.args[0] not in socket_errors_to_ignore: - raise - return - if not s: - return - makefile = self.ssl_adapter.makefile - # Re-apply our timeout since we may have a new socket object - if hasattr(s, 'settimeout'): - s.settimeout(self.timeout) - - conn = self.ConnectionClass(self, s, makefile) - - if not isinstance(self.bind_addr, basestring): - # optional values - # Until we do DNS lookups, omit REMOTE_HOST - if addr is None: # sometimes this can happen - # figure out if AF_INET or AF_INET6. - if len(s.getsockname()) == 2: - # AF_INET - addr = ('0.0.0.0', 0) - else: - # AF_INET6 - addr = ('::', 0) - conn.remote_addr = addr[0] - conn.remote_port = addr[1] - - conn.ssl_env = ssl_env - - try: - self.requests.put(conn) - except queue.Full: - # Just drop the conn. TODO: write 503 back? - conn.close() - return - except socket.timeout: - # The only reason for the timeout in start() is so we can - # notice keyboard interrupts on Win32, which don't interrupt - # accept() by default - return - except socket.error: - x = sys.exc_info()[1] - if self.stats['Enabled']: - self.stats['Socket Errors'] += 1 - if x.args[0] in socket_error_eintr: - # I *think* this is right. EINTR should occur when a signal - # is received during the accept() call; all docs say retry - # the call, and I *think* I'm reading it right that Python - # will then go ahead and poll for and handle the signal - # elsewhere. See - # https://bitbucket.org/cherrypy/cherrypy/issue/707. - return - if x.args[0] in socket_errors_nonblocking: - # Just try again. See - # https://bitbucket.org/cherrypy/cherrypy/issue/479. - return - if x.args[0] in socket_errors_to_ignore: - # Our socket was closed. - # See https://bitbucket.org/cherrypy/cherrypy/issue/686. - return - raise - - def _get_interrupt(self): - return self._interrupt - - def _set_interrupt(self, interrupt): - self._interrupt = True - self.stop() - self._interrupt = interrupt - interrupt = property(_get_interrupt, _set_interrupt, - doc="Set this to an Exception instance to " - "interrupt the server.") - - def stop(self): - """Gracefully shutdown a server that is serving forever.""" - self.ready = False - if self._start_time is not None: - self._run_time += (time.time() - self._start_time) - self._start_time = None - - sock = getattr(self, "socket", None) - if sock: - if not isinstance(self.bind_addr, basestring): - # Touch our own socket to make accept() return immediately. - try: - host, port = sock.getsockname()[:2] - except socket.error: - x = sys.exc_info()[1] - if x.args[0] not in socket_errors_to_ignore: - # Changed to use error code and not message - # See - # https://bitbucket.org/cherrypy/cherrypy/issue/860. - raise - else: - # Note that we're explicitly NOT using AI_PASSIVE, - # here, because we want an actual IP to touch. - # localhost won't work if we've bound to a public IP, - # but it will if we bound to '0.0.0.0' (INADDR_ANY). - for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, - socket.SOCK_STREAM): - af, socktype, proto, canonname, sa = res - s = None - try: - s = socket.socket(af, socktype, proto) - # See - # http://groups.google.com/group/cherrypy-users/ - # browse_frm/thread/bbfe5eb39c904fe0 - s.settimeout(1.0) - s.connect((host, port)) - s.close() - except socket.error: - if s: - s.close() - if hasattr(sock, "close"): - sock.close() - self.socket = None - - self.requests.stop(self.shutdown_timeout) - - -class Gateway(object): - - """A base class to interface HTTPServer with other systems, such as WSGI. - """ - - def __init__(self, req): - self.req = req - - def respond(self): - """Process the current request. Must be overridden in a subclass.""" - raise NotImplemented - - -# These may either be wsgiserver.SSLAdapter subclasses or the string names -# of such classes (in which case they will be lazily loaded). -ssl_adapters = { - 'builtin': 'cherrypy.wsgiserver.ssl_builtin.BuiltinSSLAdapter', - 'pyopenssl': 'cherrypy.wsgiserver.ssl_pyopenssl.pyOpenSSLAdapter', -} - - -def get_ssl_adapter_class(name='pyopenssl'): - """Return an SSL adapter class for the given name.""" - adapter = ssl_adapters[name.lower()] - if isinstance(adapter, basestring): - last_dot = adapter.rfind(".") - attr_name = adapter[last_dot + 1:] - mod_path = adapter[:last_dot] - - try: - mod = sys.modules[mod_path] - if mod is None: - raise KeyError() - except KeyError: - # The last [''] is important. - mod = __import__(mod_path, globals(), locals(), ['']) - - # Let an AttributeError propagate outward. - try: - adapter = getattr(mod, attr_name) - except AttributeError: - raise AttributeError("'%s' object has no attribute '%s'" - % (mod_path, attr_name)) - - return adapter - -# ------------------------------- WSGI Stuff -------------------------------- # - - -class CherryPyWSGIServer(HTTPServer): - - """A subclass of HTTPServer which calls a WSGI application.""" - - wsgi_version = (1, 0) - """The version of WSGI to produce.""" - - def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None, - max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5, - accepted_queue_size=-1, accepted_queue_timeout=10): - self.requests = ThreadPool(self, min=numthreads or 1, max=max, - accepted_queue_size=accepted_queue_size, - accepted_queue_timeout=accepted_queue_timeout) - self.wsgi_app = wsgi_app - self.gateway = wsgi_gateways[self.wsgi_version] - - self.bind_addr = bind_addr - if not server_name: - server_name = socket.gethostname() - self.server_name = server_name - self.request_queue_size = request_queue_size - - self.timeout = timeout - self.shutdown_timeout = shutdown_timeout - self.clear_stats() - - def _get_numthreads(self): - return self.requests.min - - def _set_numthreads(self, value): - self.requests.min = value - numthreads = property(_get_numthreads, _set_numthreads) - - -class WSGIGateway(Gateway): - - """A base class to interface HTTPServer with WSGI.""" - - def __init__(self, req): - self.req = req - self.started_response = False - self.env = self.get_environ() - self.remaining_bytes_out = None - - def get_environ(self): - """Return a new environ dict targeting the given wsgi.version""" - raise NotImplemented - - def respond(self): - """Process the current request.""" - response = self.req.server.wsgi_app(self.env, self.start_response) - try: - for chunk in response: - # "The start_response callable must not actually transmit - # the response headers. Instead, it must store them for the - # server or gateway to transmit only after the first - # iteration of the application return value that yields - # a NON-EMPTY string, or upon the application's first - # invocation of the write() callable." (PEP 333) - if chunk: - if isinstance(chunk, unicodestr): - chunk = chunk.encode('ISO-8859-1') - self.write(chunk) - finally: - if hasattr(response, "close"): - response.close() - - def start_response(self, status, headers, exc_info=None): - """WSGI callable to begin the HTTP response.""" - # "The application may call start_response more than once, - # if and only if the exc_info argument is provided." - if self.started_response and not exc_info: - raise AssertionError("WSGI start_response called a second " - "time with no exc_info.") - self.started_response = True - - # "if exc_info is provided, and the HTTP headers have already been - # sent, start_response must raise an error, and should raise the - # exc_info tuple." - if self.req.sent_headers: - try: - raise exc_info[0], exc_info[1], exc_info[2] - finally: - exc_info = None - - self.req.status = status - for k, v in headers: - if not isinstance(k, str): - raise TypeError( - "WSGI response header key %r is not of type str." % k) - if not isinstance(v, str): - raise TypeError( - "WSGI response header value %r is not of type str." % v) - if k.lower() == 'content-length': - self.remaining_bytes_out = int(v) - self.req.outheaders.extend(headers) - - return self.write - - def write(self, chunk): - """WSGI callable to write unbuffered data to the client. - - This method is also used internally by start_response (to write - data from the iterable returned by the WSGI application). - """ - if not self.started_response: - raise AssertionError("WSGI write called before start_response.") - - chunklen = len(chunk) - rbo = self.remaining_bytes_out - if rbo is not None and chunklen > rbo: - if not self.req.sent_headers: - # Whew. We can send a 500 to the client. - self.req.simple_response( - "500 Internal Server Error", - "The requested resource returned more bytes than the " - "declared Content-Length.") - else: - # Dang. We have probably already sent data. Truncate the chunk - # to fit (so the client doesn't hang) and raise an error later. - chunk = chunk[:rbo] - - if not self.req.sent_headers: - self.req.sent_headers = True - self.req.send_headers() - - self.req.write(chunk) - - if rbo is not None: - rbo -= chunklen - if rbo < 0: - raise ValueError( - "Response body exceeds the declared Content-Length.") - - -class WSGIGateway_10(WSGIGateway): - - """A Gateway class to interface HTTPServer with WSGI 1.0.x.""" - - def get_environ(self): - """Return a new environ dict targeting the given wsgi.version""" - req = self.req - env = { - # set a non-standard environ entry so the WSGI app can know what - # the *real* server protocol is (and what features to support). - # See http://www.faqs.org/rfcs/rfc2145.html. - 'ACTUAL_SERVER_PROTOCOL': req.server.protocol, - 'PATH_INFO': req.path, - 'QUERY_STRING': req.qs, - 'REMOTE_ADDR': req.conn.remote_addr or '', - 'REMOTE_PORT': str(req.conn.remote_port or ''), - 'REQUEST_METHOD': req.method, - 'REQUEST_URI': req.uri, - 'SCRIPT_NAME': '', - 'SERVER_NAME': req.server.server_name, - # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol. - 'SERVER_PROTOCOL': req.request_protocol, - 'SERVER_SOFTWARE': req.server.software, - 'wsgi.errors': sys.stderr, - 'wsgi.input': req.rfile, - 'wsgi.multiprocess': False, - 'wsgi.multithread': True, - 'wsgi.run_once': False, - 'wsgi.url_scheme': req.scheme, - 'wsgi.version': (1, 0), - } - - if isinstance(req.server.bind_addr, basestring): - # AF_UNIX. This isn't really allowed by WSGI, which doesn't - # address unix domain sockets. But it's better than nothing. - env["SERVER_PORT"] = "" - else: - env["SERVER_PORT"] = str(req.server.bind_addr[1]) - - # Request headers - for k, v in req.inheaders.iteritems(): - env["HTTP_" + k.upper().replace("-", "_")] = v - - # CONTENT_TYPE/CONTENT_LENGTH - ct = env.pop("HTTP_CONTENT_TYPE", None) - if ct is not None: - env["CONTENT_TYPE"] = ct - cl = env.pop("HTTP_CONTENT_LENGTH", None) - if cl is not None: - env["CONTENT_LENGTH"] = cl - - if req.conn.ssl_env: - env.update(req.conn.ssl_env) - - return env - - -class WSGIGateway_u0(WSGIGateway_10): - - """A Gateway class to interface HTTPServer with WSGI u.0. - - WSGI u.0 is an experimental protocol, which uses unicode for keys and - values in both Python 2 and Python 3. - """ - - def get_environ(self): - """Return a new environ dict targeting the given wsgi.version""" - req = self.req - env_10 = WSGIGateway_10.get_environ(self) - env = dict([(k.decode('ISO-8859-1'), v) - for k, v in env_10.iteritems()]) - env[u'wsgi.version'] = ('u', 0) - - # Request-URI - env.setdefault(u'wsgi.url_encoding', u'utf-8') - try: - for key in [u"PATH_INFO", u"SCRIPT_NAME", u"QUERY_STRING"]: - env[key] = env_10[str(key)].decode(env[u'wsgi.url_encoding']) - except UnicodeDecodeError: - # Fall back to latin 1 so apps can transcode if needed. - env[u'wsgi.url_encoding'] = u'ISO-8859-1' - for key in [u"PATH_INFO", u"SCRIPT_NAME", u"QUERY_STRING"]: - env[key] = env_10[str(key)].decode(env[u'wsgi.url_encoding']) - - for k, v in sorted(env.items()): - if isinstance(v, str) and k not in ('REQUEST_URI', 'wsgi.input'): - env[k] = v.decode('ISO-8859-1') - - return env - -wsgi_gateways = { - (1, 0): WSGIGateway_10, - ('u', 0): WSGIGateway_u0, -} - - -class WSGIPathInfoDispatcher(object): - - """A WSGI dispatcher for dispatch based on the PATH_INFO. - - apps: a dict or list of (path_prefix, app) pairs. - """ - - def __init__(self, apps): - try: - apps = list(apps.items()) - except AttributeError: - pass - - # Sort the apps by len(path), descending - apps.sort(cmp=lambda x, y: cmp(len(x[0]), len(y[0]))) - apps.reverse() - - # The path_prefix strings must start, but not end, with a slash. - # Use "" instead of "/". - self.apps = [(p.rstrip("/"), a) for p, a in apps] - - def __call__(self, environ, start_response): - path = environ["PATH_INFO"] or "/" - for p, app in self.apps: - # The apps list should be sorted by length, descending. - if path.startswith(p + "/") or path == p: - environ = environ.copy() - environ["SCRIPT_NAME"] = environ["SCRIPT_NAME"] + p - environ["PATH_INFO"] = path[len(p):] - return app(environ, start_response) - - start_response('404 Not Found', [('Content-Type', 'text/plain'), - ('Content-Length', '0')]) - return [''] diff --git a/lib/cherrypy/wsgiserver/wsgiserver3.py b/lib/cherrypy/wsgiserver/wsgiserver3.py deleted file mode 100644 index 8bba6261..00000000 --- a/lib/cherrypy/wsgiserver/wsgiserver3.py +++ /dev/null @@ -1,2176 +0,0 @@ -"""A high-speed, production ready, thread pooled, generic HTTP server. - -Simplest example on how to use this module directly -(without using CherryPy's application machinery):: - - from cherrypy import wsgiserver - - def my_crazy_app(environ, start_response): - status = '200 OK' - response_headers = [('Content-type','text/plain')] - start_response(status, response_headers) - return ['Hello world!'] - - server = wsgiserver.CherryPyWSGIServer( - ('0.0.0.0', 8070), my_crazy_app, - server_name='www.cherrypy.example') - server.start() - -The CherryPy WSGI server can serve as many WSGI applications -as you want in one instance by using a WSGIPathInfoDispatcher:: - - d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app}) - server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d) - -Want SSL support? Just set server.ssl_adapter to an SSLAdapter instance. - -This won't call the CherryPy engine (application side) at all, only the -HTTP server, which is independent from the rest of CherryPy. Don't -let the name "CherryPyWSGIServer" throw you; the name merely reflects -its origin, not its coupling. - -For those of you wanting to understand internals of this module, here's the -basic call flow. The server's listening thread runs a very tight loop, -sticking incoming connections onto a Queue:: - - server = CherryPyWSGIServer(...) - server.start() - while True: - tick() - # This blocks until a request comes in: - child = socket.accept() - conn = HTTPConnection(child, ...) - server.requests.put(conn) - -Worker threads are kept in a pool and poll the Queue, popping off and then -handling each connection in turn. Each connection can consist of an arbitrary -number of requests and their responses, so we run a nested loop:: - - while True: - conn = server.requests.get() - conn.communicate() - -> while True: - req = HTTPRequest(...) - req.parse_request() - -> # Read the Request-Line, e.g. "GET /page HTTP/1.1" - req.rfile.readline() - read_headers(req.rfile, req.inheaders) - req.respond() - -> response = app(...) - try: - for chunk in response: - if chunk: - req.write(chunk) - finally: - if hasattr(response, "close"): - response.close() - if req.close_connection: - return -""" - -__all__ = ['HTTPRequest', 'HTTPConnection', 'HTTPServer', - 'SizeCheckWrapper', 'KnownLengthRFile', 'ChunkedRFile', - 'CP_makefile', - 'MaxSizeExceeded', 'NoSSLError', 'FatalSSLAlert', - 'WorkerThread', 'ThreadPool', 'SSLAdapter', - 'CherryPyWSGIServer', - 'Gateway', 'WSGIGateway', 'WSGIGateway_10', 'WSGIGateway_u0', - 'WSGIPathInfoDispatcher', 'get_ssl_adapter_class'] - -import os -try: - import queue -except: - import Queue as queue -import re -import email.utils -import socket -import sys -if 'win' in sys.platform and hasattr(socket, "AF_INET6"): - if not hasattr(socket, 'IPPROTO_IPV6'): - socket.IPPROTO_IPV6 = 41 - if not hasattr(socket, 'IPV6_V6ONLY'): - socket.IPV6_V6ONLY = 27 -if sys.version_info < (3, 1): - import io -else: - import _pyio as io -DEFAULT_BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE - -import threading -import time -from traceback import format_exc - -if sys.version_info >= (3, 0): - bytestr = bytes - unicodestr = str - basestring = (bytes, str) - - def ntob(n, encoding='ISO-8859-1'): - """Return the given native string as a byte string in the given - encoding. - """ - # In Python 3, the native string type is unicode - return n.encode(encoding) -else: - bytestr = str - unicodestr = unicode - basestring = basestring - - def ntob(n, encoding='ISO-8859-1'): - """Return the given native string as a byte string in the given - encoding. - """ - # In Python 2, the native string type is bytes. Assume it's already - # in the given encoding, which for ISO-8859-1 is almost always what - # was intended. - return n - -LF = ntob('\n') -CRLF = ntob('\r\n') -TAB = ntob('\t') -SPACE = ntob(' ') -COLON = ntob(':') -SEMICOLON = ntob(';') -EMPTY = ntob('') -NUMBER_SIGN = ntob('#') -QUESTION_MARK = ntob('?') -ASTERISK = ntob('*') -FORWARD_SLASH = ntob('/') -quoted_slash = re.compile(ntob("(?i)%2F")) - -import errno - - -def plat_specific_errors(*errnames): - """Return error numbers for all errors in errnames on this platform. - - The 'errno' module contains different global constants depending on - the specific platform (OS). This function will return the list of - numeric values for a given list of potential names. - """ - errno_names = dir(errno) - nums = [getattr(errno, k) for k in errnames if k in errno_names] - # de-dupe the list - return list(dict.fromkeys(nums).keys()) - -socket_error_eintr = plat_specific_errors("EINTR", "WSAEINTR") - -socket_errors_to_ignore = plat_specific_errors( - "EPIPE", - "EBADF", "WSAEBADF", - "ENOTSOCK", "WSAENOTSOCK", - "ETIMEDOUT", "WSAETIMEDOUT", - "ECONNREFUSED", "WSAECONNREFUSED", - "ECONNRESET", "WSAECONNRESET", - "ECONNABORTED", "WSAECONNABORTED", - "ENETRESET", "WSAENETRESET", - "EHOSTDOWN", "EHOSTUNREACH", -) -socket_errors_to_ignore.append("timed out") -socket_errors_to_ignore.append("The read operation timed out") - -socket_errors_nonblocking = plat_specific_errors( - 'EAGAIN', 'EWOULDBLOCK', 'WSAEWOULDBLOCK') - -comma_separated_headers = [ - ntob(h) for h in - ['Accept', 'Accept-Charset', 'Accept-Encoding', - 'Accept-Language', 'Accept-Ranges', 'Allow', 'Cache-Control', - 'Connection', 'Content-Encoding', 'Content-Language', 'Expect', - 'If-Match', 'If-None-Match', 'Pragma', 'Proxy-Authenticate', 'TE', - 'Trailer', 'Transfer-Encoding', 'Upgrade', 'Vary', 'Via', 'Warning', - 'WWW-Authenticate'] -] - - -import logging -if not hasattr(logging, 'statistics'): - logging.statistics = {} - - -def read_headers(rfile, hdict=None): - """Read headers from the given stream into the given header dict. - - If hdict is None, a new header dict is created. Returns the populated - header dict. - - Headers which are repeated are folded together using a comma if their - specification so dictates. - - This function raises ValueError when the read bytes violate the HTTP spec. - You should probably return "400 Bad Request" if this happens. - """ - if hdict is None: - hdict = {} - - while True: - line = rfile.readline() - if not line: - # No more data--illegal end of headers - raise ValueError("Illegal end of headers.") - - if line == CRLF: - # Normal end of headers - break - if not line.endswith(CRLF): - raise ValueError("HTTP requires CRLF terminators") - - if line[0] in (SPACE, TAB): - # It's a continuation line. - v = line.strip() - else: - try: - k, v = line.split(COLON, 1) - except ValueError: - raise ValueError("Illegal header line.") - # TODO: what about TE and WWW-Authenticate? - k = k.strip().title() - v = v.strip() - hname = k - - if k in comma_separated_headers: - existing = hdict.get(hname) - if existing: - v = b", ".join((existing, v)) - hdict[hname] = v - - return hdict - - -class MaxSizeExceeded(Exception): - pass - - -class SizeCheckWrapper(object): - - """Wraps a file-like object, raising MaxSizeExceeded if too large.""" - - def __init__(self, rfile, maxlen): - self.rfile = rfile - self.maxlen = maxlen - self.bytes_read = 0 - - def _check_length(self): - if self.maxlen and self.bytes_read > self.maxlen: - raise MaxSizeExceeded() - - def read(self, size=None): - data = self.rfile.read(size) - self.bytes_read += len(data) - self._check_length() - return data - - def readline(self, size=None): - if size is not None: - data = self.rfile.readline(size) - self.bytes_read += len(data) - self._check_length() - return data - - # User didn't specify a size ... - # We read the line in chunks to make sure it's not a 100MB line ! - res = [] - while True: - data = self.rfile.readline(256) - self.bytes_read += len(data) - self._check_length() - res.append(data) - # See https://bitbucket.org/cherrypy/cherrypy/issue/421 - if len(data) < 256 or data[-1:] == LF: - return EMPTY.join(res) - - def readlines(self, sizehint=0): - # Shamelessly stolen from StringIO - total = 0 - lines = [] - line = self.readline() - while line: - lines.append(line) - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline() - return lines - - def close(self): - self.rfile.close() - - def __iter__(self): - return self - - def __next__(self): - data = next(self.rfile) - self.bytes_read += len(data) - self._check_length() - return data - - def next(self): - data = self.rfile.next() - self.bytes_read += len(data) - self._check_length() - return data - - -class KnownLengthRFile(object): - - """Wraps a file-like object, returning an empty string when exhausted.""" - - def __init__(self, rfile, content_length): - self.rfile = rfile - self.remaining = content_length - - def read(self, size=None): - if self.remaining == 0: - return b'' - if size is None: - size = self.remaining - else: - size = min(size, self.remaining) - - data = self.rfile.read(size) - self.remaining -= len(data) - return data - - def readline(self, size=None): - if self.remaining == 0: - return b'' - if size is None: - size = self.remaining - else: - size = min(size, self.remaining) - - data = self.rfile.readline(size) - self.remaining -= len(data) - return data - - def readlines(self, sizehint=0): - # Shamelessly stolen from StringIO - total = 0 - lines = [] - line = self.readline(sizehint) - while line: - lines.append(line) - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline(sizehint) - return lines - - def close(self): - self.rfile.close() - - def __iter__(self): - return self - - def __next__(self): - data = next(self.rfile) - self.remaining -= len(data) - return data - - -class ChunkedRFile(object): - - """Wraps a file-like object, returning an empty string when exhausted. - - This class is intended to provide a conforming wsgi.input value for - request entities that have been encoded with the 'chunked' transfer - encoding. - """ - - def __init__(self, rfile, maxlen, bufsize=8192): - self.rfile = rfile - self.maxlen = maxlen - self.bytes_read = 0 - self.buffer = EMPTY - self.bufsize = bufsize - self.closed = False - - def _fetch(self): - if self.closed: - return - - line = self.rfile.readline() - self.bytes_read += len(line) - - if self.maxlen and self.bytes_read > self.maxlen: - raise MaxSizeExceeded("Request Entity Too Large", self.maxlen) - - line = line.strip().split(SEMICOLON, 1) - - try: - chunk_size = line.pop(0) - chunk_size = int(chunk_size, 16) - except ValueError: - raise ValueError("Bad chunked transfer size: " + repr(chunk_size)) - - if chunk_size <= 0: - self.closed = True - return - -## if line: chunk_extension = line[0] - - if self.maxlen and self.bytes_read + chunk_size > self.maxlen: - raise IOError("Request Entity Too Large") - - chunk = self.rfile.read(chunk_size) - self.bytes_read += len(chunk) - self.buffer += chunk - - crlf = self.rfile.read(2) - if crlf != CRLF: - raise ValueError( - "Bad chunked transfer coding (expected '\\r\\n', " - "got " + repr(crlf) + ")") - - def read(self, size=None): - data = EMPTY - while True: - if size and len(data) >= size: - return data - - if not self.buffer: - self._fetch() - if not self.buffer: - # EOF - return data - - if size: - remaining = size - len(data) - data += self.buffer[:remaining] - self.buffer = self.buffer[remaining:] - else: - data += self.buffer - - def readline(self, size=None): - data = EMPTY - while True: - if size and len(data) >= size: - return data - - if not self.buffer: - self._fetch() - if not self.buffer: - # EOF - return data - - newline_pos = self.buffer.find(LF) - if size: - if newline_pos == -1: - remaining = size - len(data) - data += self.buffer[:remaining] - self.buffer = self.buffer[remaining:] - else: - remaining = min(size - len(data), newline_pos) - data += self.buffer[:remaining] - self.buffer = self.buffer[remaining:] - else: - if newline_pos == -1: - data += self.buffer - else: - data += self.buffer[:newline_pos] - self.buffer = self.buffer[newline_pos:] - - def readlines(self, sizehint=0): - # Shamelessly stolen from StringIO - total = 0 - lines = [] - line = self.readline(sizehint) - while line: - lines.append(line) - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline(sizehint) - return lines - - def read_trailer_lines(self): - if not self.closed: - raise ValueError( - "Cannot read trailers until the request body has been read.") - - while True: - line = self.rfile.readline() - if not line: - # No more data--illegal end of headers - raise ValueError("Illegal end of headers.") - - self.bytes_read += len(line) - if self.maxlen and self.bytes_read > self.maxlen: - raise IOError("Request Entity Too Large") - - if line == CRLF: - # Normal end of headers - break - if not line.endswith(CRLF): - raise ValueError("HTTP requires CRLF terminators") - - yield line - - def close(self): - self.rfile.close() - - def __iter__(self): - # Shamelessly stolen from StringIO - total = 0 - line = self.readline(sizehint) - while line: - yield line - total += len(line) - if 0 < sizehint <= total: - break - line = self.readline(sizehint) - - -class HTTPRequest(object): - - """An HTTP Request (and response). - - A single HTTP connection may consist of multiple request/response pairs. - """ - - server = None - """The HTTPServer object which is receiving this request.""" - - conn = None - """The HTTPConnection object on which this request connected.""" - - inheaders = {} - """A dict of request headers.""" - - outheaders = [] - """A list of header tuples to write in the response.""" - - ready = False - """When True, the request has been parsed and is ready to begin generating - the response. When False, signals the calling Connection that the response - should not be generated and the connection should close.""" - - close_connection = False - """Signals the calling Connection that the request should close. This does - not imply an error! The client and/or server may each request that the - connection be closed.""" - - chunked_write = False - """If True, output will be encoded with the "chunked" transfer-coding. - - This value is set automatically inside send_headers.""" - - def __init__(self, server, conn): - self.server = server - self.conn = conn - - self.ready = False - self.started_request = False - self.scheme = ntob("http") - if self.server.ssl_adapter is not None: - self.scheme = ntob("https") - # Use the lowest-common protocol in case read_request_line errors. - self.response_protocol = 'HTTP/1.0' - self.inheaders = {} - - self.status = "" - self.outheaders = [] - self.sent_headers = False - self.close_connection = self.__class__.close_connection - self.chunked_read = False - self.chunked_write = self.__class__.chunked_write - - def parse_request(self): - """Parse the next HTTP request start-line and message-headers.""" - self.rfile = SizeCheckWrapper(self.conn.rfile, - self.server.max_request_header_size) - try: - success = self.read_request_line() - except MaxSizeExceeded: - self.simple_response( - "414 Request-URI Too Long", - "The Request-URI sent with the request exceeds the maximum " - "allowed bytes.") - return - else: - if not success: - return - - try: - success = self.read_request_headers() - except MaxSizeExceeded: - self.simple_response( - "413 Request Entity Too Large", - "The headers sent with the request exceed the maximum " - "allowed bytes.") - return - else: - if not success: - return - - self.ready = True - - def read_request_line(self): - # HTTP/1.1 connections are persistent by default. If a client - # requests a page, then idles (leaves the connection open), - # then rfile.readline() will raise socket.error("timed out"). - # Note that it does this based on the value given to settimeout(), - # and doesn't need the client to request or acknowledge the close - # (although your TCP stack might suffer for it: cf Apache's history - # with FIN_WAIT_2). - request_line = self.rfile.readline() - - # Set started_request to True so communicate() knows to send 408 - # from here on out. - self.started_request = True - if not request_line: - return False - - if request_line == CRLF: - # RFC 2616 sec 4.1: "...if the server is reading the protocol - # stream at the beginning of a message and receives a CRLF - # first, it should ignore the CRLF." - # But only ignore one leading line! else we enable a DoS. - request_line = self.rfile.readline() - if not request_line: - return False - - if not request_line.endswith(CRLF): - self.simple_response( - "400 Bad Request", "HTTP requires CRLF terminators") - return False - - try: - method, uri, req_protocol = request_line.strip().split(SPACE, 2) - # The [x:y] slicing is necessary for byte strings to avoid getting - # ord's - rp = int(req_protocol[5:6]), int(req_protocol[7:8]) - except ValueError: - self.simple_response("400 Bad Request", "Malformed Request-Line") - return False - - self.uri = uri - self.method = method - - # uri may be an abs_path (including "http://host.domain.tld"); - scheme, authority, path = self.parse_request_uri(uri) - if NUMBER_SIGN in path: - self.simple_response("400 Bad Request", - "Illegal #fragment in Request-URI.") - return False - - if scheme: - self.scheme = scheme - - qs = EMPTY - if QUESTION_MARK in path: - path, qs = path.split(QUESTION_MARK, 1) - - # Unquote the path+params (e.g. "/this%20path" -> "/this path"). - # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 - # - # But note that "...a URI must be separated into its components - # before the escaped characters within those components can be - # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2 - # Therefore, "/this%2Fpath" becomes "/this%2Fpath", not "/this/path". - try: - atoms = [self.unquote_bytes(x) for x in quoted_slash.split(path)] - except ValueError: - ex = sys.exc_info()[1] - self.simple_response("400 Bad Request", ex.args[0]) - return False - path = b"%2F".join(atoms) - self.path = path - - # Note that, like wsgiref and most other HTTP servers, - # we "% HEX HEX"-unquote the path but not the query string. - self.qs = qs - - # Compare request and server HTTP protocol versions, in case our - # server does not support the requested protocol. Limit our output - # to min(req, server). We want the following output: - # request server actual written supported response - # protocol protocol response protocol feature set - # a 1.0 1.0 1.0 1.0 - # b 1.0 1.1 1.1 1.0 - # c 1.1 1.0 1.0 1.0 - # d 1.1 1.1 1.1 1.1 - # Notice that, in (b), the response will be "HTTP/1.1" even though - # the client only understands 1.0. RFC 2616 10.5.6 says we should - # only return 505 if the _major_ version is different. - # The [x:y] slicing is necessary for byte strings to avoid getting - # ord's - sp = int(self.server.protocol[5:6]), int(self.server.protocol[7:8]) - - if sp[0] != rp[0]: - self.simple_response("505 HTTP Version Not Supported") - return False - - self.request_protocol = req_protocol - self.response_protocol = "HTTP/%s.%s" % min(rp, sp) - return True - - def read_request_headers(self): - """Read self.rfile into self.inheaders. Return success.""" - - # then all the http headers - try: - read_headers(self.rfile, self.inheaders) - except ValueError: - ex = sys.exc_info()[1] - self.simple_response("400 Bad Request", ex.args[0]) - return False - - mrbs = self.server.max_request_body_size - if mrbs and int(self.inheaders.get(b"Content-Length", 0)) > mrbs: - self.simple_response( - "413 Request Entity Too Large", - "The entity sent with the request exceeds the maximum " - "allowed bytes.") - return False - - # Persistent connection support - if self.response_protocol == "HTTP/1.1": - # Both server and client are HTTP/1.1 - if self.inheaders.get(b"Connection", b"") == b"close": - self.close_connection = True - else: - # Either the server or client (or both) are HTTP/1.0 - if self.inheaders.get(b"Connection", b"") != b"Keep-Alive": - self.close_connection = True - - # Transfer-Encoding support - te = None - if self.response_protocol == "HTTP/1.1": - te = self.inheaders.get(b"Transfer-Encoding") - if te: - te = [x.strip().lower() for x in te.split(b",") if x.strip()] - - self.chunked_read = False - - if te: - for enc in te: - if enc == b"chunked": - self.chunked_read = True - else: - # Note that, even if we see "chunked", we must reject - # if there is an extension we don't recognize. - self.simple_response("501 Unimplemented") - self.close_connection = True - return False - - # From PEP 333: - # "Servers and gateways that implement HTTP 1.1 must provide - # transparent support for HTTP 1.1's "expect/continue" mechanism. - # This may be done in any of several ways: - # 1. Respond to requests containing an Expect: 100-continue request - # with an immediate "100 Continue" response, and proceed normally. - # 2. Proceed with the request normally, but provide the application - # with a wsgi.input stream that will send the "100 Continue" - # response if/when the application first attempts to read from - # the input stream. The read request must then remain blocked - # until the client responds. - # 3. Wait until the client decides that the server does not support - # expect/continue, and sends the request body on its own. - # (This is suboptimal, and is not recommended.) - # - # We used to do 3, but are now doing 1. Maybe we'll do 2 someday, - # but it seems like it would be a big slowdown for such a rare case. - if self.inheaders.get(b"Expect", b"") == b"100-continue": - # Don't use simple_response here, because it emits headers - # we don't want. See - # https://bitbucket.org/cherrypy/cherrypy/issue/951 - msg = self.server.protocol.encode( - 'ascii') + b" 100 Continue\r\n\r\n" - try: - self.conn.wfile.write(msg) - except socket.error: - x = sys.exc_info()[1] - if x.args[0] not in socket_errors_to_ignore: - raise - return True - - def parse_request_uri(self, uri): - """Parse a Request-URI into (scheme, authority, path). - - Note that Request-URI's must be one of:: - - Request-URI = "*" | absoluteURI | abs_path | authority - - Therefore, a Request-URI which starts with a double forward-slash - cannot be a "net_path":: - - net_path = "//" authority [ abs_path ] - - Instead, it must be interpreted as an "abs_path" with an empty first - path segment:: - - abs_path = "/" path_segments - path_segments = segment *( "/" segment ) - segment = *pchar *( ";" param ) - param = *pchar - """ - if uri == ASTERISK: - return None, None, uri - - scheme, sep, remainder = uri.partition(b'://') - if sep and QUESTION_MARK not in scheme: - # An absoluteURI. - # If there's a scheme (and it must be http or https), then: - # http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query - # ]] - authority, path_a, path_b = remainder.partition(FORWARD_SLASH) - return scheme.lower(), authority, path_a + path_b - - if uri.startswith(FORWARD_SLASH): - # An abs_path. - return None, None, uri - else: - # An authority. - return None, uri, None - - def unquote_bytes(self, path): - """takes quoted string and unquotes % encoded values""" - res = path.split(b'%') - - for i in range(1, len(res)): - item = res[i] - try: - res[i] = bytes([int(item[:2], 16)]) + item[2:] - except ValueError: - raise - return b''.join(res) - - def respond(self): - """Call the gateway and write its iterable output.""" - mrbs = self.server.max_request_body_size - if self.chunked_read: - self.rfile = ChunkedRFile(self.conn.rfile, mrbs) - else: - cl = int(self.inheaders.get(b"Content-Length", 0)) - if mrbs and mrbs < cl: - if not self.sent_headers: - self.simple_response( - "413 Request Entity Too Large", - "The entity sent with the request exceeds the " - "maximum allowed bytes.") - return - self.rfile = KnownLengthRFile(self.conn.rfile, cl) - - self.server.gateway(self).respond() - - if (self.ready and not self.sent_headers): - self.sent_headers = True - self.send_headers() - if self.chunked_write: - self.conn.wfile.write(b"0\r\n\r\n") - - def simple_response(self, status, msg=""): - """Write a simple response back to the client.""" - status = str(status) - buf = [bytes(self.server.protocol, "ascii") + SPACE + - bytes(status, "ISO-8859-1") + CRLF, - bytes("Content-Length: %s\r\n" % len(msg), "ISO-8859-1"), - b"Content-Type: text/plain\r\n"] - - if status[:3] in ("413", "414"): - # Request Entity Too Large / Request-URI Too Long - self.close_connection = True - if self.response_protocol == 'HTTP/1.1': - # This will not be true for 414, since read_request_line - # usually raises 414 before reading the whole line, and we - # therefore cannot know the proper response_protocol. - buf.append(b"Connection: close\r\n") - else: - # HTTP/1.0 had no 413/414 status nor Connection header. - # Emit 400 instead and trust the message body is enough. - status = "400 Bad Request" - - buf.append(CRLF) - if msg: - if isinstance(msg, unicodestr): - msg = msg.encode("ISO-8859-1") - buf.append(msg) - - try: - self.conn.wfile.write(b"".join(buf)) - except socket.error: - x = sys.exc_info()[1] - if x.args[0] not in socket_errors_to_ignore: - raise - - def write(self, chunk): - """Write unbuffered data to the client.""" - if self.chunked_write and chunk: - buf = [bytes(hex(len(chunk)), 'ASCII')[2:], CRLF, chunk, CRLF] - self.conn.wfile.write(EMPTY.join(buf)) - else: - self.conn.wfile.write(chunk) - - def send_headers(self): - """Assert, process, and send the HTTP response message-headers. - - You must set self.status, and self.outheaders before calling this. - """ - hkeys = [key.lower() for key, value in self.outheaders] - status = int(self.status[:3]) - - if status == 413: - # Request Entity Too Large. Close conn to avoid garbage. - self.close_connection = True - elif b"content-length" not in hkeys: - # "All 1xx (informational), 204 (no content), - # and 304 (not modified) responses MUST NOT - # include a message-body." So no point chunking. - if status < 200 or status in (204, 205, 304): - pass - else: - if (self.response_protocol == 'HTTP/1.1' - and self.method != b'HEAD'): - # Use the chunked transfer-coding - self.chunked_write = True - self.outheaders.append((b"Transfer-Encoding", b"chunked")) - else: - # Closing the conn is the only way to determine len. - self.close_connection = True - - if b"connection" not in hkeys: - if self.response_protocol == 'HTTP/1.1': - # Both server and client are HTTP/1.1 or better - if self.close_connection: - self.outheaders.append((b"Connection", b"close")) - else: - # Server and/or client are HTTP/1.0 - if not self.close_connection: - self.outheaders.append((b"Connection", b"Keep-Alive")) - - if (not self.close_connection) and (not self.chunked_read): - # Read any remaining request body data on the socket. - # "If an origin server receives a request that does not include an - # Expect request-header field with the "100-continue" expectation, - # the request includes a request body, and the server responds - # with a final status code before reading the entire request body - # from the transport connection, then the server SHOULD NOT close - # the transport connection until it has read the entire request, - # or until the client closes the connection. Otherwise, the client - # might not reliably receive the response message. However, this - # requirement is not be construed as preventing a server from - # defending itself against denial-of-service attacks, or from - # badly broken client implementations." - remaining = getattr(self.rfile, 'remaining', 0) - if remaining > 0: - self.rfile.read(remaining) - - if b"date" not in hkeys: - self.outheaders.append(( - b"Date", - email.utils.formatdate(usegmt=True).encode('ISO-8859-1') - )) - - if b"server" not in hkeys: - self.outheaders.append( - (b"Server", self.server.server_name.encode('ISO-8859-1'))) - - buf = [self.server.protocol.encode( - 'ascii') + SPACE + self.status + CRLF] - for k, v in self.outheaders: - buf.append(k + COLON + SPACE + v + CRLF) - buf.append(CRLF) - self.conn.wfile.write(EMPTY.join(buf)) - - -class NoSSLError(Exception): - - """Exception raised when a client speaks HTTP to an HTTPS socket.""" - pass - - -class FatalSSLAlert(Exception): - - """Exception raised when the SSL implementation signals a fatal alert.""" - pass - - -class CP_BufferedWriter(io.BufferedWriter): - - """Faux file object attached to a socket object.""" - - def write(self, b): - self._checkClosed() - if isinstance(b, str): - raise TypeError("can't write str to binary stream") - - with self._write_lock: - self._write_buf.extend(b) - self._flush_unlocked() - return len(b) - - def _flush_unlocked(self): - self._checkClosed("flush of closed file") - while self._write_buf: - try: - # ssl sockets only except 'bytes', not bytearrays - # so perhaps we should conditionally wrap this for perf? - n = self.raw.write(bytes(self._write_buf)) - except io.BlockingIOError as e: - n = e.characters_written - del self._write_buf[:n] - - -def CP_makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): - if 'r' in mode: - return io.BufferedReader(socket.SocketIO(sock, mode), bufsize) - else: - return CP_BufferedWriter(socket.SocketIO(sock, mode), bufsize) - - -class HTTPConnection(object): - - """An HTTP connection (active socket). - - server: the Server object which received this connection. - socket: the raw socket object (usually TCP) for this connection. - makefile: a fileobject class for reading from the socket. - """ - - remote_addr = None - remote_port = None - ssl_env = None - rbufsize = DEFAULT_BUFFER_SIZE - wbufsize = DEFAULT_BUFFER_SIZE - RequestHandlerClass = HTTPRequest - - def __init__(self, server, sock, makefile=CP_makefile): - self.server = server - self.socket = sock - self.rfile = makefile(sock, "rb", self.rbufsize) - self.wfile = makefile(sock, "wb", self.wbufsize) - self.requests_seen = 0 - - def communicate(self): - """Read each request and respond appropriately.""" - request_seen = False - try: - while True: - # (re)set req to None so that if something goes wrong in - # the RequestHandlerClass constructor, the error doesn't - # get written to the previous request. - req = None - req = self.RequestHandlerClass(self.server, self) - - # This order of operations should guarantee correct pipelining. - req.parse_request() - if self.server.stats['Enabled']: - self.requests_seen += 1 - if not req.ready: - # Something went wrong in the parsing (and the server has - # probably already made a simple_response). Return and - # let the conn close. - return - - request_seen = True - req.respond() - if req.close_connection: - return - except socket.error: - e = sys.exc_info()[1] - errnum = e.args[0] - # sadly SSL sockets return a different (longer) time out string - if ( - errnum == 'timed out' or - errnum == 'The read operation timed out' - ): - # Don't error if we're between requests; only error - # if 1) no request has been started at all, or 2) we're - # in the middle of a request. - # See https://bitbucket.org/cherrypy/cherrypy/issue/853 - if (not request_seen) or (req and req.started_request): - # Don't bother writing the 408 if the response - # has already started being written. - if req and not req.sent_headers: - try: - req.simple_response("408 Request Timeout") - except FatalSSLAlert: - # Close the connection. - return - elif errnum not in socket_errors_to_ignore: - self.server.error_log("socket.error %s" % repr(errnum), - level=logging.WARNING, traceback=True) - if req and not req.sent_headers: - try: - req.simple_response("500 Internal Server Error") - except FatalSSLAlert: - # Close the connection. - return - return - except (KeyboardInterrupt, SystemExit): - raise - except FatalSSLAlert: - # Close the connection. - return - except NoSSLError: - if req and not req.sent_headers: - # Unwrap our wfile - self.wfile = CP_makefile( - self.socket._sock, "wb", self.wbufsize) - req.simple_response( - "400 Bad Request", - "The client sent a plain HTTP request, but this server " - "only speaks HTTPS on this port.") - self.linger = True - except Exception: - e = sys.exc_info()[1] - self.server.error_log(repr(e), level=logging.ERROR, traceback=True) - if req and not req.sent_headers: - try: - req.simple_response("500 Internal Server Error") - except FatalSSLAlert: - # Close the connection. - return - - linger = False - - def close(self): - """Close the socket underlying this connection.""" - self.rfile.close() - - if not self.linger: - # Python's socket module does NOT call close on the kernel - # socket when you call socket.close(). We do so manually here - # because we want this server to send a FIN TCP segment - # immediately. Note this must be called *before* calling - # socket.close(), because the latter drops its reference to - # the kernel socket. - # Python 3 *probably* fixed this with socket._real_close; - # hard to tell. -# self.socket._sock.close() - self.socket.close() - else: - # On the other hand, sometimes we want to hang around for a bit - # to make sure the client has a chance to read our entire - # response. Skipping the close() calls here delays the FIN - # packet until the socket object is garbage-collected later. - # Someday, perhaps, we'll do the full lingering_close that - # Apache does, but not today. - pass - - -class TrueyZero(object): - - """An object which equals and does math like the integer 0 but evals True. - """ - - def __add__(self, other): - return other - - def __radd__(self, other): - return other -trueyzero = TrueyZero() - - -_SHUTDOWNREQUEST = None - - -class WorkerThread(threading.Thread): - - """Thread which continuously polls a Queue for Connection objects. - - Due to the timing issues of polling a Queue, a WorkerThread does not - check its own 'ready' flag after it has started. To stop the thread, - it is necessary to stick a _SHUTDOWNREQUEST object onto the Queue - (one for each running WorkerThread). - """ - - conn = None - """The current connection pulled off the Queue, or None.""" - - server = None - """The HTTP Server which spawned this thread, and which owns the - Queue and is placing active connections into it.""" - - ready = False - """A simple flag for the calling server to know when this thread - has begun polling the Queue.""" - - def __init__(self, server): - self.ready = False - self.server = server - - self.requests_seen = 0 - self.bytes_read = 0 - self.bytes_written = 0 - self.start_time = None - self.work_time = 0 - self.stats = { - 'Requests': lambda s: self.requests_seen + ( - (self.start_time is None) and - trueyzero or - self.conn.requests_seen - ), - 'Bytes Read': lambda s: self.bytes_read + ( - (self.start_time is None) and - trueyzero or - self.conn.rfile.bytes_read - ), - 'Bytes Written': lambda s: self.bytes_written + ( - (self.start_time is None) and - trueyzero or - self.conn.wfile.bytes_written - ), - 'Work Time': lambda s: self.work_time + ( - (self.start_time is None) and - trueyzero or - time.time() - self.start_time - ), - 'Read Throughput': lambda s: s['Bytes Read'](s) / ( - s['Work Time'](s) or 1e-6), - 'Write Throughput': lambda s: s['Bytes Written'](s) / ( - s['Work Time'](s) or 1e-6), - } - threading.Thread.__init__(self) - - def run(self): - self.server.stats['Worker Threads'][self.getName()] = self.stats - try: - self.ready = True - while True: - conn = self.server.requests.get() - if conn is _SHUTDOWNREQUEST: - return - - self.conn = conn - if self.server.stats['Enabled']: - self.start_time = time.time() - try: - conn.communicate() - finally: - conn.close() - if self.server.stats['Enabled']: - self.requests_seen += self.conn.requests_seen - self.bytes_read += self.conn.rfile.bytes_read - self.bytes_written += self.conn.wfile.bytes_written - self.work_time += time.time() - self.start_time - self.start_time = None - self.conn = None - except (KeyboardInterrupt, SystemExit): - exc = sys.exc_info()[1] - self.server.interrupt = exc - - -class ThreadPool(object): - - """A Request Queue for an HTTPServer which pools threads. - - ThreadPool objects must provide min, get(), put(obj), start() - and stop(timeout) attributes. - """ - - def __init__(self, server, min=10, max=-1, - accepted_queue_size=-1, accepted_queue_timeout=10): - self.server = server - self.min = min - self.max = max - self._threads = [] - self._queue = queue.Queue(maxsize=accepted_queue_size) - self._queue_put_timeout = accepted_queue_timeout - self.get = self._queue.get - - def start(self): - """Start the pool of threads.""" - for i in range(self.min): - self._threads.append(WorkerThread(self.server)) - for worker in self._threads: - worker.setName("CP Server " + worker.getName()) - worker.start() - for worker in self._threads: - while not worker.ready: - time.sleep(.1) - - def _get_idle(self): - """Number of worker threads which are idle. Read-only.""" - return len([t for t in self._threads if t.conn is None]) - idle = property(_get_idle, doc=_get_idle.__doc__) - - def put(self, obj): - self._queue.put(obj, block=True, timeout=self._queue_put_timeout) - if obj is _SHUTDOWNREQUEST: - return - - def grow(self, amount): - """Spawn new worker threads (not above self.max).""" - if self.max > 0: - budget = max(self.max - len(self._threads), 0) - else: - # self.max <= 0 indicates no maximum - budget = float('inf') - - n_new = min(amount, budget) - - workers = [self._spawn_worker() for i in range(n_new)] - while not all(worker.ready for worker in workers): - time.sleep(.1) - self._threads.extend(workers) - - def _spawn_worker(self): - worker = WorkerThread(self.server) - worker.setName("CP Server " + worker.getName()) - worker.start() - return worker - - def shrink(self, amount): - """Kill off worker threads (not below self.min).""" - # Grow/shrink the pool if necessary. - # Remove any dead threads from our list - for t in self._threads: - if not t.isAlive(): - self._threads.remove(t) - amount -= 1 - - # calculate the number of threads above the minimum - n_extra = max(len(self._threads) - self.min, 0) - - # don't remove more than amount - n_to_remove = min(amount, n_extra) - - # put shutdown requests on the queue equal to the number of threads - # to remove. As each request is processed by a worker, that worker - # will terminate and be culled from the list. - for n in range(n_to_remove): - self._queue.put(_SHUTDOWNREQUEST) - - def stop(self, timeout=5): - # Must shut down threads here so the code that calls - # this method can know when all threads are stopped. - for worker in self._threads: - self._queue.put(_SHUTDOWNREQUEST) - - # Don't join currentThread (when stop is called inside a request). - current = threading.currentThread() - if timeout and timeout >= 0: - endtime = time.time() + timeout - while self._threads: - worker = self._threads.pop() - if worker is not current and worker.isAlive(): - try: - if timeout is None or timeout < 0: - worker.join() - else: - remaining_time = endtime - time.time() - if remaining_time > 0: - worker.join(remaining_time) - if worker.isAlive(): - # We exhausted the timeout. - # Forcibly shut down the socket. - c = worker.conn - if c and not c.rfile.closed: - try: - c.socket.shutdown(socket.SHUT_RD) - except TypeError: - # pyOpenSSL sockets don't take an arg - c.socket.shutdown() - worker.join() - except (AssertionError, - # Ignore repeated Ctrl-C. - # See - # https://bitbucket.org/cherrypy/cherrypy/issue/691. - KeyboardInterrupt): - pass - - def _get_qsize(self): - return self._queue.qsize() - qsize = property(_get_qsize) - - -try: - import fcntl -except ImportError: - try: - from ctypes import windll, WinError - import ctypes.wintypes - _SetHandleInformation = windll.kernel32.SetHandleInformation - _SetHandleInformation.argtypes = [ - ctypes.wintypes.HANDLE, - ctypes.wintypes.DWORD, - ctypes.wintypes.DWORD, - ] - _SetHandleInformation.restype = ctypes.wintypes.BOOL - except ImportError: - def prevent_socket_inheritance(sock): - """Dummy function, since neither fcntl nor ctypes are available.""" - pass - else: - def prevent_socket_inheritance(sock): - """Mark the given socket fd as non-inheritable (Windows).""" - if not _SetHandleInformation(sock.fileno(), 1, 0): - raise WinError() -else: - def prevent_socket_inheritance(sock): - """Mark the given socket fd as non-inheritable (POSIX).""" - fd = sock.fileno() - old_flags = fcntl.fcntl(fd, fcntl.F_GETFD) - fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC) - - -class SSLAdapter(object): - - """Base class for SSL driver library adapters. - - Required methods: - - * ``wrap(sock) -> (wrapped socket, ssl environ dict)`` - * ``makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE) -> - socket file object`` - """ - - def __init__(self, certificate, private_key, certificate_chain=None): - self.certificate = certificate - self.private_key = private_key - self.certificate_chain = certificate_chain - - def wrap(self, sock): - raise NotImplemented - - def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): - raise NotImplemented - - -class HTTPServer(object): - - """An HTTP server.""" - - _bind_addr = "127.0.0.1" - _interrupt = None - - gateway = None - """A Gateway instance.""" - - minthreads = None - """The minimum number of worker threads to create (default 10).""" - - maxthreads = None - """The maximum number of worker threads to create (default -1 = no limit). - """ - - server_name = None - """The name of the server; defaults to socket.gethostname().""" - - protocol = "HTTP/1.1" - """The version string to write in the Status-Line of all HTTP responses. - - For example, "HTTP/1.1" is the default. This also limits the supported - features used in the response.""" - - request_queue_size = 5 - """The 'backlog' arg to socket.listen(); max queued connections - (default 5). - """ - - shutdown_timeout = 5 - """The total time, in seconds, to wait for worker threads to cleanly exit. - """ - - timeout = 10 - """The timeout in seconds for accepted connections (default 10).""" - - version = "CherryPy/3.6.0" - """A version string for the HTTPServer.""" - - software = None - """The value to set for the SERVER_SOFTWARE entry in the WSGI environ. - - If None, this defaults to ``'%s Server' % self.version``.""" - - ready = False - """An internal flag which marks whether the socket is accepting - connections. - """ - - max_request_header_size = 0 - """The maximum size, in bytes, for request headers, or 0 for no limit.""" - - max_request_body_size = 0 - """The maximum size, in bytes, for request bodies, or 0 for no limit.""" - - nodelay = True - """If True (the default since 3.1), sets the TCP_NODELAY socket option.""" - - ConnectionClass = HTTPConnection - """The class to use for handling HTTP connections.""" - - ssl_adapter = None - """An instance of SSLAdapter (or a subclass). - - You must have the corresponding SSL driver library installed.""" - - def __init__(self, bind_addr, gateway, minthreads=10, maxthreads=-1, - server_name=None): - self.bind_addr = bind_addr - self.gateway = gateway - - self.requests = ThreadPool(self, min=minthreads or 1, max=maxthreads) - - if not server_name: - server_name = socket.gethostname() - self.server_name = server_name - self.clear_stats() - - def clear_stats(self): - self._start_time = None - self._run_time = 0 - self.stats = { - 'Enabled': False, - 'Bind Address': lambda s: repr(self.bind_addr), - 'Run time': lambda s: (not s['Enabled']) and -1 or self.runtime(), - 'Accepts': 0, - 'Accepts/sec': lambda s: s['Accepts'] / self.runtime(), - 'Queue': lambda s: getattr(self.requests, "qsize", None), - 'Threads': lambda s: len(getattr(self.requests, "_threads", [])), - 'Threads Idle': lambda s: getattr(self.requests, "idle", None), - 'Socket Errors': 0, - 'Requests': lambda s: (not s['Enabled']) and -1 or sum( - [w['Requests'](w) for w in s['Worker Threads'].values()], 0), - 'Bytes Read': lambda s: (not s['Enabled']) and -1 or sum( - [w['Bytes Read'](w) for w in s['Worker Threads'].values()], 0), - 'Bytes Written': lambda s: (not s['Enabled']) and -1 or sum( - [w['Bytes Written'](w) for w in s['Worker Threads'].values()], - 0), - 'Work Time': lambda s: (not s['Enabled']) and -1 or sum( - [w['Work Time'](w) for w in s['Worker Threads'].values()], 0), - 'Read Throughput': lambda s: (not s['Enabled']) and -1 or sum( - [w['Bytes Read'](w) / (w['Work Time'](w) or 1e-6) - for w in s['Worker Threads'].values()], 0), - 'Write Throughput': lambda s: (not s['Enabled']) and -1 or sum( - [w['Bytes Written'](w) / (w['Work Time'](w) or 1e-6) - for w in s['Worker Threads'].values()], 0), - 'Worker Threads': {}, - } - logging.statistics["CherryPy HTTPServer %d" % id(self)] = self.stats - - def runtime(self): - if self._start_time is None: - return self._run_time - else: - return self._run_time + (time.time() - self._start_time) - - def __str__(self): - return "%s.%s(%r)" % (self.__module__, self.__class__.__name__, - self.bind_addr) - - def _get_bind_addr(self): - return self._bind_addr - - def _set_bind_addr(self, value): - if isinstance(value, tuple) and value[0] in ('', None): - # Despite the socket module docs, using '' does not - # allow AI_PASSIVE to work. Passing None instead - # returns '0.0.0.0' like we want. In other words: - # host AI_PASSIVE result - # '' Y 192.168.x.y - # '' N 192.168.x.y - # None Y 0.0.0.0 - # None N 127.0.0.1 - # But since you can get the same effect with an explicit - # '0.0.0.0', we deny both the empty string and None as values. - raise ValueError("Host values of '' or None are not allowed. " - "Use '0.0.0.0' (IPv4) or '::' (IPv6) instead " - "to listen on all active interfaces.") - self._bind_addr = value - bind_addr = property( - _get_bind_addr, - _set_bind_addr, - doc="""The interface on which to listen for connections. - - For TCP sockets, a (host, port) tuple. Host values may be any IPv4 - or IPv6 address, or any valid hostname. The string 'localhost' is a - synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). - The string '0.0.0.0' is a special IPv4 entry meaning "any active - interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for - IPv6. The empty string or None are not allowed. - - For UNIX sockets, supply the filename as a string.""") - - def start(self): - """Run the server forever.""" - # We don't have to trap KeyboardInterrupt or SystemExit here, - # because cherrpy.server already does so, calling self.stop() for us. - # If you're using this server with another framework, you should - # trap those exceptions in whatever code block calls start(). - self._interrupt = None - - if self.software is None: - self.software = "%s Server" % self.version - - # Select the appropriate socket - if isinstance(self.bind_addr, basestring): - # AF_UNIX socket - - # So we can reuse the socket... - try: - os.unlink(self.bind_addr) - except: - pass - - # So everyone can access the socket... - try: - os.chmod(self.bind_addr, 511) # 0777 - except: - pass - - info = [ - (socket.AF_UNIX, socket.SOCK_STREAM, 0, "", self.bind_addr)] - else: - # AF_INET or AF_INET6 socket - # Get the correct address family for our host (allows IPv6 - # addresses) - host, port = self.bind_addr - try: - info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, - socket.SOCK_STREAM, 0, - socket.AI_PASSIVE) - except socket.gaierror: - if ':' in self.bind_addr[0]: - info = [(socket.AF_INET6, socket.SOCK_STREAM, - 0, "", self.bind_addr + (0, 0))] - else: - info = [(socket.AF_INET, socket.SOCK_STREAM, - 0, "", self.bind_addr)] - - self.socket = None - msg = "No socket could be created" - for res in info: - af, socktype, proto, canonname, sa = res - try: - self.bind(af, socktype, proto) - except socket.error as serr: - msg = "%s -- (%s: %s)" % (msg, sa, serr) - if self.socket: - self.socket.close() - self.socket = None - continue - break - if not self.socket: - raise socket.error(msg) - - # Timeout so KeyboardInterrupt can be caught on Win32 - self.socket.settimeout(1) - self.socket.listen(self.request_queue_size) - - # Create worker threads - self.requests.start() - - self.ready = True - self._start_time = time.time() - while self.ready: - try: - self.tick() - except (KeyboardInterrupt, SystemExit): - raise - except: - self.error_log("Error in HTTPServer.tick", level=logging.ERROR, - traceback=True) - if self.interrupt: - while self.interrupt is True: - # Wait for self.stop() to complete. See _set_interrupt. - time.sleep(0.1) - if self.interrupt: - raise self.interrupt - - def error_log(self, msg="", level=20, traceback=False): - # Override this in subclasses as desired - sys.stderr.write(msg + '\n') - sys.stderr.flush() - if traceback: - tblines = format_exc() - sys.stderr.write(tblines) - sys.stderr.flush() - - def bind(self, family, type, proto=0): - """Create (or recreate) the actual socket object.""" - self.socket = socket.socket(family, type, proto) - prevent_socket_inheritance(self.socket) - self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - if self.nodelay and not isinstance(self.bind_addr, str): - self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) - - if self.ssl_adapter is not None: - self.socket = self.ssl_adapter.bind(self.socket) - - # If listening on the IPV6 any address ('::' = IN6ADDR_ANY), - # activate dual-stack. See - # https://bitbucket.org/cherrypy/cherrypy/issue/871. - if (hasattr(socket, 'AF_INET6') and family == socket.AF_INET6 - and self.bind_addr[0] in ('::', '::0', '::0.0.0.0')): - try: - self.socket.setsockopt( - socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) - except (AttributeError, socket.error): - # Apparently, the socket option is not available in - # this machine's TCP stack - pass - - self.socket.bind(self.bind_addr) - - def tick(self): - """Accept a new connection and put it on the Queue.""" - try: - s, addr = self.socket.accept() - if self.stats['Enabled']: - self.stats['Accepts'] += 1 - if not self.ready: - return - - prevent_socket_inheritance(s) - if hasattr(s, 'settimeout'): - s.settimeout(self.timeout) - - makefile = CP_makefile - ssl_env = {} - # if ssl cert and key are set, we try to be a secure HTTP server - if self.ssl_adapter is not None: - try: - s, ssl_env = self.ssl_adapter.wrap(s) - except NoSSLError: - msg = ("The client sent a plain HTTP request, but " - "this server only speaks HTTPS on this port.") - buf = ["%s 400 Bad Request\r\n" % self.protocol, - "Content-Length: %s\r\n" % len(msg), - "Content-Type: text/plain\r\n\r\n", - msg] - - wfile = makefile(s, "wb", DEFAULT_BUFFER_SIZE) - try: - wfile.write("".join(buf).encode('ISO-8859-1')) - except socket.error: - x = sys.exc_info()[1] - if x.args[0] not in socket_errors_to_ignore: - raise - return - if not s: - return - makefile = self.ssl_adapter.makefile - # Re-apply our timeout since we may have a new socket object - if hasattr(s, 'settimeout'): - s.settimeout(self.timeout) - - conn = self.ConnectionClass(self, s, makefile) - - if not isinstance(self.bind_addr, basestring): - # optional values - # Until we do DNS lookups, omit REMOTE_HOST - if addr is None: # sometimes this can happen - # figure out if AF_INET or AF_INET6. - if len(s.getsockname()) == 2: - # AF_INET - addr = ('0.0.0.0', 0) - else: - # AF_INET6 - addr = ('::', 0) - conn.remote_addr = addr[0] - conn.remote_port = addr[1] - - conn.ssl_env = ssl_env - - try: - self.requests.put(conn) - except queue.Full: - # Just drop the conn. TODO: write 503 back? - conn.close() - return - except socket.timeout: - # The only reason for the timeout in start() is so we can - # notice keyboard interrupts on Win32, which don't interrupt - # accept() by default - return - except socket.error: - x = sys.exc_info()[1] - if self.stats['Enabled']: - self.stats['Socket Errors'] += 1 - if x.args[0] in socket_error_eintr: - # I *think* this is right. EINTR should occur when a signal - # is received during the accept() call; all docs say retry - # the call, and I *think* I'm reading it right that Python - # will then go ahead and poll for and handle the signal - # elsewhere. See - # https://bitbucket.org/cherrypy/cherrypy/issue/707. - return - if x.args[0] in socket_errors_nonblocking: - # Just try again. See - # https://bitbucket.org/cherrypy/cherrypy/issue/479. - return - if x.args[0] in socket_errors_to_ignore: - # Our socket was closed. - # See https://bitbucket.org/cherrypy/cherrypy/issue/686. - return - raise - - def _get_interrupt(self): - return self._interrupt - - def _set_interrupt(self, interrupt): - self._interrupt = True - self.stop() - self._interrupt = interrupt - interrupt = property(_get_interrupt, _set_interrupt, - doc="Set this to an Exception instance to " - "interrupt the server.") - - def stop(self): - """Gracefully shutdown a server that is serving forever.""" - self.ready = False - if self._start_time is not None: - self._run_time += (time.time() - self._start_time) - self._start_time = None - - sock = getattr(self, "socket", None) - if sock: - if not isinstance(self.bind_addr, basestring): - # Touch our own socket to make accept() return immediately. - try: - host, port = sock.getsockname()[:2] - except socket.error: - x = sys.exc_info()[1] - if x.args[0] not in socket_errors_to_ignore: - # Changed to use error code and not message - # See - # https://bitbucket.org/cherrypy/cherrypy/issue/860. - raise - else: - # Note that we're explicitly NOT using AI_PASSIVE, - # here, because we want an actual IP to touch. - # localhost won't work if we've bound to a public IP, - # but it will if we bound to '0.0.0.0' (INADDR_ANY). - for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, - socket.SOCK_STREAM): - af, socktype, proto, canonname, sa = res - s = None - try: - s = socket.socket(af, socktype, proto) - # See - # http://groups.google.com/group/cherrypy-users/ - # browse_frm/thread/bbfe5eb39c904fe0 - s.settimeout(1.0) - s.connect((host, port)) - s.close() - except socket.error: - if s: - s.close() - if hasattr(sock, "close"): - sock.close() - self.socket = None - - self.requests.stop(self.shutdown_timeout) - - -class Gateway(object): - - """A base class to interface HTTPServer with other systems, such as WSGI. - """ - - def __init__(self, req): - self.req = req - - def respond(self): - """Process the current request. Must be overridden in a subclass.""" - raise NotImplemented - - -# These may either be wsgiserver.SSLAdapter subclasses or the string names -# of such classes (in which case they will be lazily loaded). -ssl_adapters = { - 'builtin': 'cherrypy.wsgiserver.ssl_builtin.BuiltinSSLAdapter', -} - - -def get_ssl_adapter_class(name='builtin'): - """Return an SSL adapter class for the given name.""" - adapter = ssl_adapters[name.lower()] - if isinstance(adapter, basestring): - last_dot = adapter.rfind(".") - attr_name = adapter[last_dot + 1:] - mod_path = adapter[:last_dot] - - try: - mod = sys.modules[mod_path] - if mod is None: - raise KeyError() - except KeyError: - # The last [''] is important. - mod = __import__(mod_path, globals(), locals(), ['']) - - # Let an AttributeError propagate outward. - try: - adapter = getattr(mod, attr_name) - except AttributeError: - raise AttributeError("'%s' object has no attribute '%s'" - % (mod_path, attr_name)) - - return adapter - -# ------------------------------- WSGI Stuff -------------------------------- # - - -class CherryPyWSGIServer(HTTPServer): - - """A subclass of HTTPServer which calls a WSGI application.""" - - wsgi_version = (1, 0) - """The version of WSGI to produce.""" - - def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None, - max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5, - accepted_queue_size=-1, accepted_queue_timeout=10): - self.requests = ThreadPool(self, min=numthreads or 1, max=max, - accepted_queue_size=accepted_queue_size, - accepted_queue_timeout=accepted_queue_timeout) - self.wsgi_app = wsgi_app - self.gateway = wsgi_gateways[self.wsgi_version] - - self.bind_addr = bind_addr - if not server_name: - server_name = socket.gethostname() - self.server_name = server_name - self.request_queue_size = request_queue_size - - self.timeout = timeout - self.shutdown_timeout = shutdown_timeout - self.clear_stats() - - def _get_numthreads(self): - return self.requests.min - - def _set_numthreads(self, value): - self.requests.min = value - numthreads = property(_get_numthreads, _set_numthreads) - - -class WSGIGateway(Gateway): - - """A base class to interface HTTPServer with WSGI.""" - - def __init__(self, req): - self.req = req - self.started_response = False - self.env = self.get_environ() - self.remaining_bytes_out = None - - def get_environ(self): - """Return a new environ dict targeting the given wsgi.version""" - raise NotImplemented - - def respond(self): - """Process the current request.""" - response = self.req.server.wsgi_app(self.env, self.start_response) - try: - for chunk in response: - # "The start_response callable must not actually transmit - # the response headers. Instead, it must store them for the - # server or gateway to transmit only after the first - # iteration of the application return value that yields - # a NON-EMPTY string, or upon the application's first - # invocation of the write() callable." (PEP 333) - if chunk: - if isinstance(chunk, unicodestr): - chunk = chunk.encode('ISO-8859-1') - self.write(chunk) - finally: - if hasattr(response, "close"): - response.close() - - def start_response(self, status, headers, exc_info=None): - """WSGI callable to begin the HTTP response.""" - # "The application may call start_response more than once, - # if and only if the exc_info argument is provided." - if self.started_response and not exc_info: - raise AssertionError("WSGI start_response called a second " - "time with no exc_info.") - self.started_response = True - - # "if exc_info is provided, and the HTTP headers have already been - # sent, start_response must raise an error, and should raise the - # exc_info tuple." - if self.req.sent_headers: - try: - raise exc_info[0](exc_info[1]).with_traceback(exc_info[2]) - finally: - exc_info = None - - # According to PEP 3333, when using Python 3, the response status - # and headers must be bytes masquerading as unicode; that is, they - # must be of type "str" but are restricted to code points in the - # "latin-1" set. - if not isinstance(status, str): - raise TypeError("WSGI response status is not of type str.") - self.req.status = status.encode('ISO-8859-1') - - for k, v in headers: - if not isinstance(k, str): - raise TypeError( - "WSGI response header key %r is not of type str." % k) - if not isinstance(v, str): - raise TypeError( - "WSGI response header value %r is not of type str." % v) - if k.lower() == 'content-length': - self.remaining_bytes_out = int(v) - self.req.outheaders.append( - (k.encode('ISO-8859-1'), v.encode('ISO-8859-1'))) - - return self.write - - def write(self, chunk): - """WSGI callable to write unbuffered data to the client. - - This method is also used internally by start_response (to write - data from the iterable returned by the WSGI application). - """ - if not self.started_response: - raise AssertionError("WSGI write called before start_response.") - - chunklen = len(chunk) - rbo = self.remaining_bytes_out - if rbo is not None and chunklen > rbo: - if not self.req.sent_headers: - # Whew. We can send a 500 to the client. - self.req.simple_response("500 Internal Server Error", - "The requested resource returned " - "more bytes than the declared " - "Content-Length.") - else: - # Dang. We have probably already sent data. Truncate the chunk - # to fit (so the client doesn't hang) and raise an error later. - chunk = chunk[:rbo] - - if not self.req.sent_headers: - self.req.sent_headers = True - self.req.send_headers() - - self.req.write(chunk) - - if rbo is not None: - rbo -= chunklen - if rbo < 0: - raise ValueError( - "Response body exceeds the declared Content-Length.") - - -class WSGIGateway_10(WSGIGateway): - - """A Gateway class to interface HTTPServer with WSGI 1.0.x.""" - - def get_environ(self): - """Return a new environ dict targeting the given wsgi.version""" - req = self.req - env = { - # set a non-standard environ entry so the WSGI app can know what - # the *real* server protocol is (and what features to support). - # See http://www.faqs.org/rfcs/rfc2145.html. - 'ACTUAL_SERVER_PROTOCOL': req.server.protocol, - 'PATH_INFO': req.path.decode('ISO-8859-1'), - 'QUERY_STRING': req.qs.decode('ISO-8859-1'), - 'REMOTE_ADDR': req.conn.remote_addr or '', - 'REMOTE_PORT': str(req.conn.remote_port or ''), - 'REQUEST_METHOD': req.method.decode('ISO-8859-1'), - 'REQUEST_URI': req.uri.decode('ISO-8859-1'), - 'SCRIPT_NAME': '', - 'SERVER_NAME': req.server.server_name, - # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol. - 'SERVER_PROTOCOL': req.request_protocol.decode('ISO-8859-1'), - 'SERVER_SOFTWARE': req.server.software, - 'wsgi.errors': sys.stderr, - 'wsgi.input': req.rfile, - 'wsgi.multiprocess': False, - 'wsgi.multithread': True, - 'wsgi.run_once': False, - 'wsgi.url_scheme': req.scheme.decode('ISO-8859-1'), - 'wsgi.version': (1, 0), - } - if isinstance(req.server.bind_addr, basestring): - # AF_UNIX. This isn't really allowed by WSGI, which doesn't - # address unix domain sockets. But it's better than nothing. - env["SERVER_PORT"] = "" - else: - env["SERVER_PORT"] = str(req.server.bind_addr[1]) - - # Request headers - for k, v in req.inheaders.items(): - k = k.decode('ISO-8859-1').upper().replace("-", "_") - env["HTTP_" + k] = v.decode('ISO-8859-1') - - # CONTENT_TYPE/CONTENT_LENGTH - ct = env.pop("HTTP_CONTENT_TYPE", None) - if ct is not None: - env["CONTENT_TYPE"] = ct - cl = env.pop("HTTP_CONTENT_LENGTH", None) - if cl is not None: - env["CONTENT_LENGTH"] = cl - - if req.conn.ssl_env: - env.update(req.conn.ssl_env) - - return env - - -class WSGIGateway_u0(WSGIGateway_10): - - """A Gateway class to interface HTTPServer with WSGI u.0. - - WSGI u.0 is an experimental protocol, which uses unicode for keys - and values in both Python 2 and Python 3. - """ - - def get_environ(self): - """Return a new environ dict targeting the given wsgi.version""" - req = self.req - env_10 = WSGIGateway_10.get_environ(self) - env = env_10.copy() - env['wsgi.version'] = ('u', 0) - - # Request-URI - env.setdefault('wsgi.url_encoding', 'utf-8') - try: - # SCRIPT_NAME is the empty string, who cares what encoding it is? - env["PATH_INFO"] = req.path.decode(env['wsgi.url_encoding']) - env["QUERY_STRING"] = req.qs.decode(env['wsgi.url_encoding']) - except UnicodeDecodeError: - # Fall back to latin 1 so apps can transcode if needed. - env['wsgi.url_encoding'] = 'ISO-8859-1' - env["PATH_INFO"] = env_10["PATH_INFO"] - env["QUERY_STRING"] = env_10["QUERY_STRING"] - - return env - -wsgi_gateways = { - (1, 0): WSGIGateway_10, - ('u', 0): WSGIGateway_u0, -} - - -class WSGIPathInfoDispatcher(object): - - """A WSGI dispatcher for dispatch based on the PATH_INFO. - - apps: a dict or list of (path_prefix, app) pairs. - """ - - def __init__(self, apps): - try: - apps = list(apps.items()) - except AttributeError: - pass - - # Sort the apps by len(path), descending - apps.sort() - apps.reverse() - - # The path_prefix strings must start, but not end, with a slash. - # Use "" instead of "/". - self.apps = [(p.rstrip("/"), a) for p, a in apps] - - def __call__(self, environ, start_response): - path = environ["PATH_INFO"] or "/" - for p, app in self.apps: - # The apps list should be sorted by length, descending. - if path.startswith(p + "/") or path == p: - environ = environ.copy() - environ["SCRIPT_NAME"] = environ["SCRIPT_NAME"] + p - environ["PATH_INFO"] = path[len(p):] - return app(environ, start_response) - - start_response('404 Not Found', [('Content-Type', 'text/plain'), - ('Content-Length', '0')]) - return [''] diff --git a/lib/concurrent/LICENSE b/lib/concurrent/LICENSE deleted file mode 100644 index c430db0f..00000000 --- a/lib/concurrent/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -Copyright 2009 Brian Quinlan. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY BRIAN QUINLAN "AS IS" AND ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -HALL THE FREEBSD PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/lib/concurrent/__init__.py b/lib/concurrent/__init__.py deleted file mode 100644 index b36383a6..00000000 --- a/lib/concurrent/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from pkgutil import extend_path - -__path__ = extend_path(__path__, __name__) diff --git a/lib/concurrent/futures/__init__.py b/lib/concurrent/futures/__init__.py deleted file mode 100644 index fef52819..00000000 --- a/lib/concurrent/futures/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright 2009 Brian Quinlan. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Execute computations asynchronously using threads or processes.""" - -__author__ = 'Brian Quinlan (brian@sweetapp.com)' - -from concurrent.futures._base import (FIRST_COMPLETED, - FIRST_EXCEPTION, - ALL_COMPLETED, - CancelledError, - TimeoutError, - Future, - Executor, - wait, - as_completed) -from concurrent.futures.thread import ThreadPoolExecutor - -# Jython doesn't have multiprocessing -try: - from concurrent.futures.process import ProcessPoolExecutor -except ImportError: - pass diff --git a/lib/concurrent/futures/_base.py b/lib/concurrent/futures/_base.py deleted file mode 100644 index 6f0c0f3b..00000000 --- a/lib/concurrent/futures/_base.py +++ /dev/null @@ -1,605 +0,0 @@ -# Copyright 2009 Brian Quinlan. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -from __future__ import with_statement -import logging -import threading -import time - -from concurrent.futures._compat import reraise - -try: - from collections import namedtuple -except ImportError: - from concurrent.futures._compat import namedtuple - -__author__ = 'Brian Quinlan (brian@sweetapp.com)' - -FIRST_COMPLETED = 'FIRST_COMPLETED' -FIRST_EXCEPTION = 'FIRST_EXCEPTION' -ALL_COMPLETED = 'ALL_COMPLETED' -_AS_COMPLETED = '_AS_COMPLETED' - -# Possible future states (for internal use by the futures package). -PENDING = 'PENDING' -RUNNING = 'RUNNING' -# The future was cancelled by the user... -CANCELLED = 'CANCELLED' -# ...and _Waiter.add_cancelled() was called by a worker. -CANCELLED_AND_NOTIFIED = 'CANCELLED_AND_NOTIFIED' -FINISHED = 'FINISHED' - -_FUTURE_STATES = [ - PENDING, - RUNNING, - CANCELLED, - CANCELLED_AND_NOTIFIED, - FINISHED -] - -_STATE_TO_DESCRIPTION_MAP = { - PENDING: "pending", - RUNNING: "running", - CANCELLED: "cancelled", - CANCELLED_AND_NOTIFIED: "cancelled", - FINISHED: "finished" -} - -# Logger for internal use by the futures package. -LOGGER = logging.getLogger("concurrent.futures") - -class Error(Exception): - """Base class for all future-related exceptions.""" - pass - -class CancelledError(Error): - """The Future was cancelled.""" - pass - -class TimeoutError(Error): - """The operation exceeded the given deadline.""" - pass - -class _Waiter(object): - """Provides the event that wait() and as_completed() block on.""" - def __init__(self): - self.event = threading.Event() - self.finished_futures = [] - - def add_result(self, future): - self.finished_futures.append(future) - - def add_exception(self, future): - self.finished_futures.append(future) - - def add_cancelled(self, future): - self.finished_futures.append(future) - -class _AsCompletedWaiter(_Waiter): - """Used by as_completed().""" - - def __init__(self): - super(_AsCompletedWaiter, self).__init__() - self.lock = threading.Lock() - - def add_result(self, future): - with self.lock: - super(_AsCompletedWaiter, self).add_result(future) - self.event.set() - - def add_exception(self, future): - with self.lock: - super(_AsCompletedWaiter, self).add_exception(future) - self.event.set() - - def add_cancelled(self, future): - with self.lock: - super(_AsCompletedWaiter, self).add_cancelled(future) - self.event.set() - -class _FirstCompletedWaiter(_Waiter): - """Used by wait(return_when=FIRST_COMPLETED).""" - - def add_result(self, future): - super(_FirstCompletedWaiter, self).add_result(future) - self.event.set() - - def add_exception(self, future): - super(_FirstCompletedWaiter, self).add_exception(future) - self.event.set() - - def add_cancelled(self, future): - super(_FirstCompletedWaiter, self).add_cancelled(future) - self.event.set() - -class _AllCompletedWaiter(_Waiter): - """Used by wait(return_when=FIRST_EXCEPTION and ALL_COMPLETED).""" - - def __init__(self, num_pending_calls, stop_on_exception): - self.num_pending_calls = num_pending_calls - self.stop_on_exception = stop_on_exception - self.lock = threading.Lock() - super(_AllCompletedWaiter, self).__init__() - - def _decrement_pending_calls(self): - with self.lock: - self.num_pending_calls -= 1 - if not self.num_pending_calls: - self.event.set() - - def add_result(self, future): - super(_AllCompletedWaiter, self).add_result(future) - self._decrement_pending_calls() - - def add_exception(self, future): - super(_AllCompletedWaiter, self).add_exception(future) - if self.stop_on_exception: - self.event.set() - else: - self._decrement_pending_calls() - - def add_cancelled(self, future): - super(_AllCompletedWaiter, self).add_cancelled(future) - self._decrement_pending_calls() - -class _AcquireFutures(object): - """A context manager that does an ordered acquire of Future conditions.""" - - def __init__(self, futures): - self.futures = sorted(futures, key=id) - - def __enter__(self): - for future in self.futures: - future._condition.acquire() - - def __exit__(self, *args): - for future in self.futures: - future._condition.release() - -def _create_and_install_waiters(fs, return_when): - if return_when == _AS_COMPLETED: - waiter = _AsCompletedWaiter() - elif return_when == FIRST_COMPLETED: - waiter = _FirstCompletedWaiter() - else: - pending_count = sum( - f._state not in [CANCELLED_AND_NOTIFIED, FINISHED] for f in fs) - - if return_when == FIRST_EXCEPTION: - waiter = _AllCompletedWaiter(pending_count, stop_on_exception=True) - elif return_when == ALL_COMPLETED: - waiter = _AllCompletedWaiter(pending_count, stop_on_exception=False) - else: - raise ValueError("Invalid return condition: %r" % return_when) - - for f in fs: - f._waiters.append(waiter) - - return waiter - -def as_completed(fs, timeout=None): - """An iterator over the given futures that yields each as it completes. - - Args: - fs: The sequence of Futures (possibly created by different Executors) to - iterate over. - timeout: The maximum number of seconds to wait. If None, then there - is no limit on the wait time. - - Returns: - An iterator that yields the given Futures as they complete (finished or - cancelled). - - Raises: - TimeoutError: If the entire result iterator could not be generated - before the given timeout. - """ - if timeout is not None: - end_time = timeout + time.time() - - with _AcquireFutures(fs): - finished = set( - f for f in fs - if f._state in [CANCELLED_AND_NOTIFIED, FINISHED]) - pending = set(fs) - finished - waiter = _create_and_install_waiters(fs, _AS_COMPLETED) - - try: - for future in finished: - yield future - - while pending: - if timeout is None: - wait_timeout = None - else: - wait_timeout = end_time - time.time() - if wait_timeout < 0: - raise TimeoutError( - '%d (of %d) futures unfinished' % ( - len(pending), len(fs))) - - waiter.event.wait(wait_timeout) - - with waiter.lock: - finished = waiter.finished_futures - waiter.finished_futures = [] - waiter.event.clear() - - for future in finished: - yield future - pending.remove(future) - - finally: - for f in fs: - f._waiters.remove(waiter) - -DoneAndNotDoneFutures = namedtuple( - 'DoneAndNotDoneFutures', 'done not_done') -def wait(fs, timeout=None, return_when=ALL_COMPLETED): - """Wait for the futures in the given sequence to complete. - - Args: - fs: The sequence of Futures (possibly created by different Executors) to - wait upon. - timeout: The maximum number of seconds to wait. If None, then there - is no limit on the wait time. - return_when: Indicates when this function should return. The options - are: - - FIRST_COMPLETED - Return when any future finishes or is - cancelled. - FIRST_EXCEPTION - Return when any future finishes by raising an - exception. If no future raises an exception - then it is equivalent to ALL_COMPLETED. - ALL_COMPLETED - Return when all futures finish or are cancelled. - - Returns: - A named 2-tuple of sets. The first set, named 'done', contains the - futures that completed (is finished or cancelled) before the wait - completed. The second set, named 'not_done', contains uncompleted - futures. - """ - with _AcquireFutures(fs): - done = set(f for f in fs - if f._state in [CANCELLED_AND_NOTIFIED, FINISHED]) - not_done = set(fs) - done - - if (return_when == FIRST_COMPLETED) and done: - return DoneAndNotDoneFutures(done, not_done) - elif (return_when == FIRST_EXCEPTION) and done: - if any(f for f in done - if not f.cancelled() and f.exception() is not None): - return DoneAndNotDoneFutures(done, not_done) - - if len(done) == len(fs): - return DoneAndNotDoneFutures(done, not_done) - - waiter = _create_and_install_waiters(fs, return_when) - - waiter.event.wait(timeout) - for f in fs: - f._waiters.remove(waiter) - - done.update(waiter.finished_futures) - return DoneAndNotDoneFutures(done, set(fs) - done) - -class Future(object): - """Represents the result of an asynchronous computation.""" - - def __init__(self): - """Initializes the future. Should not be called by clients.""" - self._condition = threading.Condition() - self._state = PENDING - self._result = None - self._exception = None - self._traceback = None - self._waiters = [] - self._done_callbacks = [] - - def _invoke_callbacks(self): - for callback in self._done_callbacks: - try: - callback(self) - except Exception: - LOGGER.exception('exception calling callback for %r', self) - - def __repr__(self): - with self._condition: - if self._state == FINISHED: - if self._exception: - return '' % ( - hex(id(self)), - _STATE_TO_DESCRIPTION_MAP[self._state], - self._exception.__class__.__name__) - else: - return '' % ( - hex(id(self)), - _STATE_TO_DESCRIPTION_MAP[self._state], - self._result.__class__.__name__) - return '' % ( - hex(id(self)), - _STATE_TO_DESCRIPTION_MAP[self._state]) - - def cancel(self): - """Cancel the future if possible. - - Returns True if the future was cancelled, False otherwise. A future - cannot be cancelled if it is running or has already completed. - """ - with self._condition: - if self._state in [RUNNING, FINISHED]: - return False - - if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: - return True - - self._state = CANCELLED - self._condition.notify_all() - - self._invoke_callbacks() - return True - - def cancelled(self): - """Return True if the future has cancelled.""" - with self._condition: - return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED] - - def running(self): - """Return True if the future is currently executing.""" - with self._condition: - return self._state == RUNNING - - def done(self): - """Return True of the future was cancelled or finished executing.""" - with self._condition: - return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED] - - def __get_result(self): - if self._exception: - reraise(self._exception, self._traceback) - else: - return self._result - - def add_done_callback(self, fn): - """Attaches a callable that will be called when the future finishes. - - Args: - fn: A callable that will be called with this future as its only - argument when the future completes or is cancelled. The callable - will always be called by a thread in the same process in which - it was added. If the future has already completed or been - cancelled then the callable will be called immediately. These - callables are called in the order that they were added. - """ - with self._condition: - if self._state not in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED]: - self._done_callbacks.append(fn) - return - fn(self) - - def result(self, timeout=None): - """Return the result of the call that the future represents. - - Args: - timeout: The number of seconds to wait for the result if the future - isn't done. If None, then there is no limit on the wait time. - - Returns: - The result of the call that the future represents. - - Raises: - CancelledError: If the future was cancelled. - TimeoutError: If the future didn't finish executing before the given - timeout. - Exception: If the call raised then that exception will be raised. - """ - with self._condition: - if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: - raise CancelledError() - elif self._state == FINISHED: - return self.__get_result() - - self._condition.wait(timeout) - - if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: - raise CancelledError() - elif self._state == FINISHED: - return self.__get_result() - else: - raise TimeoutError() - - def exception_info(self, timeout=None): - """Return a tuple of (exception, traceback) raised by the call that the - future represents. - - Args: - timeout: The number of seconds to wait for the exception if the - future isn't done. If None, then there is no limit on the wait - time. - - Returns: - The exception raised by the call that the future represents or None - if the call completed without raising. - - Raises: - CancelledError: If the future was cancelled. - TimeoutError: If the future didn't finish executing before the given - timeout. - """ - with self._condition: - if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: - raise CancelledError() - elif self._state == FINISHED: - return self._exception, self._traceback - - self._condition.wait(timeout) - - if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: - raise CancelledError() - elif self._state == FINISHED: - return self._exception, self._traceback - else: - raise TimeoutError() - - def exception(self, timeout=None): - """Return the exception raised by the call that the future represents. - - Args: - timeout: The number of seconds to wait for the exception if the - future isn't done. If None, then there is no limit on the wait - time. - - Returns: - The exception raised by the call that the future represents or None - if the call completed without raising. - - Raises: - CancelledError: If the future was cancelled. - TimeoutError: If the future didn't finish executing before the given - timeout. - """ - return self.exception_info(timeout)[0] - - # The following methods should only be used by Executors and in tests. - def set_running_or_notify_cancel(self): - """Mark the future as running or process any cancel notifications. - - Should only be used by Executor implementations and unit tests. - - If the future has been cancelled (cancel() was called and returned - True) then any threads waiting on the future completing (though calls - to as_completed() or wait()) are notified and False is returned. - - If the future was not cancelled then it is put in the running state - (future calls to running() will return True) and True is returned. - - This method should be called by Executor implementations before - executing the work associated with this future. If this method returns - False then the work should not be executed. - - Returns: - False if the Future was cancelled, True otherwise. - - Raises: - RuntimeError: if this method was already called or if set_result() - or set_exception() was called. - """ - with self._condition: - if self._state == CANCELLED: - self._state = CANCELLED_AND_NOTIFIED - for waiter in self._waiters: - waiter.add_cancelled(self) - # self._condition.notify_all() is not necessary because - # self.cancel() triggers a notification. - return False - elif self._state == PENDING: - self._state = RUNNING - return True - else: - LOGGER.critical('Future %s in unexpected state: %s', - id(self.future), - self.future._state) - raise RuntimeError('Future in unexpected state') - - def set_result(self, result): - """Sets the return value of work associated with the future. - - Should only be used by Executor implementations and unit tests. - """ - with self._condition: - self._result = result - self._state = FINISHED - for waiter in self._waiters: - waiter.add_result(self) - self._condition.notify_all() - self._invoke_callbacks() - - def set_exception_info(self, exception, traceback): - """Sets the result of the future as being the given exception - and traceback. - - Should only be used by Executor implementations and unit tests. - """ - with self._condition: - self._exception = exception - self._traceback = traceback - self._state = FINISHED - for waiter in self._waiters: - waiter.add_exception(self) - self._condition.notify_all() - self._invoke_callbacks() - - def set_exception(self, exception): - """Sets the result of the future as being the given exception. - - Should only be used by Executor implementations and unit tests. - """ - self.set_exception_info(exception, None) - -class Executor(object): - """This is an abstract base class for concrete asynchronous executors.""" - - def submit(self, fn, *args, **kwargs): - """Submits a callable to be executed with the given arguments. - - Schedules the callable to be executed as fn(*args, **kwargs) and returns - a Future instance representing the execution of the callable. - - Returns: - A Future representing the given call. - """ - raise NotImplementedError() - - def map(self, fn, *iterables, **kwargs): - """Returns a iterator equivalent to map(fn, iter). - - Args: - fn: A callable that will take as many arguments as there are - passed iterables. - timeout: The maximum number of seconds to wait. If None, then there - is no limit on the wait time. - - Returns: - An iterator equivalent to: map(func, *iterables) but the calls may - be evaluated out-of-order. - - Raises: - TimeoutError: If the entire result iterator could not be generated - before the given timeout. - Exception: If fn(*args) raises for any values. - """ - timeout = kwargs.get('timeout') - if timeout is not None: - end_time = timeout + time.time() - - fs = [self.submit(fn, *args) for args in zip(*iterables)] - - try: - for future in fs: - if timeout is None: - yield future.result() - else: - yield future.result(end_time - time.time()) - finally: - for future in fs: - future.cancel() - - def shutdown(self, wait=True): - """Clean-up the resources associated with the Executor. - - It is safe to call this method several times. Otherwise, no other - methods can be called after this one. - - Args: - wait: If True then shutdown will not return until all running - futures have finished executing and the resources used by the - executor have been reclaimed. - """ - pass - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.shutdown(wait=True) - return False diff --git a/lib/concurrent/futures/_compat.py b/lib/concurrent/futures/_compat.py deleted file mode 100644 index e77cf0e5..00000000 --- a/lib/concurrent/futures/_compat.py +++ /dev/null @@ -1,111 +0,0 @@ -from keyword import iskeyword as _iskeyword -from operator import itemgetter as _itemgetter -import sys as _sys - - -def namedtuple(typename, field_names): - """Returns a new subclass of tuple with named fields. - - >>> Point = namedtuple('Point', 'x y') - >>> Point.__doc__ # docstring for the new class - 'Point(x, y)' - >>> p = Point(11, y=22) # instantiate with positional args or keywords - >>> p[0] + p[1] # indexable like a plain tuple - 33 - >>> x, y = p # unpack like a regular tuple - >>> x, y - (11, 22) - >>> p.x + p.y # fields also accessable by name - 33 - >>> d = p._asdict() # convert to a dictionary - >>> d['x'] - 11 - >>> Point(**d) # convert from a dictionary - Point(x=11, y=22) - >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields - Point(x=100, y=22) - - """ - - # Parse and validate the field names. Validation serves two purposes, - # generating informative error messages and preventing template injection attacks. - if isinstance(field_names, basestring): - field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas - field_names = tuple(map(str, field_names)) - for name in (typename,) + field_names: - if not all(c.isalnum() or c=='_' for c in name): - raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name) - if _iskeyword(name): - raise ValueError('Type names and field names cannot be a keyword: %r' % name) - if name[0].isdigit(): - raise ValueError('Type names and field names cannot start with a number: %r' % name) - seen_names = set() - for name in field_names: - if name.startswith('_'): - raise ValueError('Field names cannot start with an underscore: %r' % name) - if name in seen_names: - raise ValueError('Encountered duplicate field name: %r' % name) - seen_names.add(name) - - # Create and fill-in the class template - numfields = len(field_names) - argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes - reprtxt = ', '.join('%s=%%r' % name for name in field_names) - dicttxt = ', '.join('%r: t[%d]' % (name, pos) for pos, name in enumerate(field_names)) - template = '''class %(typename)s(tuple): - '%(typename)s(%(argtxt)s)' \n - __slots__ = () \n - _fields = %(field_names)r \n - def __new__(_cls, %(argtxt)s): - return _tuple.__new__(_cls, (%(argtxt)s)) \n - @classmethod - def _make(cls, iterable, new=tuple.__new__, len=len): - 'Make a new %(typename)s object from a sequence or iterable' - result = new(cls, iterable) - if len(result) != %(numfields)d: - raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result)) - return result \n - def __repr__(self): - return '%(typename)s(%(reprtxt)s)' %% self \n - def _asdict(t): - 'Return a new dict which maps field names to their values' - return {%(dicttxt)s} \n - def _replace(_self, **kwds): - 'Return a new %(typename)s object replacing specified fields with new values' - result = _self._make(map(kwds.pop, %(field_names)r, _self)) - if kwds: - raise ValueError('Got unexpected field names: %%r' %% kwds.keys()) - return result \n - def __getnewargs__(self): - return tuple(self) \n\n''' % locals() - for i, name in enumerate(field_names): - template += ' %s = _property(_itemgetter(%d))\n' % (name, i) - - # Execute the template string in a temporary namespace and - # support tracing utilities by setting a value for frame.f_globals['__name__'] - namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename, - _property=property, _tuple=tuple) - try: - exec(template, namespace) - except SyntaxError: - e = _sys.exc_info()[1] - raise SyntaxError(e.message + ':\n' + template) - result = namespace[typename] - - # For pickling to work, the __module__ variable needs to be set to the frame - # where the named tuple is created. Bypass this step in enviroments where - # sys._getframe is not defined (Jython for example). - if hasattr(_sys, '_getframe'): - result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__') - - return result - - -if _sys.version_info[0] < 3: - def reraise(exc, traceback): - locals_ = {'exc_type': type(exc), 'exc_value': exc, 'traceback': traceback} - exec('raise exc_type, exc_value, traceback', {}, locals_) -else: - def reraise(exc, traceback): - # Tracebacks are embedded in exceptions in Python 3 - raise exc diff --git a/lib/concurrent/futures/process.py b/lib/concurrent/futures/process.py deleted file mode 100644 index 98684f8e..00000000 --- a/lib/concurrent/futures/process.py +++ /dev/null @@ -1,363 +0,0 @@ -# Copyright 2009 Brian Quinlan. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Implements ProcessPoolExecutor. - -The follow diagram and text describe the data-flow through the system: - -|======================= In-process =====================|== Out-of-process ==| - -+----------+ +----------+ +--------+ +-----------+ +---------+ -| | => | Work Ids | => | | => | Call Q | => | | -| | +----------+ | | +-----------+ | | -| | | ... | | | | ... | | | -| | | 6 | | | | 5, call() | | | -| | | 7 | | | | ... | | | -| Process | | ... | | Local | +-----------+ | Process | -| Pool | +----------+ | Worker | | #1..n | -| Executor | | Thread | | | -| | +----------- + | | +-----------+ | | -| | <=> | Work Items | <=> | | <= | Result Q | <= | | -| | +------------+ | | +-----------+ | | -| | | 6: call() | | | | ... | | | -| | | future | | | | 4, result | | | -| | | ... | | | | 3, except | | | -+----------+ +------------+ +--------+ +-----------+ +---------+ - -Executor.submit() called: -- creates a uniquely numbered _WorkItem and adds it to the "Work Items" dict -- adds the id of the _WorkItem to the "Work Ids" queue - -Local worker thread: -- reads work ids from the "Work Ids" queue and looks up the corresponding - WorkItem from the "Work Items" dict: if the work item has been cancelled then - it is simply removed from the dict, otherwise it is repackaged as a - _CallItem and put in the "Call Q". New _CallItems are put in the "Call Q" - until "Call Q" is full. NOTE: the size of the "Call Q" is kept small because - calls placed in the "Call Q" can no longer be cancelled with Future.cancel(). -- reads _ResultItems from "Result Q", updates the future stored in the - "Work Items" dict and deletes the dict entry - -Process #1..n: -- reads _CallItems from "Call Q", executes the calls, and puts the resulting - _ResultItems in "Request Q" -""" - -from __future__ import with_statement -import atexit -import multiprocessing -import threading -import weakref -import sys - -from concurrent.futures import _base - -try: - import queue -except ImportError: - import Queue as queue - -__author__ = 'Brian Quinlan (brian@sweetapp.com)' - -# Workers are created as daemon threads and processes. This is done to allow the -# interpreter to exit when there are still idle processes in a -# ProcessPoolExecutor's process pool (i.e. shutdown() was not called). However, -# allowing workers to die with the interpreter has two undesirable properties: -# - The workers would still be running during interpretor shutdown, -# meaning that they would fail in unpredictable ways. -# - The workers could be killed while evaluating a work item, which could -# be bad if the callable being evaluated has external side-effects e.g. -# writing to a file. -# -# To work around this problem, an exit handler is installed which tells the -# workers to exit when their work queues are empty and then waits until the -# threads/processes finish. - -_threads_queues = weakref.WeakKeyDictionary() -_shutdown = False - -def _python_exit(): - global _shutdown - _shutdown = True - items = list(_threads_queues.items()) - for t, q in items: - q.put(None) - for t, q in items: - t.join() - -# Controls how many more calls than processes will be queued in the call queue. -# A smaller number will mean that processes spend more time idle waiting for -# work while a larger number will make Future.cancel() succeed less frequently -# (Futures in the call queue cannot be cancelled). -EXTRA_QUEUED_CALLS = 1 - -class _WorkItem(object): - def __init__(self, future, fn, args, kwargs): - self.future = future - self.fn = fn - self.args = args - self.kwargs = kwargs - -class _ResultItem(object): - def __init__(self, work_id, exception=None, result=None): - self.work_id = work_id - self.exception = exception - self.result = result - -class _CallItem(object): - def __init__(self, work_id, fn, args, kwargs): - self.work_id = work_id - self.fn = fn - self.args = args - self.kwargs = kwargs - -def _process_worker(call_queue, result_queue): - """Evaluates calls from call_queue and places the results in result_queue. - - This worker is run in a separate process. - - Args: - call_queue: A multiprocessing.Queue of _CallItems that will be read and - evaluated by the worker. - result_queue: A multiprocessing.Queue of _ResultItems that will written - to by the worker. - shutdown: A multiprocessing.Event that will be set as a signal to the - worker that it should exit when call_queue is empty. - """ - while True: - call_item = call_queue.get(block=True) - if call_item is None: - # Wake up queue management thread - result_queue.put(None) - return - try: - r = call_item.fn(*call_item.args, **call_item.kwargs) - except BaseException: - e = sys.exc_info()[1] - result_queue.put(_ResultItem(call_item.work_id, - exception=e)) - else: - result_queue.put(_ResultItem(call_item.work_id, - result=r)) - -def _add_call_item_to_queue(pending_work_items, - work_ids, - call_queue): - """Fills call_queue with _WorkItems from pending_work_items. - - This function never blocks. - - Args: - pending_work_items: A dict mapping work ids to _WorkItems e.g. - {5: <_WorkItem...>, 6: <_WorkItem...>, ...} - work_ids: A queue.Queue of work ids e.g. Queue([5, 6, ...]). Work ids - are consumed and the corresponding _WorkItems from - pending_work_items are transformed into _CallItems and put in - call_queue. - call_queue: A multiprocessing.Queue that will be filled with _CallItems - derived from _WorkItems. - """ - while True: - if call_queue.full(): - return - try: - work_id = work_ids.get(block=False) - except queue.Empty: - return - else: - work_item = pending_work_items[work_id] - - if work_item.future.set_running_or_notify_cancel(): - call_queue.put(_CallItem(work_id, - work_item.fn, - work_item.args, - work_item.kwargs), - block=True) - else: - del pending_work_items[work_id] - continue - -def _queue_management_worker(executor_reference, - processes, - pending_work_items, - work_ids_queue, - call_queue, - result_queue): - """Manages the communication between this process and the worker processes. - - This function is run in a local thread. - - Args: - executor_reference: A weakref.ref to the ProcessPoolExecutor that owns - this thread. Used to determine if the ProcessPoolExecutor has been - garbage collected and that this function can exit. - process: A list of the multiprocessing.Process instances used as - workers. - pending_work_items: A dict mapping work ids to _WorkItems e.g. - {5: <_WorkItem...>, 6: <_WorkItem...>, ...} - work_ids_queue: A queue.Queue of work ids e.g. Queue([5, 6, ...]). - call_queue: A multiprocessing.Queue that will be filled with _CallItems - derived from _WorkItems for processing by the process workers. - result_queue: A multiprocessing.Queue of _ResultItems generated by the - process workers. - """ - nb_shutdown_processes = [0] - def shutdown_one_process(): - """Tell a worker to terminate, which will in turn wake us again""" - call_queue.put(None) - nb_shutdown_processes[0] += 1 - while True: - _add_call_item_to_queue(pending_work_items, - work_ids_queue, - call_queue) - - result_item = result_queue.get(block=True) - if result_item is not None: - work_item = pending_work_items[result_item.work_id] - del pending_work_items[result_item.work_id] - - if result_item.exception: - work_item.future.set_exception(result_item.exception) - else: - work_item.future.set_result(result_item.result) - # Check whether we should start shutting down. - executor = executor_reference() - # No more work items can be added if: - # - The interpreter is shutting down OR - # - The executor that owns this worker has been collected OR - # - The executor that owns this worker has been shutdown. - if _shutdown or executor is None or executor._shutdown_thread: - # Since no new work items can be added, it is safe to shutdown - # this thread if there are no pending work items. - if not pending_work_items: - while nb_shutdown_processes[0] < len(processes): - shutdown_one_process() - # If .join() is not called on the created processes then - # some multiprocessing.Queue methods may deadlock on Mac OS - # X. - for p in processes: - p.join() - call_queue.close() - return - del executor - -_system_limits_checked = False -_system_limited = None -def _check_system_limits(): - global _system_limits_checked, _system_limited - if _system_limits_checked: - if _system_limited: - raise NotImplementedError(_system_limited) - _system_limits_checked = True - try: - import os - nsems_max = os.sysconf("SC_SEM_NSEMS_MAX") - except (AttributeError, ValueError): - # sysconf not available or setting not available - return - if nsems_max == -1: - # indetermine limit, assume that limit is determined - # by available memory only - return - if nsems_max >= 256: - # minimum number of semaphores available - # according to POSIX - return - _system_limited = "system provides too few semaphores (%d available, 256 necessary)" % nsems_max - raise NotImplementedError(_system_limited) - -class ProcessPoolExecutor(_base.Executor): - def __init__(self, max_workers=None): - """Initializes a new ProcessPoolExecutor instance. - - Args: - max_workers: The maximum number of processes that can be used to - execute the given calls. If None or not given then as many - worker processes will be created as the machine has processors. - """ - _check_system_limits() - - if max_workers is None: - self._max_workers = multiprocessing.cpu_count() - else: - self._max_workers = max_workers - - # Make the call queue slightly larger than the number of processes to - # prevent the worker processes from idling. But don't make it too big - # because futures in the call queue cannot be cancelled. - self._call_queue = multiprocessing.Queue(self._max_workers + - EXTRA_QUEUED_CALLS) - self._result_queue = multiprocessing.Queue() - self._work_ids = queue.Queue() - self._queue_management_thread = None - self._processes = set() - - # Shutdown is a two-step process. - self._shutdown_thread = False - self._shutdown_lock = threading.Lock() - self._queue_count = 0 - self._pending_work_items = {} - - def _start_queue_management_thread(self): - # When the executor gets lost, the weakref callback will wake up - # the queue management thread. - def weakref_cb(_, q=self._result_queue): - q.put(None) - if self._queue_management_thread is None: - self._queue_management_thread = threading.Thread( - target=_queue_management_worker, - args=(weakref.ref(self, weakref_cb), - self._processes, - self._pending_work_items, - self._work_ids, - self._call_queue, - self._result_queue)) - self._queue_management_thread.daemon = True - self._queue_management_thread.start() - _threads_queues[self._queue_management_thread] = self._result_queue - - def _adjust_process_count(self): - for _ in range(len(self._processes), self._max_workers): - p = multiprocessing.Process( - target=_process_worker, - args=(self._call_queue, - self._result_queue)) - p.start() - self._processes.add(p) - - def submit(self, fn, *args, **kwargs): - with self._shutdown_lock: - if self._shutdown_thread: - raise RuntimeError('cannot schedule new futures after shutdown') - - f = _base.Future() - w = _WorkItem(f, fn, args, kwargs) - - self._pending_work_items[self._queue_count] = w - self._work_ids.put(self._queue_count) - self._queue_count += 1 - # Wake up queue management thread - self._result_queue.put(None) - - self._start_queue_management_thread() - self._adjust_process_count() - return f - submit.__doc__ = _base.Executor.submit.__doc__ - - def shutdown(self, wait=True): - with self._shutdown_lock: - self._shutdown_thread = True - if self._queue_management_thread: - # Wake up queue management thread - self._result_queue.put(None) - if wait: - self._queue_management_thread.join() - # To reduce the risk of openning too many files, remove references to - # objects that use file descriptors. - self._queue_management_thread = None - self._call_queue = None - self._result_queue = None - self._processes = None - shutdown.__doc__ = _base.Executor.shutdown.__doc__ - -atexit.register(_python_exit) diff --git a/lib/concurrent/futures/thread.py b/lib/concurrent/futures/thread.py deleted file mode 100644 index 930d1673..00000000 --- a/lib/concurrent/futures/thread.py +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright 2009 Brian Quinlan. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Implements ThreadPoolExecutor.""" - -from __future__ import with_statement -import atexit -import threading -import weakref -import sys - -from concurrent.futures import _base - -try: - import queue -except ImportError: - import Queue as queue - -__author__ = 'Brian Quinlan (brian@sweetapp.com)' - -# Workers are created as daemon threads. This is done to allow the interpreter -# to exit when there are still idle threads in a ThreadPoolExecutor's thread -# pool (i.e. shutdown() was not called). However, allowing workers to die with -# the interpreter has two undesirable properties: -# - The workers would still be running during interpretor shutdown, -# meaning that they would fail in unpredictable ways. -# - The workers could be killed while evaluating a work item, which could -# be bad if the callable being evaluated has external side-effects e.g. -# writing to a file. -# -# To work around this problem, an exit handler is installed which tells the -# workers to exit when their work queues are empty and then waits until the -# threads finish. - -_threads_queues = weakref.WeakKeyDictionary() -_shutdown = False - -def _python_exit(): - global _shutdown - _shutdown = True - items = list(_threads_queues.items()) - for t, q in items: - q.put(None) - for t, q in items: - t.join() - -atexit.register(_python_exit) - -class _WorkItem(object): - def __init__(self, future, fn, args, kwargs): - self.future = future - self.fn = fn - self.args = args - self.kwargs = kwargs - - def run(self): - if not self.future.set_running_or_notify_cancel(): - return - - try: - result = self.fn(*self.args, **self.kwargs) - except BaseException: - e, tb = sys.exc_info()[1:] - self.future.set_exception_info(e, tb) - else: - self.future.set_result(result) - -def _worker(executor_reference, work_queue): - try: - while True: - work_item = work_queue.get(block=True) - if work_item is not None: - work_item.run() - continue - executor = executor_reference() - # Exit if: - # - The interpreter is shutting down OR - # - The executor that owns the worker has been collected OR - # - The executor that owns the worker has been shutdown. - if _shutdown or executor is None or executor._shutdown: - # Notice other workers - work_queue.put(None) - return - del executor - except BaseException: - _base.LOGGER.critical('Exception in worker', exc_info=True) - -class ThreadPoolExecutor(_base.Executor): - def __init__(self, max_workers): - """Initializes a new ThreadPoolExecutor instance. - - Args: - max_workers: The maximum number of threads that can be used to - execute the given calls. - """ - self._max_workers = max_workers - self._work_queue = queue.Queue() - self._threads = set() - self._shutdown = False - self._shutdown_lock = threading.Lock() - - def submit(self, fn, *args, **kwargs): - with self._shutdown_lock: - if self._shutdown: - raise RuntimeError('cannot schedule new futures after shutdown') - - f = _base.Future() - w = _WorkItem(f, fn, args, kwargs) - - self._work_queue.put(w) - self._adjust_thread_count() - return f - submit.__doc__ = _base.Executor.submit.__doc__ - - def _adjust_thread_count(self): - # When the executor gets lost, the weakref callback will wake up - # the worker threads. - def weakref_cb(_, q=self._work_queue): - q.put(None) - # TODO(bquinlan): Should avoid creating new threads if there are more - # idle threads than items in the work queue. - if len(self._threads) < self._max_workers: - t = threading.Thread(target=_worker, - args=(weakref.ref(self, weakref_cb), - self._work_queue)) - t.daemon = True - t.start() - self._threads.add(t) - _threads_queues[t] = self._work_queue - - def shutdown(self, wait=True): - with self._shutdown_lock: - self._shutdown = True - self._work_queue.put(None) - if wait: - for t in self._threads: - t.join() - shutdown.__doc__ = _base.Executor.shutdown.__doc__ diff --git a/lib/configobj.py b/lib/configobj.py deleted file mode 100644 index eae556a7..00000000 --- a/lib/configobj.py +++ /dev/null @@ -1,2468 +0,0 @@ -# configobj.py -# A config file reader/writer that supports nested sections in config files. -# Copyright (C) 2005-2010 Michael Foord, Nicola Larosa -# E-mail: fuzzyman AT voidspace DOT org DOT uk -# nico AT tekNico DOT net - -# ConfigObj 4 -# http://www.voidspace.org.uk/python/configobj.html - -# Released subject to the BSD License -# Please see http://www.voidspace.org.uk/python/license.shtml - -# Scripts maintained at http://www.voidspace.org.uk/python/index.shtml -# For information about bugfixes, updates and support, please join the -# ConfigObj mailing list: -# http://lists.sourceforge.net/lists/listinfo/configobj-develop -# Comments, suggestions and bug reports welcome. - -from __future__ import generators - -import os -import re -import sys - -from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF16_BE, BOM_UTF16_LE - - -# imported lazily to avoid startup performance hit if it isn't used -compiler = None - -# A dictionary mapping BOM to -# the encoding to decode with, and what to set the -# encoding attribute to. -BOMS = { - BOM_UTF8: ('utf_8', None), - BOM_UTF16_BE: ('utf16_be', 'utf_16'), - BOM_UTF16_LE: ('utf16_le', 'utf_16'), - BOM_UTF16: ('utf_16', 'utf_16'), - } -# All legal variants of the BOM codecs. -# TODO: the list of aliases is not meant to be exhaustive, is there a -# better way ? -BOM_LIST = { - 'utf_16': 'utf_16', - 'u16': 'utf_16', - 'utf16': 'utf_16', - 'utf-16': 'utf_16', - 'utf16_be': 'utf16_be', - 'utf_16_be': 'utf16_be', - 'utf-16be': 'utf16_be', - 'utf16_le': 'utf16_le', - 'utf_16_le': 'utf16_le', - 'utf-16le': 'utf16_le', - 'utf_8': 'utf_8', - 'u8': 'utf_8', - 'utf': 'utf_8', - 'utf8': 'utf_8', - 'utf-8': 'utf_8', - } - -# Map of encodings to the BOM to write. -BOM_SET = { - 'utf_8': BOM_UTF8, - 'utf_16': BOM_UTF16, - 'utf16_be': BOM_UTF16_BE, - 'utf16_le': BOM_UTF16_LE, - None: BOM_UTF8 - } - - -def match_utf8(encoding): - return BOM_LIST.get(encoding.lower()) == 'utf_8' - - -# Quote strings used for writing values -squot = "'%s'" -dquot = '"%s"' -noquot = "%s" -wspace_plus = ' \r\n\v\t\'"' -tsquot = '"""%s"""' -tdquot = "'''%s'''" - -# Sentinel for use in getattr calls to replace hasattr -MISSING = object() - -__version__ = '4.7.2' - -try: - any -except NameError: - def any(iterable): - for entry in iterable: - if entry: - return True - return False - - -__all__ = ( - '__version__', - 'DEFAULT_INDENT_TYPE', - 'DEFAULT_INTERPOLATION', - 'ConfigObjError', - 'NestingError', - 'ParseError', - 'DuplicateError', - 'ConfigspecError', - 'ConfigObj', - 'SimpleVal', - 'InterpolationError', - 'InterpolationLoopError', - 'MissingInterpolationOption', - 'RepeatSectionError', - 'ReloadError', - 'UnreprError', - 'UnknownType', - 'flatten_errors', - 'get_extra_values' -) - -DEFAULT_INTERPOLATION = 'configparser' -DEFAULT_INDENT_TYPE = ' ' -MAX_INTERPOL_DEPTH = 10 - -OPTION_DEFAULTS = { - 'interpolation': True, - 'raise_errors': False, - 'list_values': True, - 'create_empty': False, - 'file_error': False, - 'configspec': None, - 'stringify': True, - # option may be set to one of ('', ' ', '\t') - 'indent_type': None, - 'encoding': None, - 'default_encoding': None, - 'unrepr': False, - 'write_empty_values': False, -} - - - -def getObj(s): - global compiler - if compiler is None: - import compiler - s = "a=" + s - p = compiler.parse(s) - return p.getChildren()[1].getChildren()[0].getChildren()[1] - - -class UnknownType(Exception): - pass - - -class Builder(object): - - def build(self, o): - m = getattr(self, 'build_' + o.__class__.__name__, None) - if m is None: - raise UnknownType(o.__class__.__name__) - return m(o) - - def build_List(self, o): - return map(self.build, o.getChildren()) - - def build_Const(self, o): - return o.value - - def build_Dict(self, o): - d = {} - i = iter(map(self.build, o.getChildren())) - for el in i: - d[el] = i.next() - return d - - def build_Tuple(self, o): - return tuple(self.build_List(o)) - - def build_Name(self, o): - if o.name == 'None': - return None - if o.name == 'True': - return True - if o.name == 'False': - return False - - # An undefined Name - raise UnknownType('Undefined Name') - - def build_Add(self, o): - real, imag = map(self.build_Const, o.getChildren()) - try: - real = float(real) - except TypeError: - raise UnknownType('Add') - if not isinstance(imag, complex) or imag.real != 0.0: - raise UnknownType('Add') - return real+imag - - def build_Getattr(self, o): - parent = self.build(o.expr) - return getattr(parent, o.attrname) - - def build_UnarySub(self, o): - return -self.build_Const(o.getChildren()[0]) - - def build_UnaryAdd(self, o): - return self.build_Const(o.getChildren()[0]) - - -_builder = Builder() - - -def unrepr(s): - if not s: - return s - return _builder.build(getObj(s)) - - - -class ConfigObjError(SyntaxError): - """ - This is the base class for all errors that ConfigObj raises. - It is a subclass of SyntaxError. - """ - def __init__(self, message='', line_number=None, line=''): - self.line = line - self.line_number = line_number - SyntaxError.__init__(self, message) - - -class NestingError(ConfigObjError): - """ - This error indicates a level of nesting that doesn't match. - """ - - -class ParseError(ConfigObjError): - """ - This error indicates that a line is badly written. - It is neither a valid ``key = value`` line, - nor a valid section marker line. - """ - - -class ReloadError(IOError): - """ - A 'reload' operation failed. - This exception is a subclass of ``IOError``. - """ - def __init__(self): - IOError.__init__(self, 'reload failed, filename is not set.') - - -class DuplicateError(ConfigObjError): - """ - The keyword or section specified already exists. - """ - - -class ConfigspecError(ConfigObjError): - """ - An error occured whilst parsing a configspec. - """ - - -class InterpolationError(ConfigObjError): - """Base class for the two interpolation errors.""" - - -class InterpolationLoopError(InterpolationError): - """Maximum interpolation depth exceeded in string interpolation.""" - - def __init__(self, option): - InterpolationError.__init__( - self, - 'interpolation loop detected in value "%s".' % option) - - -class RepeatSectionError(ConfigObjError): - """ - This error indicates additional sections in a section with a - ``__many__`` (repeated) section. - """ - - -class MissingInterpolationOption(InterpolationError): - """A value specified for interpolation was missing.""" - def __init__(self, option): - msg = 'missing option "%s" in interpolation.' % option - InterpolationError.__init__(self, msg) - - -class UnreprError(ConfigObjError): - """An error parsing in unrepr mode.""" - - - -class InterpolationEngine(object): - """ - A helper class to help perform string interpolation. - - This class is an abstract base class; its descendants perform - the actual work. - """ - - # compiled regexp to use in self.interpolate() - _KEYCRE = re.compile(r"%\(([^)]*)\)s") - _cookie = '%' - - def __init__(self, section): - # the Section instance that "owns" this engine - self.section = section - - - def interpolate(self, key, value): - # short-cut - if not self._cookie in value: - return value - - def recursive_interpolate(key, value, section, backtrail): - """The function that does the actual work. - - ``value``: the string we're trying to interpolate. - ``section``: the section in which that string was found - ``backtrail``: a dict to keep track of where we've been, - to detect and prevent infinite recursion loops - - This is similar to a depth-first-search algorithm. - """ - # Have we been here already? - if (key, section.name) in backtrail: - # Yes - infinite loop detected - raise InterpolationLoopError(key) - # Place a marker on our backtrail so we won't come back here again - backtrail[(key, section.name)] = 1 - - # Now start the actual work - match = self._KEYCRE.search(value) - while match: - # The actual parsing of the match is implementation-dependent, - # so delegate to our helper function - k, v, s = self._parse_match(match) - if k is None: - # That's the signal that no further interpolation is needed - replacement = v - else: - # Further interpolation may be needed to obtain final value - replacement = recursive_interpolate(k, v, s, backtrail) - # Replace the matched string with its final value - start, end = match.span() - value = ''.join((value[:start], replacement, value[end:])) - new_search_start = start + len(replacement) - # Pick up the next interpolation key, if any, for next time - # through the while loop - match = self._KEYCRE.search(value, new_search_start) - - # Now safe to come back here again; remove marker from backtrail - del backtrail[(key, section.name)] - - return value - - # Back in interpolate(), all we have to do is kick off the recursive - # function with appropriate starting values - value = recursive_interpolate(key, value, self.section, {}) - return value - - - def _fetch(self, key): - """Helper function to fetch values from owning section. - - Returns a 2-tuple: the value, and the section where it was found. - """ - # switch off interpolation before we try and fetch anything ! - save_interp = self.section.main.interpolation - self.section.main.interpolation = False - - # Start at section that "owns" this InterpolationEngine - current_section = self.section - while True: - # try the current section first - val = current_section.get(key) - if val is not None and not isinstance(val, Section): - break - # try "DEFAULT" next - val = current_section.get('DEFAULT', {}).get(key) - if val is not None and not isinstance(val, Section): - break - # move up to parent and try again - # top-level's parent is itself - if current_section.parent is current_section: - # reached top level, time to give up - break - current_section = current_section.parent - - # restore interpolation to previous value before returning - self.section.main.interpolation = save_interp - if val is None: - raise MissingInterpolationOption(key) - return val, current_section - - - def _parse_match(self, match): - """Implementation-dependent helper function. - - Will be passed a match object corresponding to the interpolation - key we just found (e.g., "%(foo)s" or "$foo"). Should look up that - key in the appropriate config file section (using the ``_fetch()`` - helper function) and return a 3-tuple: (key, value, section) - - ``key`` is the name of the key we're looking for - ``value`` is the value found for that key - ``section`` is a reference to the section where it was found - - ``key`` and ``section`` should be None if no further - interpolation should be performed on the resulting value - (e.g., if we interpolated "$$" and returned "$"). - """ - raise NotImplementedError() - - - -class ConfigParserInterpolation(InterpolationEngine): - """Behaves like ConfigParser.""" - _cookie = '%' - _KEYCRE = re.compile(r"%\(([^)]*)\)s") - - def _parse_match(self, match): - key = match.group(1) - value, section = self._fetch(key) - return key, value, section - - - -class TemplateInterpolation(InterpolationEngine): - """Behaves like string.Template.""" - _cookie = '$' - _delimiter = '$' - _KEYCRE = re.compile(r""" - \$(?: - (?P\$) | # Two $ signs - (?P[_a-z][_a-z0-9]*) | # $name format - {(?P[^}]*)} # ${name} format - ) - """, re.IGNORECASE | re.VERBOSE) - - def _parse_match(self, match): - # Valid name (in or out of braces): fetch value from section - key = match.group('named') or match.group('braced') - if key is not None: - value, section = self._fetch(key) - return key, value, section - # Escaped delimiter (e.g., $$): return single delimiter - if match.group('escaped') is not None: - # Return None for key and section to indicate it's time to stop - return None, self._delimiter, None - # Anything else: ignore completely, just return it unchanged - return None, match.group(), None - - -interpolation_engines = { - 'configparser': ConfigParserInterpolation, - 'template': TemplateInterpolation, -} - - -def __newobj__(cls, *args): - # Hack for pickle - return cls.__new__(cls, *args) - -class Section(dict): - """ - A dictionary-like object that represents a section in a config file. - - It does string interpolation if the 'interpolation' attribute - of the 'main' object is set to True. - - Interpolation is tried first from this object, then from the 'DEFAULT' - section of this object, next from the parent and its 'DEFAULT' section, - and so on until the main object is reached. - - A Section will behave like an ordered dictionary - following the - order of the ``scalars`` and ``sections`` attributes. - You can use this to change the order of members. - - Iteration follows the order: scalars, then sections. - """ - - - def __setstate__(self, state): - dict.update(self, state[0]) - self.__dict__.update(state[1]) - - def __reduce__(self): - state = (dict(self), self.__dict__) - return (__newobj__, (self.__class__,), state) - - - def __init__(self, parent, depth, main, indict=None, name=None): - """ - * parent is the section above - * depth is the depth level of this section - * main is the main ConfigObj - * indict is a dictionary to initialise the section with - """ - if indict is None: - indict = {} - dict.__init__(self) - # used for nesting level *and* interpolation - self.parent = parent - # used for the interpolation attribute - self.main = main - # level of nesting depth of this Section - self.depth = depth - # purely for information - self.name = name - # - self._initialise() - # we do this explicitly so that __setitem__ is used properly - # (rather than just passing to ``dict.__init__``) - for entry, value in indict.iteritems(): - self[entry] = value - - - def _initialise(self): - # the sequence of scalar values in this Section - self.scalars = [] - # the sequence of sections in this Section - self.sections = [] - # for comments :-) - self.comments = {} - self.inline_comments = {} - # the configspec - self.configspec = None - # for defaults - self.defaults = [] - self.default_values = {} - self.extra_values = [] - self._created = False - - - def _interpolate(self, key, value): - try: - # do we already have an interpolation engine? - engine = self._interpolation_engine - except AttributeError: - # not yet: first time running _interpolate(), so pick the engine - name = self.main.interpolation - if name == True: # note that "if name:" would be incorrect here - # backwards-compatibility: interpolation=True means use default - name = DEFAULT_INTERPOLATION - name = name.lower() # so that "Template", "template", etc. all work - class_ = interpolation_engines.get(name, None) - if class_ is None: - # invalid value for self.main.interpolation - self.main.interpolation = False - return value - else: - # save reference to engine so we don't have to do this again - engine = self._interpolation_engine = class_(self) - # let the engine do the actual work - return engine.interpolate(key, value) - - - def __getitem__(self, key): - """Fetch the item and do string interpolation.""" - val = dict.__getitem__(self, key) - if self.main.interpolation: - if isinstance(val, basestring): - return self._interpolate(key, val) - if isinstance(val, list): - def _check(entry): - if isinstance(entry, basestring): - return self._interpolate(key, entry) - return entry - new = [_check(entry) for entry in val] - if new != val: - return new - return val - - - def __setitem__(self, key, value, unrepr=False): - """ - Correctly set a value. - - Making dictionary values Section instances. - (We have to special case 'Section' instances - which are also dicts) - - Keys must be strings. - Values need only be strings (or lists of strings) if - ``main.stringify`` is set. - - ``unrepr`` must be set when setting a value to a dictionary, without - creating a new sub-section. - """ - if not isinstance(key, basestring): - raise ValueError('The key "%s" is not a string.' % key) - - # add the comment - if key not in self.comments: - self.comments[key] = [] - self.inline_comments[key] = '' - # remove the entry from defaults - if key in self.defaults: - self.defaults.remove(key) - # - if isinstance(value, Section): - if key not in self: - self.sections.append(key) - dict.__setitem__(self, key, value) - elif isinstance(value, dict) and not unrepr: - # First create the new depth level, - # then create the section - if key not in self: - self.sections.append(key) - new_depth = self.depth + 1 - dict.__setitem__( - self, - key, - Section( - self, - new_depth, - self.main, - indict=value, - name=key)) - else: - if key not in self: - self.scalars.append(key) - if not self.main.stringify: - if isinstance(value, basestring): - pass - elif isinstance(value, (list, tuple)): - for entry in value: - if not isinstance(entry, basestring): - raise TypeError('Value is not a string "%s".' % entry) - else: - raise TypeError('Value is not a string "%s".' % value) - dict.__setitem__(self, key, value) - - - def __delitem__(self, key): - """Remove items from the sequence when deleting.""" - dict. __delitem__(self, key) - if key in self.scalars: - self.scalars.remove(key) - else: - self.sections.remove(key) - del self.comments[key] - del self.inline_comments[key] - - - def get(self, key, default=None): - """A version of ``get`` that doesn't bypass string interpolation.""" - try: - return self[key] - except KeyError: - return default - - - def update(self, indict): - """ - A version of update that uses our ``__setitem__``. - """ - for entry in indict: - self[entry] = indict[entry] - - - def pop(self, key, default=MISSING): - """ - 'D.pop(k[,d]) -> v, remove specified key and return the corresponding value. - If key is not found, d is returned if given, otherwise KeyError is raised' - """ - try: - val = self[key] - except KeyError: - if default is MISSING: - raise - val = default - else: - del self[key] - return val - - - def popitem(self): - """Pops the first (key,val)""" - sequence = (self.scalars + self.sections) - if not sequence: - raise KeyError(": 'popitem(): dictionary is empty'") - key = sequence[0] - val = self[key] - del self[key] - return key, val - - - def clear(self): - """ - A version of clear that also affects scalars/sections - Also clears comments and configspec. - - Leaves other attributes alone : - depth/main/parent are not affected - """ - dict.clear(self) - self.scalars = [] - self.sections = [] - self.comments = {} - self.inline_comments = {} - self.configspec = None - self.defaults = [] - self.extra_values = [] - - - def setdefault(self, key, default=None): - """A version of setdefault that sets sequence if appropriate.""" - try: - return self[key] - except KeyError: - self[key] = default - return self[key] - - - def items(self): - """D.items() -> list of D's (key, value) pairs, as 2-tuples""" - return zip((self.scalars + self.sections), self.values()) - - - def keys(self): - """D.keys() -> list of D's keys""" - return (self.scalars + self.sections) - - - def values(self): - """D.values() -> list of D's values""" - return [self[key] for key in (self.scalars + self.sections)] - - - def iteritems(self): - """D.iteritems() -> an iterator over the (key, value) items of D""" - return iter(self.items()) - - - def iterkeys(self): - """D.iterkeys() -> an iterator over the keys of D""" - return iter((self.scalars + self.sections)) - - __iter__ = iterkeys - - - def itervalues(self): - """D.itervalues() -> an iterator over the values of D""" - return iter(self.values()) - - - def __repr__(self): - """x.__repr__() <==> repr(x)""" - def _getval(key): - try: - return self[key] - except MissingInterpolationOption: - return dict.__getitem__(self, key) - return '{%s}' % ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) - for key in (self.scalars + self.sections)]) - - __str__ = __repr__ - __str__.__doc__ = "x.__str__() <==> str(x)" - - - # Extra methods - not in a normal dictionary - - def dict(self): - """ - Return a deepcopy of self as a dictionary. - - All members that are ``Section`` instances are recursively turned to - ordinary dictionaries - by calling their ``dict`` method. - - >>> n = a.dict() - >>> n == a - 1 - >>> n is a - 0 - """ - newdict = {} - for entry in self: - this_entry = self[entry] - if isinstance(this_entry, Section): - this_entry = this_entry.dict() - elif isinstance(this_entry, list): - # create a copy rather than a reference - this_entry = list(this_entry) - elif isinstance(this_entry, tuple): - # create a copy rather than a reference - this_entry = tuple(this_entry) - newdict[entry] = this_entry - return newdict - - - def merge(self, indict): - """ - A recursive update - useful for merging config files. - - >>> a = '''[section1] - ... option1 = True - ... [[subsection]] - ... more_options = False - ... # end of file'''.splitlines() - >>> b = '''# File is user.ini - ... [section1] - ... option1 = False - ... # end of file'''.splitlines() - >>> c1 = ConfigObj(b) - >>> c2 = ConfigObj(a) - >>> c2.merge(c1) - >>> c2 - ConfigObj({'section1': {'option1': 'False', 'subsection': {'more_options': 'False'}}}) - """ - for key, val in indict.items(): - if (key in self and isinstance(self[key], dict) and - isinstance(val, dict)): - self[key].merge(val) - else: - self[key] = val - - - def rename(self, oldkey, newkey): - """ - Change a keyname to another, without changing position in sequence. - - Implemented so that transformations can be made on keys, - as well as on values. (used by encode and decode) - - Also renames comments. - """ - if oldkey in self.scalars: - the_list = self.scalars - elif oldkey in self.sections: - the_list = self.sections - else: - raise KeyError('Key "%s" not found.' % oldkey) - pos = the_list.index(oldkey) - # - val = self[oldkey] - dict.__delitem__(self, oldkey) - dict.__setitem__(self, newkey, val) - the_list.remove(oldkey) - the_list.insert(pos, newkey) - comm = self.comments[oldkey] - inline_comment = self.inline_comments[oldkey] - del self.comments[oldkey] - del self.inline_comments[oldkey] - self.comments[newkey] = comm - self.inline_comments[newkey] = inline_comment - - - def walk(self, function, raise_errors=True, - call_on_sections=False, **keywargs): - """ - Walk every member and call a function on the keyword and value. - - Return a dictionary of the return values - - If the function raises an exception, raise the errror - unless ``raise_errors=False``, in which case set the return value to - ``False``. - - Any unrecognised keyword arguments you pass to walk, will be pased on - to the function you pass in. - - Note: if ``call_on_sections`` is ``True`` then - on encountering a - subsection, *first* the function is called for the *whole* subsection, - and then recurses into it's members. This means your function must be - able to handle strings, dictionaries and lists. This allows you - to change the key of subsections as well as for ordinary members. The - return value when called on the whole subsection has to be discarded. - - See the encode and decode methods for examples, including functions. - - .. admonition:: caution - - You can use ``walk`` to transform the names of members of a section - but you mustn't add or delete members. - - >>> config = '''[XXXXsection] - ... XXXXkey = XXXXvalue'''.splitlines() - >>> cfg = ConfigObj(config) - >>> cfg - ConfigObj({'XXXXsection': {'XXXXkey': 'XXXXvalue'}}) - >>> def transform(section, key): - ... val = section[key] - ... newkey = key.replace('XXXX', 'CLIENT1') - ... section.rename(key, newkey) - ... if isinstance(val, (tuple, list, dict)): - ... pass - ... else: - ... val = val.replace('XXXX', 'CLIENT1') - ... section[newkey] = val - >>> cfg.walk(transform, call_on_sections=True) - {'CLIENT1section': {'CLIENT1key': None}} - >>> cfg - ConfigObj({'CLIENT1section': {'CLIENT1key': 'CLIENT1value'}}) - """ - out = {} - # scalars first - for i in range(len(self.scalars)): - entry = self.scalars[i] - try: - val = function(self, entry, **keywargs) - # bound again in case name has changed - entry = self.scalars[i] - out[entry] = val - except Exception: - if raise_errors: - raise - else: - entry = self.scalars[i] - out[entry] = False - # then sections - for i in range(len(self.sections)): - entry = self.sections[i] - if call_on_sections: - try: - function(self, entry, **keywargs) - except Exception: - if raise_errors: - raise - else: - entry = self.sections[i] - out[entry] = False - # bound again in case name has changed - entry = self.sections[i] - # previous result is discarded - out[entry] = self[entry].walk( - function, - raise_errors=raise_errors, - call_on_sections=call_on_sections, - **keywargs) - return out - - - def as_bool(self, key): - """ - Accepts a key as input. The corresponding value must be a string or - the objects (``True`` or 1) or (``False`` or 0). We allow 0 and 1 to - retain compatibility with Python 2.2. - - If the string is one of ``True``, ``On``, ``Yes``, or ``1`` it returns - ``True``. - - If the string is one of ``False``, ``Off``, ``No``, or ``0`` it returns - ``False``. - - ``as_bool`` is not case sensitive. - - Any other input will raise a ``ValueError``. - - >>> a = ConfigObj() - >>> a['a'] = 'fish' - >>> a.as_bool('a') - Traceback (most recent call last): - ValueError: Value "fish" is neither True nor False - >>> a['b'] = 'True' - >>> a.as_bool('b') - 1 - >>> a['b'] = 'off' - >>> a.as_bool('b') - 0 - """ - val = self[key] - if val == True: - return True - elif val == False: - return False - else: - try: - if not isinstance(val, basestring): - # TODO: Why do we raise a KeyError here? - raise KeyError() - else: - return self.main._bools[val.lower()] - except KeyError: - raise ValueError('Value "%s" is neither True nor False' % val) - - - def as_int(self, key): - """ - A convenience method which coerces the specified value to an integer. - - If the value is an invalid literal for ``int``, a ``ValueError`` will - be raised. - - >>> a = ConfigObj() - >>> a['a'] = 'fish' - >>> a.as_int('a') - Traceback (most recent call last): - ValueError: invalid literal for int() with base 10: 'fish' - >>> a['b'] = '1' - >>> a.as_int('b') - 1 - >>> a['b'] = '3.2' - >>> a.as_int('b') - Traceback (most recent call last): - ValueError: invalid literal for int() with base 10: '3.2' - """ - return int(self[key]) - - - def as_float(self, key): - """ - A convenience method which coerces the specified value to a float. - - If the value is an invalid literal for ``float``, a ``ValueError`` will - be raised. - - >>> a = ConfigObj() - >>> a['a'] = 'fish' - >>> a.as_float('a') - Traceback (most recent call last): - ValueError: invalid literal for float(): fish - >>> a['b'] = '1' - >>> a.as_float('b') - 1.0 - >>> a['b'] = '3.2' - >>> a.as_float('b') - 3.2000000000000002 - """ - return float(self[key]) - - - def as_list(self, key): - """ - A convenience method which fetches the specified value, guaranteeing - that it is a list. - - >>> a = ConfigObj() - >>> a['a'] = 1 - >>> a.as_list('a') - [1] - >>> a['a'] = (1,) - >>> a.as_list('a') - [1] - >>> a['a'] = [1] - >>> a.as_list('a') - [1] - """ - result = self[key] - if isinstance(result, (tuple, list)): - return list(result) - return [result] - - - def restore_default(self, key): - """ - Restore (and return) default value for the specified key. - - This method will only work for a ConfigObj that was created - with a configspec and has been validated. - - If there is no default value for this key, ``KeyError`` is raised. - """ - default = self.default_values[key] - dict.__setitem__(self, key, default) - if key not in self.defaults: - self.defaults.append(key) - return default - - - def restore_defaults(self): - """ - Recursively restore default values to all members - that have them. - - This method will only work for a ConfigObj that was created - with a configspec and has been validated. - - It doesn't delete or modify entries without default values. - """ - for key in self.default_values: - self.restore_default(key) - - for section in self.sections: - self[section].restore_defaults() - - -class ConfigObj(Section): - """An object to read, create, and write config files.""" - - _keyword = re.compile(r'''^ # line start - (\s*) # indentation - ( # keyword - (?:".*?")| # double quotes - (?:'.*?')| # single quotes - (?:[^'"=].*?) # no quotes - ) - \s*=\s* # divider - (.*) # value (including list values and comments) - $ # line end - ''', - re.VERBOSE) - - _sectionmarker = re.compile(r'''^ - (\s*) # 1: indentation - ((?:\[\s*)+) # 2: section marker open - ( # 3: section name open - (?:"\s*\S.*?\s*")| # at least one non-space with double quotes - (?:'\s*\S.*?\s*')| # at least one non-space with single quotes - (?:[^'"\s].*?) # at least one non-space unquoted - ) # section name close - ((?:\s*\])+) # 4: section marker close - \s*(\#.*)? # 5: optional comment - $''', - re.VERBOSE) - - # this regexp pulls list values out as a single string - # or single values and comments - # FIXME: this regex adds a '' to the end of comma terminated lists - # workaround in ``_handle_value`` - _valueexp = re.compile(r'''^ - (?: - (?: - ( - (?: - (?: - (?:".*?")| # double quotes - (?:'.*?')| # single quotes - (?:[^'",\#][^,\#]*?) # unquoted - ) - \s*,\s* # comma - )* # match all list items ending in a comma (if any) - ) - ( - (?:".*?")| # double quotes - (?:'.*?')| # single quotes - (?:[^'",\#\s][^,]*?)| # unquoted - (?:(? 1: - msg = "Parsing failed with several errors.\nFirst error %s" % info - error = ConfigObjError(msg) - else: - error = self._errors[0] - # set the errors attribute; it's a list of tuples: - # (error_type, message, line_number) - error.errors = self._errors - # set the config attribute - error.config = self - raise error - # delete private attributes - del self._errors - - if configspec is None: - self.configspec = None - else: - self._handle_configspec(configspec) - - - def _initialise(self, options=None): - if options is None: - options = OPTION_DEFAULTS - - # initialise a few variables - self.filename = None - self._errors = [] - self.raise_errors = options['raise_errors'] - self.interpolation = options['interpolation'] - self.list_values = options['list_values'] - self.create_empty = options['create_empty'] - self.file_error = options['file_error'] - self.stringify = options['stringify'] - self.indent_type = options['indent_type'] - self.encoding = options['encoding'] - self.default_encoding = options['default_encoding'] - self.BOM = False - self.newlines = None - self.write_empty_values = options['write_empty_values'] - self.unrepr = options['unrepr'] - - self.initial_comment = [] - self.final_comment = [] - self.configspec = None - - if self._inspec: - self.list_values = False - - # Clear section attributes as well - Section._initialise(self) - - - def __repr__(self): - def _getval(key): - try: - return self[key] - except MissingInterpolationOption: - return dict.__getitem__(self, key) - return ('ConfigObj({%s})' % - ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) - for key in (self.scalars + self.sections)])) - - - def _handle_bom(self, infile): - """ - Handle any BOM, and decode if necessary. - - If an encoding is specified, that *must* be used - but the BOM should - still be removed (and the BOM attribute set). - - (If the encoding is wrongly specified, then a BOM for an alternative - encoding won't be discovered or removed.) - - If an encoding is not specified, UTF8 or UTF16 BOM will be detected and - removed. The BOM attribute will be set. UTF16 will be decoded to - unicode. - - NOTE: This method must not be called with an empty ``infile``. - - Specifying the *wrong* encoding is likely to cause a - ``UnicodeDecodeError``. - - ``infile`` must always be returned as a list of lines, but may be - passed in as a single string. - """ - if ((self.encoding is not None) and - (self.encoding.lower() not in BOM_LIST)): - # No need to check for a BOM - # the encoding specified doesn't have one - # just decode - return self._decode(infile, self.encoding) - - if isinstance(infile, (list, tuple)): - line = infile[0] - else: - line = infile - if self.encoding is not None: - # encoding explicitly supplied - # And it could have an associated BOM - # TODO: if encoding is just UTF16 - we ought to check for both - # TODO: big endian and little endian versions. - enc = BOM_LIST[self.encoding.lower()] - if enc == 'utf_16': - # For UTF16 we try big endian and little endian - for BOM, (encoding, final_encoding) in BOMS.items(): - if not final_encoding: - # skip UTF8 - continue - if infile.startswith(BOM): - ### BOM discovered - ##self.BOM = True - # Don't need to remove BOM - return self._decode(infile, encoding) - - # If we get this far, will *probably* raise a DecodeError - # As it doesn't appear to start with a BOM - return self._decode(infile, self.encoding) - - # Must be UTF8 - BOM = BOM_SET[enc] - if not line.startswith(BOM): - return self._decode(infile, self.encoding) - - newline = line[len(BOM):] - - # BOM removed - if isinstance(infile, (list, tuple)): - infile[0] = newline - else: - infile = newline - self.BOM = True - return self._decode(infile, self.encoding) - - # No encoding specified - so we need to check for UTF8/UTF16 - for BOM, (encoding, final_encoding) in BOMS.items(): - if not line.startswith(BOM): - continue - else: - # BOM discovered - self.encoding = final_encoding - if not final_encoding: - self.BOM = True - # UTF8 - # remove BOM - newline = line[len(BOM):] - if isinstance(infile, (list, tuple)): - infile[0] = newline - else: - infile = newline - # UTF8 - don't decode - if isinstance(infile, basestring): - return infile.splitlines(True) - else: - return infile - # UTF16 - have to decode - return self._decode(infile, encoding) - - # No BOM discovered and no encoding specified, just return - if isinstance(infile, basestring): - # infile read from a file will be a single string - return infile.splitlines(True) - return infile - - - def _a_to_u(self, aString): - """Decode ASCII strings to unicode if a self.encoding is specified.""" - if self.encoding: - return aString.decode('ascii') - else: - return aString - - - def _decode(self, infile, encoding): - """ - Decode infile to unicode. Using the specified encoding. - - if is a string, it also needs converting to a list. - """ - if isinstance(infile, basestring): - # can't be unicode - # NOTE: Could raise a ``UnicodeDecodeError`` - return infile.decode(encoding).splitlines(True) - for i, line in enumerate(infile): - if not isinstance(line, unicode): - # NOTE: The isinstance test here handles mixed lists of unicode/string - # NOTE: But the decode will break on any non-string values - # NOTE: Or could raise a ``UnicodeDecodeError`` - infile[i] = line.decode(encoding) - return infile - - - def _decode_element(self, line): - """Decode element to unicode if necessary.""" - if not self.encoding: - return line - if isinstance(line, str) and self.default_encoding: - return line.decode(self.default_encoding) - return line - - - def _str(self, value): - """ - Used by ``stringify`` within validate, to turn non-string values - into strings. - """ - if not isinstance(value, basestring): - return str(value) - else: - return value - - - def _parse(self, infile): - """Actually parse the config file.""" - temp_list_values = self.list_values - if self.unrepr: - self.list_values = False - - comment_list = [] - done_start = False - this_section = self - maxline = len(infile) - 1 - cur_index = -1 - reset_comment = False - - while cur_index < maxline: - if reset_comment: - comment_list = [] - cur_index += 1 - line = infile[cur_index] - sline = line.strip() - # do we have anything on the line ? - if not sline or sline.startswith('#'): - reset_comment = False - comment_list.append(line) - continue - - if not done_start: - # preserve initial comment - self.initial_comment = comment_list - comment_list = [] - done_start = True - - reset_comment = True - # first we check if it's a section marker - mat = self._sectionmarker.match(line) - if mat is not None: - # is a section line - (indent, sect_open, sect_name, sect_close, comment) = mat.groups() - if indent and (self.indent_type is None): - self.indent_type = indent - cur_depth = sect_open.count('[') - if cur_depth != sect_close.count(']'): - self._handle_error("Cannot compute the section depth at line %s.", - NestingError, infile, cur_index) - continue - - if cur_depth < this_section.depth: - # the new section is dropping back to a previous level - try: - parent = self._match_depth(this_section, - cur_depth).parent - except SyntaxError: - self._handle_error("Cannot compute nesting level at line %s.", - NestingError, infile, cur_index) - continue - elif cur_depth == this_section.depth: - # the new section is a sibling of the current section - parent = this_section.parent - elif cur_depth == this_section.depth + 1: - # the new section is a child the current section - parent = this_section - else: - self._handle_error("Section too nested at line %s.", - NestingError, infile, cur_index) - - sect_name = self._unquote(sect_name) - if sect_name in parent: - self._handle_error('Duplicate section name at line %s.', - DuplicateError, infile, cur_index) - continue - - # create the new section - this_section = Section( - parent, - cur_depth, - self, - name=sect_name) - parent[sect_name] = this_section - parent.inline_comments[sect_name] = comment - parent.comments[sect_name] = comment_list - continue - # - # it's not a section marker, - # so it should be a valid ``key = value`` line - mat = self._keyword.match(line) - if mat is None: - # it neither matched as a keyword - # or a section marker - self._handle_error( - 'Invalid line at line "%s".', - ParseError, infile, cur_index) - else: - # is a keyword value - # value will include any inline comment - (indent, key, value) = mat.groups() - if indent and (self.indent_type is None): - self.indent_type = indent - # check for a multiline value - if value[:3] in ['"""', "'''"]: - try: - value, comment, cur_index = self._multiline( - value, infile, cur_index, maxline) - except SyntaxError: - self._handle_error( - 'Parse error in value at line %s.', - ParseError, infile, cur_index) - continue - else: - if self.unrepr: - comment = '' - try: - value = unrepr(value) - except Exception, e: - if type(e) == UnknownType: - msg = 'Unknown name or type in value at line %s.' - else: - msg = 'Parse error in value at line %s.' - self._handle_error(msg, UnreprError, infile, - cur_index) - continue - else: - if self.unrepr: - comment = '' - try: - value = unrepr(value) - except Exception, e: - if isinstance(e, UnknownType): - msg = 'Unknown name or type in value at line %s.' - else: - msg = 'Parse error in value at line %s.' - self._handle_error(msg, UnreprError, infile, - cur_index) - continue - else: - # extract comment and lists - try: - (value, comment) = self._handle_value(value) - except SyntaxError: - self._handle_error( - 'Parse error in value at line %s.', - ParseError, infile, cur_index) - continue - # - key = self._unquote(key) - if key in this_section: - self._handle_error( - 'Duplicate keyword name at line %s.', - DuplicateError, infile, cur_index) - continue - # add the key. - # we set unrepr because if we have got this far we will never - # be creating a new section - this_section.__setitem__(key, value, unrepr=True) - this_section.inline_comments[key] = comment - this_section.comments[key] = comment_list - continue - # - if self.indent_type is None: - # no indentation used, set the type accordingly - self.indent_type = '' - - # preserve the final comment - if not self and not self.initial_comment: - self.initial_comment = comment_list - elif not reset_comment: - self.final_comment = comment_list - self.list_values = temp_list_values - - - def _match_depth(self, sect, depth): - """ - Given a section and a depth level, walk back through the sections - parents to see if the depth level matches a previous section. - - Return a reference to the right section, - or raise a SyntaxError. - """ - while depth < sect.depth: - if sect is sect.parent: - # we've reached the top level already - raise SyntaxError() - sect = sect.parent - if sect.depth == depth: - return sect - # shouldn't get here - raise SyntaxError() - - - def _handle_error(self, text, ErrorClass, infile, cur_index): - """ - Handle an error according to the error settings. - - Either raise the error or store it. - The error will have occured at ``cur_index`` - """ - line = infile[cur_index] - cur_index += 1 - message = text % cur_index - error = ErrorClass(message, cur_index, line) - if self.raise_errors: - # raise the error - parsing stops here - raise error - # store the error - # reraise when parsing has finished - self._errors.append(error) - - - def _unquote(self, value): - """Return an unquoted version of a value""" - if not value: - # should only happen during parsing of lists - raise SyntaxError - if (value[0] == value[-1]) and (value[0] in ('"', "'")): - value = value[1:-1] - return value - - - def _quote(self, value, multiline=True): - """ - Return a safely quoted version of a value. - - Raise a ConfigObjError if the value cannot be safely quoted. - If multiline is ``True`` (default) then use triple quotes - if necessary. - - * Don't quote values that don't need it. - * Recursively quote members of a list and return a comma joined list. - * Multiline is ``False`` for lists. - * Obey list syntax for empty and single member lists. - - If ``list_values=False`` then the value is only quoted if it contains - a ``\\n`` (is multiline) or '#'. - - If ``write_empty_values`` is set, and the value is an empty string, it - won't be quoted. - """ - if multiline and self.write_empty_values and value == '': - # Only if multiline is set, so that it is used for values not - # keys, and not values that are part of a list - return '' - - if multiline and isinstance(value, (list, tuple)): - if not value: - return ',' - elif len(value) == 1: - return self._quote(value[0], multiline=False) + ',' - return ', '.join([self._quote(val, multiline=False) - for val in value]) - if not isinstance(value, basestring): - if self.stringify: - value = str(value) - else: - raise TypeError('Value "%s" is not a string.' % value) - - if not value: - return '""' - - no_lists_no_quotes = not self.list_values and '\n' not in value and '#' not in value - need_triple = multiline and ((("'" in value) and ('"' in value)) or ('\n' in value )) - hash_triple_quote = multiline and not need_triple and ("'" in value) and ('"' in value) and ('#' in value) - check_for_single = (no_lists_no_quotes or not need_triple) and not hash_triple_quote - - if check_for_single: - if not self.list_values: - # we don't quote if ``list_values=False`` - quot = noquot - # for normal values either single or double quotes will do - elif '\n' in value: - # will only happen if multiline is off - e.g. '\n' in key - raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) - elif ((value[0] not in wspace_plus) and - (value[-1] not in wspace_plus) and - (',' not in value)): - quot = noquot - else: - quot = self._get_single_quote(value) - else: - # if value has '\n' or "'" *and* '"', it will need triple quotes - quot = self._get_triple_quote(value) - - if quot == noquot and '#' in value and self.list_values: - quot = self._get_single_quote(value) - - return quot % value - - - def _get_single_quote(self, value): - if ("'" in value) and ('"' in value): - raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) - elif '"' in value: - quot = squot - else: - quot = dquot - return quot - - - def _get_triple_quote(self, value): - if (value.find('"""') != -1) and (value.find("'''") != -1): - raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) - if value.find('"""') == -1: - quot = tdquot - else: - quot = tsquot - return quot - - - def _handle_value(self, value): - """ - Given a value string, unquote, remove comment, - handle lists. (including empty and single member lists) - """ - if self._inspec: - # Parsing a configspec so don't handle comments - return (value, '') - # do we look for lists in values ? - if not self.list_values: - mat = self._nolistvalue.match(value) - if mat is None: - raise SyntaxError() - # NOTE: we don't unquote here - return mat.groups() - # - mat = self._valueexp.match(value) - if mat is None: - # the value is badly constructed, probably badly quoted, - # or an invalid list - raise SyntaxError() - (list_values, single, empty_list, comment) = mat.groups() - if (list_values == '') and (single is None): - # change this if you want to accept empty values - raise SyntaxError() - # NOTE: note there is no error handling from here if the regex - # is wrong: then incorrect values will slip through - if empty_list is not None: - # the single comma - meaning an empty list - return ([], comment) - if single is not None: - # handle empty values - if list_values and not single: - # FIXME: the '' is a workaround because our regex now matches - # '' at the end of a list if it has a trailing comma - single = None - else: - single = single or '""' - single = self._unquote(single) - if list_values == '': - # not a list value - return (single, comment) - the_list = self._listvalueexp.findall(list_values) - the_list = [self._unquote(val) for val in the_list] - if single is not None: - the_list += [single] - return (the_list, comment) - - - def _multiline(self, value, infile, cur_index, maxline): - """Extract the value, where we are in a multiline situation.""" - quot = value[:3] - newvalue = value[3:] - single_line = self._triple_quote[quot][0] - multi_line = self._triple_quote[quot][1] - mat = single_line.match(value) - if mat is not None: - retval = list(mat.groups()) - retval.append(cur_index) - return retval - elif newvalue.find(quot) != -1: - # somehow the triple quote is missing - raise SyntaxError() - # - while cur_index < maxline: - cur_index += 1 - newvalue += '\n' - line = infile[cur_index] - if line.find(quot) == -1: - newvalue += line - else: - # end of multiline, process it - break - else: - # we've got to the end of the config, oops... - raise SyntaxError() - mat = multi_line.match(line) - if mat is None: - # a badly formed line - raise SyntaxError() - (value, comment) = mat.groups() - return (newvalue + value, comment, cur_index) - - - def _handle_configspec(self, configspec): - """Parse the configspec.""" - # FIXME: Should we check that the configspec was created with the - # correct settings ? (i.e. ``list_values=False``) - if not isinstance(configspec, ConfigObj): - try: - configspec = ConfigObj(configspec, - raise_errors=True, - file_error=True, - _inspec=True) - except ConfigObjError, e: - # FIXME: Should these errors have a reference - # to the already parsed ConfigObj ? - raise ConfigspecError('Parsing configspec failed: %s' % e) - except IOError, e: - raise IOError('Reading configspec failed: %s' % e) - - self.configspec = configspec - - - - def _set_configspec(self, section, copy): - """ - Called by validate. Handles setting the configspec on subsections - including sections to be validated by __many__ - """ - configspec = section.configspec - many = configspec.get('__many__') - if isinstance(many, dict): - for entry in section.sections: - if entry not in configspec: - section[entry].configspec = many - - for entry in configspec.sections: - if entry == '__many__': - continue - if entry not in section: - section[entry] = {} - section[entry]._created = True - if copy: - # copy comments - section.comments[entry] = configspec.comments.get(entry, []) - section.inline_comments[entry] = configspec.inline_comments.get(entry, '') - - # Could be a scalar when we expect a section - if isinstance(section[entry], Section): - section[entry].configspec = configspec[entry] - - - def _write_line(self, indent_string, entry, this_entry, comment): - """Write an individual line, for the write method""" - # NOTE: the calls to self._quote here handles non-StringType values. - if not self.unrepr: - val = self._decode_element(self._quote(this_entry)) - else: - val = repr(this_entry) - return '%s%s%s%s%s' % (indent_string, - self._decode_element(self._quote(entry, multiline=False)), - self._a_to_u(' = '), - val, - self._decode_element(comment)) - - - def _write_marker(self, indent_string, depth, entry, comment): - """Write a section marker line""" - return '%s%s%s%s%s' % (indent_string, - self._a_to_u('[' * depth), - self._quote(self._decode_element(entry), multiline=False), - self._a_to_u(']' * depth), - self._decode_element(comment)) - - - def _handle_comment(self, comment): - """Deal with a comment.""" - if not comment: - return '' - start = self.indent_type - if not comment.startswith('#'): - start += self._a_to_u(' # ') - return (start + comment) - - - # Public methods - - def write(self, outfile=None, section=None): - """ - Write the current ConfigObj as a file - - tekNico: FIXME: use StringIO instead of real files - - >>> filename = a.filename - >>> a.filename = 'test.ini' - >>> a.write() - >>> a.filename = filename - >>> a == ConfigObj('test.ini', raise_errors=True) - 1 - >>> import os - >>> os.remove('test.ini') - """ - if self.indent_type is None: - # this can be true if initialised from a dictionary - self.indent_type = DEFAULT_INDENT_TYPE - - out = [] - cs = self._a_to_u('#') - csp = self._a_to_u('# ') - if section is None: - int_val = self.interpolation - self.interpolation = False - section = self - for line in self.initial_comment: - line = self._decode_element(line) - stripped_line = line.strip() - if stripped_line and not stripped_line.startswith(cs): - line = csp + line - out.append(line) - - indent_string = self.indent_type * section.depth - for entry in (section.scalars + section.sections): - if entry in section.defaults: - # don't write out default values - continue - for comment_line in section.comments[entry]: - comment_line = self._decode_element(comment_line.lstrip()) - if comment_line and not comment_line.startswith(cs): - comment_line = csp + comment_line - out.append(indent_string + comment_line) - this_entry = section[entry] - comment = self._handle_comment(section.inline_comments[entry]) - - if isinstance(this_entry, dict): - # a section - out.append(self._write_marker( - indent_string, - this_entry.depth, - entry, - comment)) - out.extend(self.write(section=this_entry)) - else: - out.append(self._write_line( - indent_string, - entry, - this_entry, - comment)) - - if section is self: - for line in self.final_comment: - line = self._decode_element(line) - stripped_line = line.strip() - if stripped_line and not stripped_line.startswith(cs): - line = csp + line - out.append(line) - self.interpolation = int_val - - if section is not self: - return out - - if (self.filename is None) and (outfile is None): - # output a list of lines - # might need to encode - # NOTE: This will *screw* UTF16, each line will start with the BOM - if self.encoding: - out = [l.encode(self.encoding) for l in out] - if (self.BOM and ((self.encoding is None) or - (BOM_LIST.get(self.encoding.lower()) == 'utf_8'))): - # Add the UTF8 BOM - if not out: - out.append('') - out[0] = BOM_UTF8 + out[0] - return out - - # Turn the list to a string, joined with correct newlines - newline = self.newlines or os.linesep - if (getattr(outfile, 'mode', None) is not None and outfile.mode == 'w' - and sys.platform == 'win32' and newline == '\r\n'): - # Windows specific hack to avoid writing '\r\r\n' - newline = '\n' - output = self._a_to_u(newline).join(out) - if self.encoding: - output = output.encode(self.encoding) - if self.BOM and ((self.encoding is None) or match_utf8(self.encoding)): - # Add the UTF8 BOM - output = BOM_UTF8 + output - - if not output.endswith(newline): - output += newline - if outfile is not None: - outfile.write(output) - else: - h = open(self.filename, 'wb') - h.write(output) - h.close() - - - def validate(self, validator, preserve_errors=False, copy=False, - section=None): - """ - Test the ConfigObj against a configspec. - - It uses the ``validator`` object from *validate.py*. - - To run ``validate`` on the current ConfigObj, call: :: - - test = config.validate(validator) - - (Normally having previously passed in the configspec when the ConfigObj - was created - you can dynamically assign a dictionary of checks to the - ``configspec`` attribute of a section though). - - It returns ``True`` if everything passes, or a dictionary of - pass/fails (True/False). If every member of a subsection passes, it - will just have the value ``True``. (It also returns ``False`` if all - members fail). - - In addition, it converts the values from strings to their native - types if their checks pass (and ``stringify`` is set). - - If ``preserve_errors`` is ``True`` (``False`` is default) then instead - of a marking a fail with a ``False``, it will preserve the actual - exception object. This can contain info about the reason for failure. - For example the ``VdtValueTooSmallError`` indicates that the value - supplied was too small. If a value (or section) is missing it will - still be marked as ``False``. - - You must have the validate module to use ``preserve_errors=True``. - - You can then use the ``flatten_errors`` function to turn your nested - results dictionary into a flattened list of failures - useful for - displaying meaningful error messages. - """ - if section is None: - if self.configspec is None: - raise ValueError('No configspec supplied.') - if preserve_errors: - # We do this once to remove a top level dependency on the validate module - # Which makes importing configobj faster - from validate import VdtMissingValue - self._vdtMissingValue = VdtMissingValue - - section = self - - if copy: - section.initial_comment = section.configspec.initial_comment - section.final_comment = section.configspec.final_comment - section.encoding = section.configspec.encoding - section.BOM = section.configspec.BOM - section.newlines = section.configspec.newlines - section.indent_type = section.configspec.indent_type - - # - # section.default_values.clear() #?? - configspec = section.configspec - self._set_configspec(section, copy) - - - def validate_entry(entry, spec, val, missing, ret_true, ret_false): - section.default_values.pop(entry, None) - - try: - section.default_values[entry] = validator.get_default_value(configspec[entry]) - except (KeyError, AttributeError, validator.baseErrorClass): - # No default, bad default or validator has no 'get_default_value' - # (e.g. SimpleVal) - pass - - try: - check = validator.check(spec, - val, - missing=missing - ) - except validator.baseErrorClass, e: - if not preserve_errors or isinstance(e, self._vdtMissingValue): - out[entry] = False - else: - # preserve the error - out[entry] = e - ret_false = False - ret_true = False - else: - ret_false = False - out[entry] = True - if self.stringify or missing: - # if we are doing type conversion - # or the value is a supplied default - if not self.stringify: - if isinstance(check, (list, tuple)): - # preserve lists - check = [self._str(item) for item in check] - elif missing and check is None: - # convert the None from a default to a '' - check = '' - else: - check = self._str(check) - if (check != val) or missing: - section[entry] = check - if not copy and missing and entry not in section.defaults: - section.defaults.append(entry) - return ret_true, ret_false - - # - out = {} - ret_true = True - ret_false = True - - unvalidated = [k for k in section.scalars if k not in configspec] - incorrect_sections = [k for k in configspec.sections if k in section.scalars] - incorrect_scalars = [k for k in configspec.scalars if k in section.sections] - - for entry in configspec.scalars: - if entry in ('__many__', '___many___'): - # reserved names - continue - if (not entry in section.scalars) or (entry in section.defaults): - # missing entries - # or entries from defaults - missing = True - val = None - if copy and entry not in section.scalars: - # copy comments - section.comments[entry] = ( - configspec.comments.get(entry, [])) - section.inline_comments[entry] = ( - configspec.inline_comments.get(entry, '')) - # - else: - missing = False - val = section[entry] - - ret_true, ret_false = validate_entry(entry, configspec[entry], val, - missing, ret_true, ret_false) - - many = None - if '__many__' in configspec.scalars: - many = configspec['__many__'] - elif '___many___' in configspec.scalars: - many = configspec['___many___'] - - if many is not None: - for entry in unvalidated: - val = section[entry] - ret_true, ret_false = validate_entry(entry, many, val, False, - ret_true, ret_false) - unvalidated = [] - - for entry in incorrect_scalars: - ret_true = False - if not preserve_errors: - out[entry] = False - else: - ret_false = False - msg = 'Value %r was provided as a section' % entry - out[entry] = validator.baseErrorClass(msg) - for entry in incorrect_sections: - ret_true = False - if not preserve_errors: - out[entry] = False - else: - ret_false = False - msg = 'Section %r was provided as a single value' % entry - out[entry] = validator.baseErrorClass(msg) - - # Missing sections will have been created as empty ones when the - # configspec was read. - for entry in section.sections: - # FIXME: this means DEFAULT is not copied in copy mode - if section is self and entry == 'DEFAULT': - continue - if section[entry].configspec is None: - unvalidated.append(entry) - continue - if copy: - section.comments[entry] = configspec.comments.get(entry, []) - section.inline_comments[entry] = configspec.inline_comments.get(entry, '') - check = self.validate(validator, preserve_errors=preserve_errors, copy=copy, section=section[entry]) - out[entry] = check - if check == False: - ret_true = False - elif check == True: - ret_false = False - else: - ret_true = False - - section.extra_values = unvalidated - if preserve_errors and not section._created: - # If the section wasn't created (i.e. it wasn't missing) - # then we can't return False, we need to preserve errors - ret_false = False - # - if ret_false and preserve_errors and out: - # If we are preserving errors, but all - # the failures are from missing sections / values - # then we can return False. Otherwise there is a - # real failure that we need to preserve. - ret_false = not any(out.values()) - if ret_true: - return True - elif ret_false: - return False - return out - - - def reset(self): - """Clear ConfigObj instance and restore to 'freshly created' state.""" - self.clear() - self._initialise() - # FIXME: Should be done by '_initialise', but ConfigObj constructor (and reload) - # requires an empty dictionary - self.configspec = None - # Just to be sure ;-) - self._original_configspec = None - - - def reload(self): - """ - Reload a ConfigObj from file. - - This method raises a ``ReloadError`` if the ConfigObj doesn't have - a filename attribute pointing to a file. - """ - if not isinstance(self.filename, basestring): - raise ReloadError() - - filename = self.filename - current_options = {} - for entry in OPTION_DEFAULTS: - if entry == 'configspec': - continue - current_options[entry] = getattr(self, entry) - - configspec = self._original_configspec - current_options['configspec'] = configspec - - self.clear() - self._initialise(current_options) - self._load(filename, configspec) - - - -class SimpleVal(object): - """ - A simple validator. - Can be used to check that all members expected are present. - - To use it, provide a configspec with all your members in (the value given - will be ignored). Pass an instance of ``SimpleVal`` to the ``validate`` - method of your ``ConfigObj``. ``validate`` will return ``True`` if all - members are present, or a dictionary with True/False meaning - present/missing. (Whole missing sections will be replaced with ``False``) - """ - - def __init__(self): - self.baseErrorClass = ConfigObjError - - def check(self, check, member, missing=False): - """A dummy check method, always returns the value unchanged.""" - if missing: - raise self.baseErrorClass() - return member - - -def flatten_errors(cfg, res, levels=None, results=None): - """ - An example function that will turn a nested dictionary of results - (as returned by ``ConfigObj.validate``) into a flat list. - - ``cfg`` is the ConfigObj instance being checked, ``res`` is the results - dictionary returned by ``validate``. - - (This is a recursive function, so you shouldn't use the ``levels`` or - ``results`` arguments - they are used by the function.) - - Returns a list of keys that failed. Each member of the list is a tuple:: - - ([list of sections...], key, result) - - If ``validate`` was called with ``preserve_errors=False`` (the default) - then ``result`` will always be ``False``. - - *list of sections* is a flattened list of sections that the key was found - in. - - If the section was missing (or a section was expected and a scalar provided - - or vice-versa) then key will be ``None``. - - If the value (or section) was missing then ``result`` will be ``False``. - - If ``validate`` was called with ``preserve_errors=True`` and a value - was present, but failed the check, then ``result`` will be the exception - object returned. You can use this as a string that describes the failure. - - For example *The value "3" is of the wrong type*. - """ - if levels is None: - # first time called - levels = [] - results = [] - if res == True: - return results - if res == False or isinstance(res, Exception): - results.append((levels[:], None, res)) - if levels: - levels.pop() - return results - for (key, val) in res.items(): - if val == True: - continue - if isinstance(cfg.get(key), dict): - # Go down one level - levels.append(key) - flatten_errors(cfg[key], val, levels, results) - continue - results.append((levels[:], key, val)) - # - # Go up one level - if levels: - levels.pop() - # - return results - - -def get_extra_values(conf, _prepend=()): - """ - Find all the values and sections not in the configspec from a validated - ConfigObj. - - ``get_extra_values`` returns a list of tuples where each tuple represents - either an extra section, or an extra value. - - The tuples contain two values, a tuple representing the section the value - is in and the name of the extra values. For extra values in the top level - section the first member will be an empty tuple. For values in the 'foo' - section the first member will be ``('foo',)``. For members in the 'bar' - subsection of the 'foo' section the first member will be ``('foo', 'bar')``. - - NOTE: If you call ``get_extra_values`` on a ConfigObj instance that hasn't - been validated it will return an empty list. - """ - out = [] - - out.extend([(_prepend, name) for name in conf.extra_values]) - for name in conf.sections: - if name not in conf.extra_values: - out.extend(get_extra_values(conf[name], _prepend + (name,))) - return out - - -"""*A programming language is a medium of expression.* - Paul Graham""" diff --git a/lib/enum/LICENSE b/lib/enum/LICENSE deleted file mode 100644 index 9003b885..00000000 --- a/lib/enum/LICENSE +++ /dev/null @@ -1,32 +0,0 @@ -Copyright (c) 2013, Ethan Furman. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - - Redistributions of source code must retain the above - copyright notice, this list of conditions and the - following disclaimer. - - Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - Neither the name Ethan Furman nor the names of any - contributors may be used to endorse or promote products - derived from this software without specific prior written - permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. diff --git a/lib/enum/README b/lib/enum/README deleted file mode 100644 index 511af984..00000000 --- a/lib/enum/README +++ /dev/null @@ -1,2 +0,0 @@ -enum34 is the new Python stdlib enum module available in Python 3.4 -backported for previous versions of Python from 2.4 to 3.3. diff --git a/lib/enum/__init__.py b/lib/enum/__init__.py deleted file mode 100644 index 6a327a8a..00000000 --- a/lib/enum/__init__.py +++ /dev/null @@ -1,790 +0,0 @@ -"""Python Enumerations""" - -import sys as _sys - -__all__ = ['Enum', 'IntEnum', 'unique'] - -version = 1, 0, 4 - -pyver = float('%s.%s' % _sys.version_info[:2]) - -try: - any -except NameError: - def any(iterable): - for element in iterable: - if element: - return True - return False - -try: - from collections import OrderedDict -except ImportError: - OrderedDict = None - -try: - basestring -except NameError: - # In Python 2 basestring is the ancestor of both str and unicode - # in Python 3 it's just str, but was missing in 3.1 - basestring = str - -try: - unicode -except NameError: - # In Python 3 unicode no longer exists (it's just str) - unicode = str - -class _RouteClassAttributeToGetattr(object): - """Route attribute access on a class to __getattr__. - - This is a descriptor, used to define attributes that act differently when - accessed through an instance and through a class. Instance access remains - normal, but access to an attribute through a class will be routed to the - class's __getattr__ method; this is done by raising AttributeError. - - """ - def __init__(self, fget=None): - self.fget = fget - - def __get__(self, instance, ownerclass=None): - if instance is None: - raise AttributeError() - return self.fget(instance) - - def __set__(self, instance, value): - raise AttributeError("can't set attribute") - - def __delete__(self, instance): - raise AttributeError("can't delete attribute") - - -def _is_descriptor(obj): - """Returns True if obj is a descriptor, False otherwise.""" - return ( - hasattr(obj, '__get__') or - hasattr(obj, '__set__') or - hasattr(obj, '__delete__')) - - -def _is_dunder(name): - """Returns True if a __dunder__ name, False otherwise.""" - return (name[:2] == name[-2:] == '__' and - name[2:3] != '_' and - name[-3:-2] != '_' and - len(name) > 4) - - -def _is_sunder(name): - """Returns True if a _sunder_ name, False otherwise.""" - return (name[0] == name[-1] == '_' and - name[1:2] != '_' and - name[-2:-1] != '_' and - len(name) > 2) - - -def _make_class_unpicklable(cls): - """Make the given class un-picklable.""" - def _break_on_call_reduce(self, protocol=None): - raise TypeError('%r cannot be pickled' % self) - cls.__reduce_ex__ = _break_on_call_reduce - cls.__module__ = '' - - -class _EnumDict(dict): - """Track enum member order and ensure member names are not reused. - - EnumMeta will use the names found in self._member_names as the - enumeration member names. - - """ - def __init__(self): - super(_EnumDict, self).__init__() - self._member_names = [] - - def __setitem__(self, key, value): - """Changes anything not dundered or not a descriptor. - - If a descriptor is added with the same name as an enum member, the name - is removed from _member_names (this may leave a hole in the numerical - sequence of values). - - If an enum member name is used twice, an error is raised; duplicate - values are not checked for. - - Single underscore (sunder) names are reserved. - - Note: in 3.x __order__ is simply discarded as a not necessary piece - leftover from 2.x - - """ - if pyver >= 3.0 and key == '__order__': - return - if _is_sunder(key): - raise ValueError('_names_ are reserved for future Enum use') - elif _is_dunder(key): - pass - elif key in self._member_names: - # descriptor overwriting an enum? - raise TypeError('Attempted to reuse key: %r' % key) - elif not _is_descriptor(value): - if key in self: - # enum overwriting a descriptor? - raise TypeError('Key already defined as: %r' % self[key]) - self._member_names.append(key) - super(_EnumDict, self).__setitem__(key, value) - - -# Dummy value for Enum as EnumMeta explicity checks for it, but of course until -# EnumMeta finishes running the first time the Enum class doesn't exist. This -# is also why there are checks in EnumMeta like `if Enum is not None` -Enum = None - - -class EnumMeta(type): - """Metaclass for Enum""" - @classmethod - def __prepare__(metacls, cls, bases): - return _EnumDict() - - def __new__(metacls, cls, bases, classdict): - # an Enum class is final once enumeration items have been defined; it - # cannot be mixed with other types (int, float, etc.) if it has an - # inherited __new__ unless a new __new__ is defined (or the resulting - # class will fail). - if type(classdict) is dict: - original_dict = classdict - classdict = _EnumDict() - for k, v in original_dict.items(): - classdict[k] = v - - member_type, first_enum = metacls._get_mixins_(bases) - __new__, save_new, use_args = metacls._find_new_(classdict, member_type, - first_enum) - # save enum items into separate mapping so they don't get baked into - # the new class - members = dict((k, classdict[k]) for k in classdict._member_names) - for name in classdict._member_names: - del classdict[name] - - # py2 support for definition order - __order__ = classdict.get('__order__') - if __order__ is None: - if pyver < 3.0: - try: - __order__ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])] - except TypeError: - __order__ = [name for name in sorted(members.keys())] - else: - __order__ = classdict._member_names - else: - del classdict['__order__'] - if pyver < 3.0: - __order__ = __order__.replace(',', ' ').split() - aliases = [name for name in members if name not in __order__] - __order__ += aliases - - # check for illegal enum names (any others?) - invalid_names = set(members) & set(['mro']) - if invalid_names: - raise ValueError('Invalid enum member name(s): %s' % ( - ', '.join(invalid_names), )) - - # create our new Enum type - enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict) - enum_class._member_names_ = [] # names in random order - if OrderedDict is not None: - enum_class._member_map_ = OrderedDict() - else: - enum_class._member_map_ = {} # name->value map - enum_class._member_type_ = member_type - - # Reverse value->name map for hashable values. - enum_class._value2member_map_ = {} - - # instantiate them, checking for duplicates as we go - # we instantiate first instead of checking for duplicates first in case - # a custom __new__ is doing something funky with the values -- such as - # auto-numbering ;) - if __new__ is None: - __new__ = enum_class.__new__ - for member_name in __order__: - value = members[member_name] - if not isinstance(value, tuple): - args = (value, ) - else: - args = value - if member_type is tuple: # special case for tuple enums - args = (args, ) # wrap it one more time - if not use_args or not args: - enum_member = __new__(enum_class) - if not hasattr(enum_member, '_value_'): - enum_member._value_ = value - else: - enum_member = __new__(enum_class, *args) - if not hasattr(enum_member, '_value_'): - enum_member._value_ = member_type(*args) - value = enum_member._value_ - enum_member._name_ = member_name - enum_member.__objclass__ = enum_class - enum_member.__init__(*args) - # If another member with the same value was already defined, the - # new member becomes an alias to the existing one. - for name, canonical_member in enum_class._member_map_.items(): - if canonical_member.value == enum_member._value_: - enum_member = canonical_member - break - else: - # Aliases don't appear in member names (only in __members__). - enum_class._member_names_.append(member_name) - enum_class._member_map_[member_name] = enum_member - try: - # This may fail if value is not hashable. We can't add the value - # to the map, and by-value lookups for this value will be - # linear. - enum_class._value2member_map_[value] = enum_member - except TypeError: - pass - - - # If a custom type is mixed into the Enum, and it does not know how - # to pickle itself, pickle.dumps will succeed but pickle.loads will - # fail. Rather than have the error show up later and possibly far - # from the source, sabotage the pickle protocol for this class so - # that pickle.dumps also fails. - # - # However, if the new class implements its own __reduce_ex__, do not - # sabotage -- it's on them to make sure it works correctly. We use - # __reduce_ex__ instead of any of the others as it is preferred by - # pickle over __reduce__, and it handles all pickle protocols. - unpicklable = False - if '__reduce_ex__' not in classdict: - if member_type is not object: - methods = ('__getnewargs_ex__', '__getnewargs__', - '__reduce_ex__', '__reduce__') - if not any(m in member_type.__dict__ for m in methods): - _make_class_unpicklable(enum_class) - unpicklable = True - - - # double check that repr and friends are not the mixin's or various - # things break (such as pickle) - for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'): - class_method = getattr(enum_class, name) - obj_method = getattr(member_type, name, None) - enum_method = getattr(first_enum, name, None) - if name not in classdict and class_method is not enum_method: - if name == '__reduce_ex__' and unpicklable: - continue - setattr(enum_class, name, enum_method) - - # method resolution and int's are not playing nice - # Python's less than 2.6 use __cmp__ - - if pyver < 2.6: - - if issubclass(enum_class, int): - setattr(enum_class, '__cmp__', getattr(int, '__cmp__')) - - elif pyver < 3.0: - - if issubclass(enum_class, int): - for method in ( - '__le__', - '__lt__', - '__gt__', - '__ge__', - '__eq__', - '__ne__', - '__hash__', - ): - setattr(enum_class, method, getattr(int, method)) - - # replace any other __new__ with our own (as long as Enum is not None, - # anyway) -- again, this is to support pickle - if Enum is not None: - # if the user defined their own __new__, save it before it gets - # clobbered in case they subclass later - if save_new: - setattr(enum_class, '__member_new__', enum_class.__dict__['__new__']) - setattr(enum_class, '__new__', Enum.__dict__['__new__']) - return enum_class - - def __call__(cls, value, names=None, module=None, type=None): - """Either returns an existing member, or creates a new enum class. - - This method is used both when an enum class is given a value to match - to an enumeration member (i.e. Color(3)) and for the functional API - (i.e. Color = Enum('Color', names='red green blue')). - - When used for the functional API: `module`, if set, will be stored in - the new class' __module__ attribute; `type`, if set, will be mixed in - as the first base class. - - Note: if `module` is not set this routine will attempt to discover the - calling module by walking the frame stack; if this is unsuccessful - the resulting class will not be pickleable. - - """ - if names is None: # simple value lookup - return cls.__new__(cls, value) - # otherwise, functional API: we're creating a new Enum type - return cls._create_(value, names, module=module, type=type) - - def __contains__(cls, member): - return isinstance(member, cls) and member.name in cls._member_map_ - - def __delattr__(cls, attr): - # nicer error message when someone tries to delete an attribute - # (see issue19025). - if attr in cls._member_map_: - raise AttributeError( - "%s: cannot delete Enum member." % cls.__name__) - super(EnumMeta, cls).__delattr__(attr) - - def __dir__(self): - return (['__class__', '__doc__', '__members__', '__module__'] + - self._member_names_) - - @property - def __members__(cls): - """Returns a mapping of member name->value. - - This mapping lists all enum members, including aliases. Note that this - is a copy of the internal mapping. - - """ - return cls._member_map_.copy() - - def __getattr__(cls, name): - """Return the enum member matching `name` - - We use __getattr__ instead of descriptors or inserting into the enum - class' __dict__ in order to support `name` and `value` being both - properties for enum members (which live in the class' __dict__) and - enum members themselves. - - """ - if _is_dunder(name): - raise AttributeError(name) - try: - return cls._member_map_[name] - except KeyError: - raise AttributeError(name) - - def __getitem__(cls, name): - return cls._member_map_[name] - - def __iter__(cls): - return (cls._member_map_[name] for name in cls._member_names_) - - def __reversed__(cls): - return (cls._member_map_[name] for name in reversed(cls._member_names_)) - - def __len__(cls): - return len(cls._member_names_) - - def __repr__(cls): - return "" % cls.__name__ - - def __setattr__(cls, name, value): - """Block attempts to reassign Enum members. - - A simple assignment to the class namespace only changes one of the - several possible ways to get an Enum member from the Enum class, - resulting in an inconsistent Enumeration. - - """ - member_map = cls.__dict__.get('_member_map_', {}) - if name in member_map: - raise AttributeError('Cannot reassign members.') - super(EnumMeta, cls).__setattr__(name, value) - - def _create_(cls, class_name, names=None, module=None, type=None): - """Convenience method to create a new Enum class. - - `names` can be: - - * A string containing member names, separated either with spaces or - commas. Values are auto-numbered from 1. - * An iterable of member names. Values are auto-numbered from 1. - * An iterable of (member name, value) pairs. - * A mapping of member name -> value. - - """ - if pyver < 3.0: - # if class_name is unicode, attempt a conversion to ASCII - if isinstance(class_name, unicode): - try: - class_name = class_name.encode('ascii') - except UnicodeEncodeError: - raise TypeError('%r is not representable in ASCII' % class_name) - metacls = cls.__class__ - if type is None: - bases = (cls, ) - else: - bases = (type, cls) - classdict = metacls.__prepare__(class_name, bases) - __order__ = [] - - # special processing needed for names? - if isinstance(names, basestring): - names = names.replace(',', ' ').split() - if isinstance(names, (tuple, list)) and isinstance(names[0], basestring): - names = [(e, i+1) for (i, e) in enumerate(names)] - - # Here, names is either an iterable of (name, value) or a mapping. - for item in names: - if isinstance(item, basestring): - member_name, member_value = item, names[item] - else: - member_name, member_value = item - classdict[member_name] = member_value - __order__.append(member_name) - # only set __order__ in classdict if name/value was not from a mapping - if not isinstance(item, basestring): - classdict['__order__'] = ' '.join(__order__) - enum_class = metacls.__new__(metacls, class_name, bases, classdict) - - # TODO: replace the frame hack if a blessed way to know the calling - # module is ever developed - if module is None: - try: - module = _sys._getframe(2).f_globals['__name__'] - except (AttributeError, ValueError): - pass - if module is None: - _make_class_unpicklable(enum_class) - else: - enum_class.__module__ = module - - return enum_class - - @staticmethod - def _get_mixins_(bases): - """Returns the type for creating enum members, and the first inherited - enum class. - - bases: the tuple of bases that was given to __new__ - - """ - if not bases or Enum is None: - return object, Enum - - - # double check that we are not subclassing a class with existing - # enumeration members; while we're at it, see if any other data - # type has been mixed in so we can use the correct __new__ - member_type = first_enum = None - for base in bases: - if (base is not Enum and - issubclass(base, Enum) and - base._member_names_): - raise TypeError("Cannot extend enumerations") - # base is now the last base in bases - if not issubclass(base, Enum): - raise TypeError("new enumerations must be created as " - "`ClassName([mixin_type,] enum_type)`") - - # get correct mix-in type (either mix-in type of Enum subclass, or - # first base if last base is Enum) - if not issubclass(bases[0], Enum): - member_type = bases[0] # first data type - first_enum = bases[-1] # enum type - else: - for base in bases[0].__mro__: - # most common: (IntEnum, int, Enum, object) - # possible: (, , - # , , - # ) - if issubclass(base, Enum): - if first_enum is None: - first_enum = base - else: - if member_type is None: - member_type = base - - return member_type, first_enum - - if pyver < 3.0: - @staticmethod - def _find_new_(classdict, member_type, first_enum): - """Returns the __new__ to be used for creating the enum members. - - classdict: the class dictionary given to __new__ - member_type: the data type whose __new__ will be used by default - first_enum: enumeration to check for an overriding __new__ - - """ - # now find the correct __new__, checking to see of one was defined - # by the user; also check earlier enum classes in case a __new__ was - # saved as __member_new__ - __new__ = classdict.get('__new__', None) - if __new__: - return None, True, True # __new__, save_new, use_args - - N__new__ = getattr(None, '__new__') - O__new__ = getattr(object, '__new__') - if Enum is None: - E__new__ = N__new__ - else: - E__new__ = Enum.__dict__['__new__'] - # check all possibles for __member_new__ before falling back to - # __new__ - for method in ('__member_new__', '__new__'): - for possible in (member_type, first_enum): - try: - target = possible.__dict__[method] - except (AttributeError, KeyError): - target = getattr(possible, method, None) - if target not in [ - None, - N__new__, - O__new__, - E__new__, - ]: - if method == '__member_new__': - classdict['__new__'] = target - return None, False, True - if isinstance(target, staticmethod): - target = target.__get__(member_type) - __new__ = target - break - if __new__ is not None: - break - else: - __new__ = object.__new__ - - # if a non-object.__new__ is used then whatever value/tuple was - # assigned to the enum member name will be passed to __new__ and to the - # new enum member's __init__ - if __new__ is object.__new__: - use_args = False - else: - use_args = True - - return __new__, False, use_args - else: - @staticmethod - def _find_new_(classdict, member_type, first_enum): - """Returns the __new__ to be used for creating the enum members. - - classdict: the class dictionary given to __new__ - member_type: the data type whose __new__ will be used by default - first_enum: enumeration to check for an overriding __new__ - - """ - # now find the correct __new__, checking to see of one was defined - # by the user; also check earlier enum classes in case a __new__ was - # saved as __member_new__ - __new__ = classdict.get('__new__', None) - - # should __new__ be saved as __member_new__ later? - save_new = __new__ is not None - - if __new__ is None: - # check all possibles for __member_new__ before falling back to - # __new__ - for method in ('__member_new__', '__new__'): - for possible in (member_type, first_enum): - target = getattr(possible, method, None) - if target not in ( - None, - None.__new__, - object.__new__, - Enum.__new__, - ): - __new__ = target - break - if __new__ is not None: - break - else: - __new__ = object.__new__ - - # if a non-object.__new__ is used then whatever value/tuple was - # assigned to the enum member name will be passed to __new__ and to the - # new enum member's __init__ - if __new__ is object.__new__: - use_args = False - else: - use_args = True - - return __new__, save_new, use_args - - -######################################################## -# In order to support Python 2 and 3 with a single -# codebase we have to create the Enum methods separately -# and then use the `type(name, bases, dict)` method to -# create the class. -######################################################## -temp_enum_dict = {} -temp_enum_dict['__doc__'] = "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n" - -def __new__(cls, value): - # all enum instances are actually created during class construction - # without calling this method; this method is called by the metaclass' - # __call__ (i.e. Color(3) ), and by pickle - if type(value) is cls: - # For lookups like Color(Color.red) - value = value.value - #return value - # by-value search for a matching enum member - # see if it's in the reverse mapping (for hashable values) - try: - if value in cls._value2member_map_: - return cls._value2member_map_[value] - except TypeError: - # not there, now do long search -- O(n) behavior - for member in cls._member_map_.values(): - if member.value == value: - return member - raise ValueError("%s is not a valid %s" % (value, cls.__name__)) -temp_enum_dict['__new__'] = __new__ -del __new__ - -def __repr__(self): - return "<%s.%s: %r>" % ( - self.__class__.__name__, self._name_, self._value_) -temp_enum_dict['__repr__'] = __repr__ -del __repr__ - -def __str__(self): - return "%s.%s" % (self.__class__.__name__, self._name_) -temp_enum_dict['__str__'] = __str__ -del __str__ - -def __dir__(self): - added_behavior = [ - m - for cls in self.__class__.mro() - for m in cls.__dict__ - if m[0] != '_' - ] - return (['__class__', '__doc__', '__module__', ] + added_behavior) -temp_enum_dict['__dir__'] = __dir__ -del __dir__ - -def __format__(self, format_spec): - # mixed-in Enums should use the mixed-in type's __format__, otherwise - # we can get strange results with the Enum name showing up instead of - # the value - - # pure Enum branch - if self._member_type_ is object: - cls = str - val = str(self) - # mix-in branch - else: - cls = self._member_type_ - val = self.value - return cls.__format__(val, format_spec) -temp_enum_dict['__format__'] = __format__ -del __format__ - - -#################################### -# Python's less than 2.6 use __cmp__ - -if pyver < 2.6: - - def __cmp__(self, other): - if type(other) is self.__class__: - if self is other: - return 0 - return -1 - return NotImplemented - raise TypeError("unorderable types: %s() and %s()" % (self.__class__.__name__, other.__class__.__name__)) - temp_enum_dict['__cmp__'] = __cmp__ - del __cmp__ - -else: - - def __le__(self, other): - raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__)) - temp_enum_dict['__le__'] = __le__ - del __le__ - - def __lt__(self, other): - raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__)) - temp_enum_dict['__lt__'] = __lt__ - del __lt__ - - def __ge__(self, other): - raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__)) - temp_enum_dict['__ge__'] = __ge__ - del __ge__ - - def __gt__(self, other): - raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__)) - temp_enum_dict['__gt__'] = __gt__ - del __gt__ - - -def __eq__(self, other): - if type(other) is self.__class__: - return self is other - return NotImplemented -temp_enum_dict['__eq__'] = __eq__ -del __eq__ - -def __ne__(self, other): - if type(other) is self.__class__: - return self is not other - return NotImplemented -temp_enum_dict['__ne__'] = __ne__ -del __ne__ - -def __hash__(self): - return hash(self._name_) -temp_enum_dict['__hash__'] = __hash__ -del __hash__ - -def __reduce_ex__(self, proto): - return self.__class__, (self._value_, ) -temp_enum_dict['__reduce_ex__'] = __reduce_ex__ -del __reduce_ex__ - -# _RouteClassAttributeToGetattr is used to provide access to the `name` -# and `value` properties of enum members while keeping some measure of -# protection from modification, while still allowing for an enumeration -# to have members named `name` and `value`. This works because enumeration -# members are not set directly on the enum class -- __getattr__ is -# used to look them up. - -@_RouteClassAttributeToGetattr -def name(self): - return self._name_ -temp_enum_dict['name'] = name -del name - -@_RouteClassAttributeToGetattr -def value(self): - return self._value_ -temp_enum_dict['value'] = value -del value - -Enum = EnumMeta('Enum', (object, ), temp_enum_dict) -del temp_enum_dict - -# Enum has now been created -########################### - -class IntEnum(int, Enum): - """Enum where members are also (and must be) ints""" - - -def unique(enumeration): - """Class decorator that ensures only unique members exist in an enumeration.""" - duplicates = [] - for name, member in enumeration.__members__.items(): - if name != member.name: - duplicates.append((name, member.name)) - if duplicates: - duplicate_names = ', '.join( - ["%s -> %s" % (alias, name) for (alias, name) in duplicates] - ) - raise ValueError('duplicate names found in %r: %s' % - (enumeration, duplicate_names) - ) - return enumeration diff --git a/lib/enum/doc/enum.rst b/lib/enum/doc/enum.rst deleted file mode 100644 index 0d429bfc..00000000 --- a/lib/enum/doc/enum.rst +++ /dev/null @@ -1,725 +0,0 @@ -``enum`` --- support for enumerations -======================================== - -.. :synopsis: enumerations are sets of symbolic names bound to unique, constant - values. -.. :moduleauthor:: Ethan Furman -.. :sectionauthor:: Barry Warsaw , -.. :sectionauthor:: Eli Bendersky , -.. :sectionauthor:: Ethan Furman - ----------------- - -An enumeration is a set of symbolic names (members) bound to unique, constant -values. Within an enumeration, the members can be compared by identity, and -the enumeration itself can be iterated over. - - -Module Contents ---------------- - -This module defines two enumeration classes that can be used to define unique -sets of names and values: ``Enum`` and ``IntEnum``. It also defines -one decorator, ``unique``. - -``Enum`` - -Base class for creating enumerated constants. See section `Functional API`_ -for an alternate construction syntax. - -``IntEnum`` - -Base class for creating enumerated constants that are also subclasses of ``int``. - -``unique`` - -Enum class decorator that ensures only one name is bound to any one value. - - -Creating an Enum ----------------- - -Enumerations are created using the ``class`` syntax, which makes them -easy to read and write. An alternative creation method is described in -`Functional API`_. To define an enumeration, subclass ``Enum`` as -follows:: - - >>> from enum import Enum - >>> class Color(Enum): - ... red = 1 - ... green = 2 - ... blue = 3 - -Note: Nomenclature - - - The class ``Color`` is an *enumeration* (or *enum*) - - The attributes ``Color.red``, ``Color.green``, etc., are - *enumeration members* (or *enum members*). - - The enum members have *names* and *values* (the name of - ``Color.red`` is ``red``, the value of ``Color.blue`` is - ``3``, etc.) - -Note: - - Even though we use the ``class`` syntax to create Enums, Enums - are not normal Python classes. See `How are Enums different?`_ for - more details. - -Enumeration members have human readable string representations:: - - >>> print(Color.red) - Color.red - -...while their ``repr`` has more information:: - - >>> print(repr(Color.red)) - - -The *type* of an enumeration member is the enumeration it belongs to:: - - >>> type(Color.red) - - >>> isinstance(Color.green, Color) - True - >>> - -Enum members also have a property that contains just their item name:: - - >>> print(Color.red.name) - red - -Enumerations support iteration. In Python 3.x definition order is used; in -Python 2.x the definition order is not available, but class attribute -``__order__`` is supported; otherwise, value order is used:: - - >>> class Shake(Enum): - ... __order__ = 'vanilla chocolate cookies mint' # only needed in 2.x - ... vanilla = 7 - ... chocolate = 4 - ... cookies = 9 - ... mint = 3 - ... - >>> for shake in Shake: - ... print(shake) - ... - Shake.vanilla - Shake.chocolate - Shake.cookies - Shake.mint - -The ``__order__`` attribute is always removed, and in 3.x it is also ignored -(order is definition order); however, in the stdlib version it will be ignored -but not removed. - -Enumeration members are hashable, so they can be used in dictionaries and sets:: - - >>> apples = {} - >>> apples[Color.red] = 'red delicious' - >>> apples[Color.green] = 'granny smith' - >>> apples == {Color.red: 'red delicious', Color.green: 'granny smith'} - True - - -Programmatic access to enumeration members and their attributes ---------------------------------------------------------------- - -Sometimes it's useful to access members in enumerations programmatically (i.e. -situations where ``Color.red`` won't do because the exact color is not known -at program-writing time). ``Enum`` allows such access:: - - >>> Color(1) - - >>> Color(3) - - -If you want to access enum members by *name*, use item access:: - - >>> Color['red'] - - >>> Color['green'] - - -If have an enum member and need its ``name`` or ``value``:: - - >>> member = Color.red - >>> member.name - 'red' - >>> member.value - 1 - - -Duplicating enum members and values ------------------------------------ - -Having two enum members (or any other attribute) with the same name is invalid; -in Python 3.x this would raise an error, but in Python 2.x the second member -simply overwrites the first:: - - >>> # python 2.x - >>> class Shape(Enum): - ... square = 2 - ... square = 3 - ... - >>> Shape.square - - - >>> # python 3.x - >>> class Shape(Enum): - ... square = 2 - ... square = 3 - Traceback (most recent call last): - ... - TypeError: Attempted to reuse key: 'square' - -However, two enum members are allowed to have the same value. Given two members -A and B with the same value (and A defined first), B is an alias to A. By-value -lookup of the value of A and B will return A. By-name lookup of B will also -return A:: - - >>> class Shape(Enum): - ... __order__ = 'square diamond circle alias_for_square' # only needed in 2.x - ... square = 2 - ... diamond = 1 - ... circle = 3 - ... alias_for_square = 2 - ... - >>> Shape.square - - >>> Shape.alias_for_square - - >>> Shape(2) - - - -Allowing aliases is not always desirable. ``unique`` can be used to ensure -that none exist in a particular enumeration:: - - >>> from enum import unique - >>> @unique - ... class Mistake(Enum): - ... __order__ = 'one two three four' # only needed in 2.x - ... one = 1 - ... two = 2 - ... three = 3 - ... four = 3 - Traceback (most recent call last): - ... - ValueError: duplicate names found in : four -> three - -Iterating over the members of an enum does not provide the aliases:: - - >>> list(Shape) - [, , ] - -The special attribute ``__members__`` is a dictionary mapping names to members. -It includes all names defined in the enumeration, including the aliases:: - - >>> for name, member in sorted(Shape.__members__.items()): - ... name, member - ... - ('alias_for_square', ) - ('circle', ) - ('diamond', ) - ('square', ) - -The ``__members__`` attribute can be used for detailed programmatic access to -the enumeration members. For example, finding all the aliases:: - - >>> [name for name, member in Shape.__members__.items() if member.name != name] - ['alias_for_square'] - -Comparisons ------------ - -Enumeration members are compared by identity:: - - >>> Color.red is Color.red - True - >>> Color.red is Color.blue - False - >>> Color.red is not Color.blue - True - -Ordered comparisons between enumeration values are *not* supported. Enum -members are not integers (but see `IntEnum`_ below):: - - >>> Color.red < Color.blue - Traceback (most recent call last): - File "", line 1, in - TypeError: unorderable types: Color() < Color() - -.. warning:: - - In Python 2 *everything* is ordered, even though the ordering may not - make sense. If you want your enumerations to have a sensible ordering - check out the `OrderedEnum`_ recipe below. - - -Equality comparisons are defined though:: - - >>> Color.blue == Color.red - False - >>> Color.blue != Color.red - True - >>> Color.blue == Color.blue - True - -Comparisons against non-enumeration values will always compare not equal -(again, ``IntEnum`` was explicitly designed to behave differently, see -below):: - - >>> Color.blue == 2 - False - - -Allowed members and attributes of enumerations ----------------------------------------------- - -The examples above use integers for enumeration values. Using integers is -short and handy (and provided by default by the `Functional API`_), but not -strictly enforced. In the vast majority of use-cases, one doesn't care what -the actual value of an enumeration is. But if the value *is* important, -enumerations can have arbitrary values. - -Enumerations are Python classes, and can have methods and special methods as -usual. If we have this enumeration:: - - >>> class Mood(Enum): - ... funky = 1 - ... happy = 3 - ... - ... def describe(self): - ... # self is the member here - ... return self.name, self.value - ... - ... def __str__(self): - ... return 'my custom str! {0}'.format(self.value) - ... - ... @classmethod - ... def favorite_mood(cls): - ... # cls here is the enumeration - ... return cls.happy - -Then:: - - >>> Mood.favorite_mood() - - >>> Mood.happy.describe() - ('happy', 3) - >>> str(Mood.funky) - 'my custom str! 1' - -The rules for what is allowed are as follows: _sunder_ names (starting and -ending with a single underscore) are reserved by enum and cannot be used; -all other attributes defined within an enumeration will become members of this -enumeration, with the exception of *__dunder__* names and descriptors (methods -are also descriptors). - -Note: - - If your enumeration defines ``__new__`` and/or ``__init__`` then - whatever value(s) were given to the enum member will be passed into - those methods. See `Planet`_ for an example. - - -Restricted subclassing of enumerations --------------------------------------- - -Subclassing an enumeration is allowed only if the enumeration does not define -any members. So this is forbidden:: - - >>> class MoreColor(Color): - ... pink = 17 - Traceback (most recent call last): - ... - TypeError: Cannot extend enumerations - -But this is allowed:: - - >>> class Foo(Enum): - ... def some_behavior(self): - ... pass - ... - >>> class Bar(Foo): - ... happy = 1 - ... sad = 2 - ... - -Allowing subclassing of enums that define members would lead to a violation of -some important invariants of types and instances. On the other hand, it makes -sense to allow sharing some common behavior between a group of enumerations. -(See `OrderedEnum`_ for an example.) - - -Pickling --------- - -Enumerations can be pickled and unpickled:: - - >>> from enum.test_enum import Fruit - >>> from pickle import dumps, loads - >>> Fruit.tomato is loads(dumps(Fruit.tomato, 2)) - True - -The usual restrictions for pickling apply: picklable enums must be defined in -the top level of a module, since unpickling requires them to be importable -from that module. - -Note: - - With pickle protocol version 4 (introduced in Python 3.4) it is possible - to easily pickle enums nested in other classes. - - - -Functional API --------------- - -The ``Enum`` class is callable, providing the following functional API:: - - >>> Animal = Enum('Animal', 'ant bee cat dog') - >>> Animal - - >>> Animal.ant - - >>> Animal.ant.value - 1 - >>> list(Animal) - [, , , ] - -The semantics of this API resemble ``namedtuple``. The first argument -of the call to ``Enum`` is the name of the enumeration. - -The second argument is the *source* of enumeration member names. It can be a -whitespace-separated string of names, a sequence of names, a sequence of -2-tuples with key/value pairs, or a mapping (e.g. dictionary) of names to -values. The last two options enable assigning arbitrary values to -enumerations; the others auto-assign increasing integers starting with 1. A -new class derived from ``Enum`` is returned. In other words, the above -assignment to ``Animal`` is equivalent to:: - - >>> class Animals(Enum): - ... ant = 1 - ... bee = 2 - ... cat = 3 - ... dog = 4 - -Pickling enums created with the functional API can be tricky as frame stack -implementation details are used to try and figure out which module the -enumeration is being created in (e.g. it will fail if you use a utility -function in separate module, and also may not work on IronPython or Jython). -The solution is to specify the module name explicitly as follows:: - - >>> Animals = Enum('Animals', 'ant bee cat dog', module=__name__) - -Derived Enumerations --------------------- - -IntEnum -^^^^^^^ - -A variation of ``Enum`` is provided which is also a subclass of -``int``. Members of an ``IntEnum`` can be compared to integers; -by extension, integer enumerations of different types can also be compared -to each other:: - - >>> from enum import IntEnum - >>> class Shape(IntEnum): - ... circle = 1 - ... square = 2 - ... - >>> class Request(IntEnum): - ... post = 1 - ... get = 2 - ... - >>> Shape == 1 - False - >>> Shape.circle == 1 - True - >>> Shape.circle == Request.post - True - -However, they still can't be compared to standard ``Enum`` enumerations:: - - >>> class Shape(IntEnum): - ... circle = 1 - ... square = 2 - ... - >>> class Color(Enum): - ... red = 1 - ... green = 2 - ... - >>> Shape.circle == Color.red - False - -``IntEnum`` values behave like integers in other ways you'd expect:: - - >>> int(Shape.circle) - 1 - >>> ['a', 'b', 'c'][Shape.circle] - 'b' - >>> [i for i in range(Shape.square)] - [0, 1] - -For the vast majority of code, ``Enum`` is strongly recommended, -since ``IntEnum`` breaks some semantic promises of an enumeration (by -being comparable to integers, and thus by transitivity to other -unrelated enumerations). It should be used only in special cases where -there's no other choice; for example, when integer constants are -replaced with enumerations and backwards compatibility is required with code -that still expects integers. - - -Others -^^^^^^ - -While ``IntEnum`` is part of the ``enum`` module, it would be very -simple to implement independently:: - - class IntEnum(int, Enum): - pass - -This demonstrates how similar derived enumerations can be defined; for example -a ``StrEnum`` that mixes in ``str`` instead of ``int``. - -Some rules: - -1. When subclassing ``Enum``, mix-in types must appear before - ``Enum`` itself in the sequence of bases, as in the ``IntEnum`` - example above. -2. While ``Enum`` can have members of any type, once you mix in an - additional type, all the members must have values of that type, e.g. - ``int`` above. This restriction does not apply to mix-ins which only - add methods and don't specify another data type such as ``int`` or - ``str``. -3. When another data type is mixed in, the ``value`` attribute is *not the - same* as the enum member itself, although it is equivalant and will compare - equal. -4. %-style formatting: ``%s`` and ``%r`` call ``Enum``'s ``__str__`` and - ``__repr__`` respectively; other codes (such as ``%i`` or ``%h`` for - IntEnum) treat the enum member as its mixed-in type. - - Note: Prior to Python 3.4 there is a bug in ``str``'s %-formatting: ``int`` - subclasses are printed as strings and not numbers when the ``%d``, ``%i``, - or ``%u`` codes are used. -5. ``str.__format__`` (or ``format``) will use the mixed-in - type's ``__format__``. If the ``Enum``'s ``str`` or - ``repr`` is desired use the ``!s`` or ``!r`` ``str`` format codes. - - -Decorators ----------- - -unique -^^^^^^ - -A ``class`` decorator specifically for enumerations. It searches an -enumeration's ``__members__`` gathering any aliases it finds; if any are -found ``ValueError`` is raised with the details:: - - >>> @unique - ... class NoDupes(Enum): - ... first = 'one' - ... second = 'two' - ... third = 'two' - Traceback (most recent call last): - ... - ValueError: duplicate names found in : third -> second - - -Interesting examples --------------------- - -While ``Enum`` and ``IntEnum`` are expected to cover the majority of -use-cases, they cannot cover them all. Here are recipes for some different -types of enumerations that can be used directly, or as examples for creating -one's own. - - -AutoNumber -^^^^^^^^^^ - -Avoids having to specify the value for each enumeration member:: - - >>> class AutoNumber(Enum): - ... def __new__(cls): - ... value = len(cls.__members__) + 1 - ... obj = object.__new__(cls) - ... obj._value_ = value - ... return obj - ... - >>> class Color(AutoNumber): - ... __order__ = "red green blue" # only needed in 2.x - ... red = () - ... green = () - ... blue = () - ... - >>> Color.green.value == 2 - True - -Note: - - The `__new__` method, if defined, is used during creation of the Enum - members; it is then replaced by Enum's `__new__` which is used after - class creation for lookup of existing members. Due to the way Enums are - supposed to behave, there is no way to customize Enum's `__new__`. - - -UniqueEnum -^^^^^^^^^^ - -Raises an error if a duplicate member name is found instead of creating an -alias:: - - >>> class UniqueEnum(Enum): - ... def __init__(self, *args): - ... cls = self.__class__ - ... if any(self.value == e.value for e in cls): - ... a = self.name - ... e = cls(self.value).name - ... raise ValueError( - ... "aliases not allowed in UniqueEnum: %r --> %r" - ... % (a, e)) - ... - >>> class Color(UniqueEnum): - ... red = 1 - ... green = 2 - ... blue = 3 - ... grene = 2 - Traceback (most recent call last): - ... - ValueError: aliases not allowed in UniqueEnum: 'grene' --> 'green' - - -OrderedEnum -^^^^^^^^^^^ - -An ordered enumeration that is not based on ``IntEnum`` and so maintains -the normal ``Enum`` invariants (such as not being comparable to other -enumerations):: - - >>> class OrderedEnum(Enum): - ... def __ge__(self, other): - ... if self.__class__ is other.__class__: - ... return self._value_ >= other._value_ - ... return NotImplemented - ... def __gt__(self, other): - ... if self.__class__ is other.__class__: - ... return self._value_ > other._value_ - ... return NotImplemented - ... def __le__(self, other): - ... if self.__class__ is other.__class__: - ... return self._value_ <= other._value_ - ... return NotImplemented - ... def __lt__(self, other): - ... if self.__class__ is other.__class__: - ... return self._value_ < other._value_ - ... return NotImplemented - ... - >>> class Grade(OrderedEnum): - ... __ordered__ = 'A B C D F' - ... A = 5 - ... B = 4 - ... C = 3 - ... D = 2 - ... F = 1 - ... - >>> Grade.C < Grade.A - True - - -Planet -^^^^^^ - -If ``__new__`` or ``__init__`` is defined the value of the enum member -will be passed to those methods:: - - >>> class Planet(Enum): - ... MERCURY = (3.303e+23, 2.4397e6) - ... VENUS = (4.869e+24, 6.0518e6) - ... EARTH = (5.976e+24, 6.37814e6) - ... MARS = (6.421e+23, 3.3972e6) - ... JUPITER = (1.9e+27, 7.1492e7) - ... SATURN = (5.688e+26, 6.0268e7) - ... URANUS = (8.686e+25, 2.5559e7) - ... NEPTUNE = (1.024e+26, 2.4746e7) - ... def __init__(self, mass, radius): - ... self.mass = mass # in kilograms - ... self.radius = radius # in meters - ... @property - ... def surface_gravity(self): - ... # universal gravitational constant (m3 kg-1 s-2) - ... G = 6.67300E-11 - ... return G * self.mass / (self.radius * self.radius) - ... - >>> Planet.EARTH.value - (5.976e+24, 6378140.0) - >>> Planet.EARTH.surface_gravity - 9.802652743337129 - - -How are Enums different? ------------------------- - -Enums have a custom metaclass that affects many aspects of both derived Enum -classes and their instances (members). - - -Enum Classes -^^^^^^^^^^^^ - -The ``EnumMeta`` metaclass is responsible for providing the -``__contains__``, ``__dir__``, ``__iter__`` and other methods that -allow one to do things with an ``Enum`` class that fail on a typical -class, such as ``list(Color)`` or ``some_var in Color``. ``EnumMeta`` is -responsible for ensuring that various other methods on the final ``Enum`` -class are correct (such as ``__new__``, ``__getnewargs__``, -``__str__`` and ``__repr__``) - - -Enum Members (aka instances) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The most interesting thing about Enum members is that they are singletons. -``EnumMeta`` creates them all while it is creating the ``Enum`` -class itself, and then puts a custom ``__new__`` in place to ensure -that no new ones are ever instantiated by returning only the existing -member instances. - - -Finer Points -^^^^^^^^^^^^ - -Enum members are instances of an Enum class, and even though they are -accessible as ``EnumClass.member``, they are not accessible directly from -the member:: - - >>> Color.red - - >>> Color.red.blue - Traceback (most recent call last): - ... - AttributeError: 'Color' object has no attribute 'blue' - -Likewise, ``__members__`` is only available on the class. - -In Python 3.x ``__members__`` is always an ``OrderedDict``, with the order being -the definition order. In Python 2.7 ``__members__`` is an ``OrderedDict`` if -``__order__`` was specified, and a plain ``dict`` otherwise. In all other Python -2.x versions ``__members__`` is a plain ``dict`` even if ``__order__`` was specified -as the ``OrderedDict`` type didn't exist yet. - -If you give your ``Enum`` subclass extra methods, like the `Planet`_ -class above, those methods will show up in a `dir` of the member, -but not of the class:: - - >>> dir(Planet) - ['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS', - 'VENUS', '__class__', '__doc__', '__members__', '__module__'] - >>> dir(Planet.EARTH) - ['__class__', '__doc__', '__module__', 'name', 'surface_gravity', 'value'] - -A ``__new__`` method will only be used for the creation of the -``Enum`` members -- after that it is replaced. This means if you wish to -change how ``Enum`` members are looked up you either have to write a -helper function or a ``classmethod``. diff --git a/lib/enum/enum.py b/lib/enum/enum.py deleted file mode 100644 index 6a327a8a..00000000 --- a/lib/enum/enum.py +++ /dev/null @@ -1,790 +0,0 @@ -"""Python Enumerations""" - -import sys as _sys - -__all__ = ['Enum', 'IntEnum', 'unique'] - -version = 1, 0, 4 - -pyver = float('%s.%s' % _sys.version_info[:2]) - -try: - any -except NameError: - def any(iterable): - for element in iterable: - if element: - return True - return False - -try: - from collections import OrderedDict -except ImportError: - OrderedDict = None - -try: - basestring -except NameError: - # In Python 2 basestring is the ancestor of both str and unicode - # in Python 3 it's just str, but was missing in 3.1 - basestring = str - -try: - unicode -except NameError: - # In Python 3 unicode no longer exists (it's just str) - unicode = str - -class _RouteClassAttributeToGetattr(object): - """Route attribute access on a class to __getattr__. - - This is a descriptor, used to define attributes that act differently when - accessed through an instance and through a class. Instance access remains - normal, but access to an attribute through a class will be routed to the - class's __getattr__ method; this is done by raising AttributeError. - - """ - def __init__(self, fget=None): - self.fget = fget - - def __get__(self, instance, ownerclass=None): - if instance is None: - raise AttributeError() - return self.fget(instance) - - def __set__(self, instance, value): - raise AttributeError("can't set attribute") - - def __delete__(self, instance): - raise AttributeError("can't delete attribute") - - -def _is_descriptor(obj): - """Returns True if obj is a descriptor, False otherwise.""" - return ( - hasattr(obj, '__get__') or - hasattr(obj, '__set__') or - hasattr(obj, '__delete__')) - - -def _is_dunder(name): - """Returns True if a __dunder__ name, False otherwise.""" - return (name[:2] == name[-2:] == '__' and - name[2:3] != '_' and - name[-3:-2] != '_' and - len(name) > 4) - - -def _is_sunder(name): - """Returns True if a _sunder_ name, False otherwise.""" - return (name[0] == name[-1] == '_' and - name[1:2] != '_' and - name[-2:-1] != '_' and - len(name) > 2) - - -def _make_class_unpicklable(cls): - """Make the given class un-picklable.""" - def _break_on_call_reduce(self, protocol=None): - raise TypeError('%r cannot be pickled' % self) - cls.__reduce_ex__ = _break_on_call_reduce - cls.__module__ = '' - - -class _EnumDict(dict): - """Track enum member order and ensure member names are not reused. - - EnumMeta will use the names found in self._member_names as the - enumeration member names. - - """ - def __init__(self): - super(_EnumDict, self).__init__() - self._member_names = [] - - def __setitem__(self, key, value): - """Changes anything not dundered or not a descriptor. - - If a descriptor is added with the same name as an enum member, the name - is removed from _member_names (this may leave a hole in the numerical - sequence of values). - - If an enum member name is used twice, an error is raised; duplicate - values are not checked for. - - Single underscore (sunder) names are reserved. - - Note: in 3.x __order__ is simply discarded as a not necessary piece - leftover from 2.x - - """ - if pyver >= 3.0 and key == '__order__': - return - if _is_sunder(key): - raise ValueError('_names_ are reserved for future Enum use') - elif _is_dunder(key): - pass - elif key in self._member_names: - # descriptor overwriting an enum? - raise TypeError('Attempted to reuse key: %r' % key) - elif not _is_descriptor(value): - if key in self: - # enum overwriting a descriptor? - raise TypeError('Key already defined as: %r' % self[key]) - self._member_names.append(key) - super(_EnumDict, self).__setitem__(key, value) - - -# Dummy value for Enum as EnumMeta explicity checks for it, but of course until -# EnumMeta finishes running the first time the Enum class doesn't exist. This -# is also why there are checks in EnumMeta like `if Enum is not None` -Enum = None - - -class EnumMeta(type): - """Metaclass for Enum""" - @classmethod - def __prepare__(metacls, cls, bases): - return _EnumDict() - - def __new__(metacls, cls, bases, classdict): - # an Enum class is final once enumeration items have been defined; it - # cannot be mixed with other types (int, float, etc.) if it has an - # inherited __new__ unless a new __new__ is defined (or the resulting - # class will fail). - if type(classdict) is dict: - original_dict = classdict - classdict = _EnumDict() - for k, v in original_dict.items(): - classdict[k] = v - - member_type, first_enum = metacls._get_mixins_(bases) - __new__, save_new, use_args = metacls._find_new_(classdict, member_type, - first_enum) - # save enum items into separate mapping so they don't get baked into - # the new class - members = dict((k, classdict[k]) for k in classdict._member_names) - for name in classdict._member_names: - del classdict[name] - - # py2 support for definition order - __order__ = classdict.get('__order__') - if __order__ is None: - if pyver < 3.0: - try: - __order__ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])] - except TypeError: - __order__ = [name for name in sorted(members.keys())] - else: - __order__ = classdict._member_names - else: - del classdict['__order__'] - if pyver < 3.0: - __order__ = __order__.replace(',', ' ').split() - aliases = [name for name in members if name not in __order__] - __order__ += aliases - - # check for illegal enum names (any others?) - invalid_names = set(members) & set(['mro']) - if invalid_names: - raise ValueError('Invalid enum member name(s): %s' % ( - ', '.join(invalid_names), )) - - # create our new Enum type - enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict) - enum_class._member_names_ = [] # names in random order - if OrderedDict is not None: - enum_class._member_map_ = OrderedDict() - else: - enum_class._member_map_ = {} # name->value map - enum_class._member_type_ = member_type - - # Reverse value->name map for hashable values. - enum_class._value2member_map_ = {} - - # instantiate them, checking for duplicates as we go - # we instantiate first instead of checking for duplicates first in case - # a custom __new__ is doing something funky with the values -- such as - # auto-numbering ;) - if __new__ is None: - __new__ = enum_class.__new__ - for member_name in __order__: - value = members[member_name] - if not isinstance(value, tuple): - args = (value, ) - else: - args = value - if member_type is tuple: # special case for tuple enums - args = (args, ) # wrap it one more time - if not use_args or not args: - enum_member = __new__(enum_class) - if not hasattr(enum_member, '_value_'): - enum_member._value_ = value - else: - enum_member = __new__(enum_class, *args) - if not hasattr(enum_member, '_value_'): - enum_member._value_ = member_type(*args) - value = enum_member._value_ - enum_member._name_ = member_name - enum_member.__objclass__ = enum_class - enum_member.__init__(*args) - # If another member with the same value was already defined, the - # new member becomes an alias to the existing one. - for name, canonical_member in enum_class._member_map_.items(): - if canonical_member.value == enum_member._value_: - enum_member = canonical_member - break - else: - # Aliases don't appear in member names (only in __members__). - enum_class._member_names_.append(member_name) - enum_class._member_map_[member_name] = enum_member - try: - # This may fail if value is not hashable. We can't add the value - # to the map, and by-value lookups for this value will be - # linear. - enum_class._value2member_map_[value] = enum_member - except TypeError: - pass - - - # If a custom type is mixed into the Enum, and it does not know how - # to pickle itself, pickle.dumps will succeed but pickle.loads will - # fail. Rather than have the error show up later and possibly far - # from the source, sabotage the pickle protocol for this class so - # that pickle.dumps also fails. - # - # However, if the new class implements its own __reduce_ex__, do not - # sabotage -- it's on them to make sure it works correctly. We use - # __reduce_ex__ instead of any of the others as it is preferred by - # pickle over __reduce__, and it handles all pickle protocols. - unpicklable = False - if '__reduce_ex__' not in classdict: - if member_type is not object: - methods = ('__getnewargs_ex__', '__getnewargs__', - '__reduce_ex__', '__reduce__') - if not any(m in member_type.__dict__ for m in methods): - _make_class_unpicklable(enum_class) - unpicklable = True - - - # double check that repr and friends are not the mixin's or various - # things break (such as pickle) - for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'): - class_method = getattr(enum_class, name) - obj_method = getattr(member_type, name, None) - enum_method = getattr(first_enum, name, None) - if name not in classdict and class_method is not enum_method: - if name == '__reduce_ex__' and unpicklable: - continue - setattr(enum_class, name, enum_method) - - # method resolution and int's are not playing nice - # Python's less than 2.6 use __cmp__ - - if pyver < 2.6: - - if issubclass(enum_class, int): - setattr(enum_class, '__cmp__', getattr(int, '__cmp__')) - - elif pyver < 3.0: - - if issubclass(enum_class, int): - for method in ( - '__le__', - '__lt__', - '__gt__', - '__ge__', - '__eq__', - '__ne__', - '__hash__', - ): - setattr(enum_class, method, getattr(int, method)) - - # replace any other __new__ with our own (as long as Enum is not None, - # anyway) -- again, this is to support pickle - if Enum is not None: - # if the user defined their own __new__, save it before it gets - # clobbered in case they subclass later - if save_new: - setattr(enum_class, '__member_new__', enum_class.__dict__['__new__']) - setattr(enum_class, '__new__', Enum.__dict__['__new__']) - return enum_class - - def __call__(cls, value, names=None, module=None, type=None): - """Either returns an existing member, or creates a new enum class. - - This method is used both when an enum class is given a value to match - to an enumeration member (i.e. Color(3)) and for the functional API - (i.e. Color = Enum('Color', names='red green blue')). - - When used for the functional API: `module`, if set, will be stored in - the new class' __module__ attribute; `type`, if set, will be mixed in - as the first base class. - - Note: if `module` is not set this routine will attempt to discover the - calling module by walking the frame stack; if this is unsuccessful - the resulting class will not be pickleable. - - """ - if names is None: # simple value lookup - return cls.__new__(cls, value) - # otherwise, functional API: we're creating a new Enum type - return cls._create_(value, names, module=module, type=type) - - def __contains__(cls, member): - return isinstance(member, cls) and member.name in cls._member_map_ - - def __delattr__(cls, attr): - # nicer error message when someone tries to delete an attribute - # (see issue19025). - if attr in cls._member_map_: - raise AttributeError( - "%s: cannot delete Enum member." % cls.__name__) - super(EnumMeta, cls).__delattr__(attr) - - def __dir__(self): - return (['__class__', '__doc__', '__members__', '__module__'] + - self._member_names_) - - @property - def __members__(cls): - """Returns a mapping of member name->value. - - This mapping lists all enum members, including aliases. Note that this - is a copy of the internal mapping. - - """ - return cls._member_map_.copy() - - def __getattr__(cls, name): - """Return the enum member matching `name` - - We use __getattr__ instead of descriptors or inserting into the enum - class' __dict__ in order to support `name` and `value` being both - properties for enum members (which live in the class' __dict__) and - enum members themselves. - - """ - if _is_dunder(name): - raise AttributeError(name) - try: - return cls._member_map_[name] - except KeyError: - raise AttributeError(name) - - def __getitem__(cls, name): - return cls._member_map_[name] - - def __iter__(cls): - return (cls._member_map_[name] for name in cls._member_names_) - - def __reversed__(cls): - return (cls._member_map_[name] for name in reversed(cls._member_names_)) - - def __len__(cls): - return len(cls._member_names_) - - def __repr__(cls): - return "" % cls.__name__ - - def __setattr__(cls, name, value): - """Block attempts to reassign Enum members. - - A simple assignment to the class namespace only changes one of the - several possible ways to get an Enum member from the Enum class, - resulting in an inconsistent Enumeration. - - """ - member_map = cls.__dict__.get('_member_map_', {}) - if name in member_map: - raise AttributeError('Cannot reassign members.') - super(EnumMeta, cls).__setattr__(name, value) - - def _create_(cls, class_name, names=None, module=None, type=None): - """Convenience method to create a new Enum class. - - `names` can be: - - * A string containing member names, separated either with spaces or - commas. Values are auto-numbered from 1. - * An iterable of member names. Values are auto-numbered from 1. - * An iterable of (member name, value) pairs. - * A mapping of member name -> value. - - """ - if pyver < 3.0: - # if class_name is unicode, attempt a conversion to ASCII - if isinstance(class_name, unicode): - try: - class_name = class_name.encode('ascii') - except UnicodeEncodeError: - raise TypeError('%r is not representable in ASCII' % class_name) - metacls = cls.__class__ - if type is None: - bases = (cls, ) - else: - bases = (type, cls) - classdict = metacls.__prepare__(class_name, bases) - __order__ = [] - - # special processing needed for names? - if isinstance(names, basestring): - names = names.replace(',', ' ').split() - if isinstance(names, (tuple, list)) and isinstance(names[0], basestring): - names = [(e, i+1) for (i, e) in enumerate(names)] - - # Here, names is either an iterable of (name, value) or a mapping. - for item in names: - if isinstance(item, basestring): - member_name, member_value = item, names[item] - else: - member_name, member_value = item - classdict[member_name] = member_value - __order__.append(member_name) - # only set __order__ in classdict if name/value was not from a mapping - if not isinstance(item, basestring): - classdict['__order__'] = ' '.join(__order__) - enum_class = metacls.__new__(metacls, class_name, bases, classdict) - - # TODO: replace the frame hack if a blessed way to know the calling - # module is ever developed - if module is None: - try: - module = _sys._getframe(2).f_globals['__name__'] - except (AttributeError, ValueError): - pass - if module is None: - _make_class_unpicklable(enum_class) - else: - enum_class.__module__ = module - - return enum_class - - @staticmethod - def _get_mixins_(bases): - """Returns the type for creating enum members, and the first inherited - enum class. - - bases: the tuple of bases that was given to __new__ - - """ - if not bases or Enum is None: - return object, Enum - - - # double check that we are not subclassing a class with existing - # enumeration members; while we're at it, see if any other data - # type has been mixed in so we can use the correct __new__ - member_type = first_enum = None - for base in bases: - if (base is not Enum and - issubclass(base, Enum) and - base._member_names_): - raise TypeError("Cannot extend enumerations") - # base is now the last base in bases - if not issubclass(base, Enum): - raise TypeError("new enumerations must be created as " - "`ClassName([mixin_type,] enum_type)`") - - # get correct mix-in type (either mix-in type of Enum subclass, or - # first base if last base is Enum) - if not issubclass(bases[0], Enum): - member_type = bases[0] # first data type - first_enum = bases[-1] # enum type - else: - for base in bases[0].__mro__: - # most common: (IntEnum, int, Enum, object) - # possible: (, , - # , , - # ) - if issubclass(base, Enum): - if first_enum is None: - first_enum = base - else: - if member_type is None: - member_type = base - - return member_type, first_enum - - if pyver < 3.0: - @staticmethod - def _find_new_(classdict, member_type, first_enum): - """Returns the __new__ to be used for creating the enum members. - - classdict: the class dictionary given to __new__ - member_type: the data type whose __new__ will be used by default - first_enum: enumeration to check for an overriding __new__ - - """ - # now find the correct __new__, checking to see of one was defined - # by the user; also check earlier enum classes in case a __new__ was - # saved as __member_new__ - __new__ = classdict.get('__new__', None) - if __new__: - return None, True, True # __new__, save_new, use_args - - N__new__ = getattr(None, '__new__') - O__new__ = getattr(object, '__new__') - if Enum is None: - E__new__ = N__new__ - else: - E__new__ = Enum.__dict__['__new__'] - # check all possibles for __member_new__ before falling back to - # __new__ - for method in ('__member_new__', '__new__'): - for possible in (member_type, first_enum): - try: - target = possible.__dict__[method] - except (AttributeError, KeyError): - target = getattr(possible, method, None) - if target not in [ - None, - N__new__, - O__new__, - E__new__, - ]: - if method == '__member_new__': - classdict['__new__'] = target - return None, False, True - if isinstance(target, staticmethod): - target = target.__get__(member_type) - __new__ = target - break - if __new__ is not None: - break - else: - __new__ = object.__new__ - - # if a non-object.__new__ is used then whatever value/tuple was - # assigned to the enum member name will be passed to __new__ and to the - # new enum member's __init__ - if __new__ is object.__new__: - use_args = False - else: - use_args = True - - return __new__, False, use_args - else: - @staticmethod - def _find_new_(classdict, member_type, first_enum): - """Returns the __new__ to be used for creating the enum members. - - classdict: the class dictionary given to __new__ - member_type: the data type whose __new__ will be used by default - first_enum: enumeration to check for an overriding __new__ - - """ - # now find the correct __new__, checking to see of one was defined - # by the user; also check earlier enum classes in case a __new__ was - # saved as __member_new__ - __new__ = classdict.get('__new__', None) - - # should __new__ be saved as __member_new__ later? - save_new = __new__ is not None - - if __new__ is None: - # check all possibles for __member_new__ before falling back to - # __new__ - for method in ('__member_new__', '__new__'): - for possible in (member_type, first_enum): - target = getattr(possible, method, None) - if target not in ( - None, - None.__new__, - object.__new__, - Enum.__new__, - ): - __new__ = target - break - if __new__ is not None: - break - else: - __new__ = object.__new__ - - # if a non-object.__new__ is used then whatever value/tuple was - # assigned to the enum member name will be passed to __new__ and to the - # new enum member's __init__ - if __new__ is object.__new__: - use_args = False - else: - use_args = True - - return __new__, save_new, use_args - - -######################################################## -# In order to support Python 2 and 3 with a single -# codebase we have to create the Enum methods separately -# and then use the `type(name, bases, dict)` method to -# create the class. -######################################################## -temp_enum_dict = {} -temp_enum_dict['__doc__'] = "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n" - -def __new__(cls, value): - # all enum instances are actually created during class construction - # without calling this method; this method is called by the metaclass' - # __call__ (i.e. Color(3) ), and by pickle - if type(value) is cls: - # For lookups like Color(Color.red) - value = value.value - #return value - # by-value search for a matching enum member - # see if it's in the reverse mapping (for hashable values) - try: - if value in cls._value2member_map_: - return cls._value2member_map_[value] - except TypeError: - # not there, now do long search -- O(n) behavior - for member in cls._member_map_.values(): - if member.value == value: - return member - raise ValueError("%s is not a valid %s" % (value, cls.__name__)) -temp_enum_dict['__new__'] = __new__ -del __new__ - -def __repr__(self): - return "<%s.%s: %r>" % ( - self.__class__.__name__, self._name_, self._value_) -temp_enum_dict['__repr__'] = __repr__ -del __repr__ - -def __str__(self): - return "%s.%s" % (self.__class__.__name__, self._name_) -temp_enum_dict['__str__'] = __str__ -del __str__ - -def __dir__(self): - added_behavior = [ - m - for cls in self.__class__.mro() - for m in cls.__dict__ - if m[0] != '_' - ] - return (['__class__', '__doc__', '__module__', ] + added_behavior) -temp_enum_dict['__dir__'] = __dir__ -del __dir__ - -def __format__(self, format_spec): - # mixed-in Enums should use the mixed-in type's __format__, otherwise - # we can get strange results with the Enum name showing up instead of - # the value - - # pure Enum branch - if self._member_type_ is object: - cls = str - val = str(self) - # mix-in branch - else: - cls = self._member_type_ - val = self.value - return cls.__format__(val, format_spec) -temp_enum_dict['__format__'] = __format__ -del __format__ - - -#################################### -# Python's less than 2.6 use __cmp__ - -if pyver < 2.6: - - def __cmp__(self, other): - if type(other) is self.__class__: - if self is other: - return 0 - return -1 - return NotImplemented - raise TypeError("unorderable types: %s() and %s()" % (self.__class__.__name__, other.__class__.__name__)) - temp_enum_dict['__cmp__'] = __cmp__ - del __cmp__ - -else: - - def __le__(self, other): - raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__)) - temp_enum_dict['__le__'] = __le__ - del __le__ - - def __lt__(self, other): - raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__)) - temp_enum_dict['__lt__'] = __lt__ - del __lt__ - - def __ge__(self, other): - raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__)) - temp_enum_dict['__ge__'] = __ge__ - del __ge__ - - def __gt__(self, other): - raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__)) - temp_enum_dict['__gt__'] = __gt__ - del __gt__ - - -def __eq__(self, other): - if type(other) is self.__class__: - return self is other - return NotImplemented -temp_enum_dict['__eq__'] = __eq__ -del __eq__ - -def __ne__(self, other): - if type(other) is self.__class__: - return self is not other - return NotImplemented -temp_enum_dict['__ne__'] = __ne__ -del __ne__ - -def __hash__(self): - return hash(self._name_) -temp_enum_dict['__hash__'] = __hash__ -del __hash__ - -def __reduce_ex__(self, proto): - return self.__class__, (self._value_, ) -temp_enum_dict['__reduce_ex__'] = __reduce_ex__ -del __reduce_ex__ - -# _RouteClassAttributeToGetattr is used to provide access to the `name` -# and `value` properties of enum members while keeping some measure of -# protection from modification, while still allowing for an enumeration -# to have members named `name` and `value`. This works because enumeration -# members are not set directly on the enum class -- __getattr__ is -# used to look them up. - -@_RouteClassAttributeToGetattr -def name(self): - return self._name_ -temp_enum_dict['name'] = name -del name - -@_RouteClassAttributeToGetattr -def value(self): - return self._value_ -temp_enum_dict['value'] = value -del value - -Enum = EnumMeta('Enum', (object, ), temp_enum_dict) -del temp_enum_dict - -# Enum has now been created -########################### - -class IntEnum(int, Enum): - """Enum where members are also (and must be) ints""" - - -def unique(enumeration): - """Class decorator that ensures only unique members exist in an enumeration.""" - duplicates = [] - for name, member in enumeration.__members__.items(): - if name != member.name: - duplicates.append((name, member.name)) - if duplicates: - duplicate_names = ', '.join( - ["%s -> %s" % (alias, name) for (alias, name) in duplicates] - ) - raise ValueError('duplicate names found in %r: %s' % - (enumeration, duplicate_names) - ) - return enumeration diff --git a/lib/enum/test_enum.py b/lib/enum/test_enum.py deleted file mode 100644 index d7a97942..00000000 --- a/lib/enum/test_enum.py +++ /dev/null @@ -1,1690 +0,0 @@ -import enum -import sys -import unittest -from enum import Enum, IntEnum, unique, EnumMeta -from pickle import dumps, loads, PicklingError, HIGHEST_PROTOCOL - -pyver = float('%s.%s' % sys.version_info[:2]) - -try: - any -except NameError: - def any(iterable): - for element in iterable: - if element: - return True - return False - -try: - unicode -except NameError: - unicode = str - -try: - from collections import OrderedDict -except ImportError: - OrderedDict = None - -# for pickle tests -try: - class Stooges(Enum): - LARRY = 1 - CURLY = 2 - MOE = 3 -except Exception: - Stooges = sys.exc_info()[1] - -try: - class IntStooges(int, Enum): - LARRY = 1 - CURLY = 2 - MOE = 3 -except Exception: - IntStooges = sys.exc_info()[1] - -try: - class FloatStooges(float, Enum): - LARRY = 1.39 - CURLY = 2.72 - MOE = 3.142596 -except Exception: - FloatStooges = sys.exc_info()[1] - -# for pickle test and subclass tests -try: - class StrEnum(str, Enum): - 'accepts only string values' - class Name(StrEnum): - BDFL = 'Guido van Rossum' - FLUFL = 'Barry Warsaw' -except Exception: - Name = sys.exc_info()[1] - -try: - Question = Enum('Question', 'who what when where why', module=__name__) -except Exception: - Question = sys.exc_info()[1] - -try: - Answer = Enum('Answer', 'him this then there because') -except Exception: - Answer = sys.exc_info()[1] - -try: - Theory = Enum('Theory', 'rule law supposition', qualname='spanish_inquisition') -except Exception: - Theory = sys.exc_info()[1] - -# for doctests -try: - class Fruit(Enum): - tomato = 1 - banana = 2 - cherry = 3 -except Exception: - pass - -def test_pickle_dump_load(assertion, source, target=None, - protocol=(0, HIGHEST_PROTOCOL)): - start, stop = protocol - failures = [] - for protocol in range(start, stop+1): - try: - if target is None: - assertion(loads(dumps(source, protocol=protocol)) is source) - else: - assertion(loads(dumps(source, protocol=protocol)), target) - except Exception: - exc, tb = sys.exc_info()[1:] - failures.append('%2d: %s' %(protocol, exc)) - if failures: - raise ValueError('Failed with protocols: %s' % ', '.join(failures)) - -def test_pickle_exception(assertion, exception, obj, - protocol=(0, HIGHEST_PROTOCOL)): - start, stop = protocol - failures = [] - for protocol in range(start, stop+1): - try: - assertion(exception, dumps, obj, protocol=protocol) - except Exception: - exc = sys.exc_info()[1] - failures.append('%d: %s %s' % (protocol, exc.__class__.__name__, exc)) - if failures: - raise ValueError('Failed with protocols: %s' % ', '.join(failures)) - - -class TestHelpers(unittest.TestCase): - # _is_descriptor, _is_sunder, _is_dunder - - def test_is_descriptor(self): - class foo: - pass - for attr in ('__get__','__set__','__delete__'): - obj = foo() - self.assertFalse(enum._is_descriptor(obj)) - setattr(obj, attr, 1) - self.assertTrue(enum._is_descriptor(obj)) - - def test_is_sunder(self): - for s in ('_a_', '_aa_'): - self.assertTrue(enum._is_sunder(s)) - - for s in ('a', 'a_', '_a', '__a', 'a__', '__a__', '_a__', '__a_', '_', - '__', '___', '____', '_____',): - self.assertFalse(enum._is_sunder(s)) - - def test_is_dunder(self): - for s in ('__a__', '__aa__'): - self.assertTrue(enum._is_dunder(s)) - for s in ('a', 'a_', '_a', '__a', 'a__', '_a_', '_a__', '__a_', '_', - '__', '___', '____', '_____',): - self.assertFalse(enum._is_dunder(s)) - - -class TestEnum(unittest.TestCase): - def setUp(self): - class Season(Enum): - SPRING = 1 - SUMMER = 2 - AUTUMN = 3 - WINTER = 4 - self.Season = Season - - class Konstants(float, Enum): - E = 2.7182818 - PI = 3.1415926 - TAU = 2 * PI - self.Konstants = Konstants - - class Grades(IntEnum): - A = 5 - B = 4 - C = 3 - D = 2 - F = 0 - self.Grades = Grades - - class Directional(str, Enum): - EAST = 'east' - WEST = 'west' - NORTH = 'north' - SOUTH = 'south' - self.Directional = Directional - - from datetime import date - class Holiday(date, Enum): - NEW_YEAR = 2013, 1, 1 - IDES_OF_MARCH = 2013, 3, 15 - self.Holiday = Holiday - - if pyver >= 2.6: # cannot specify custom `dir` on previous versions - def test_dir_on_class(self): - Season = self.Season - self.assertEqual( - set(dir(Season)), - set(['__class__', '__doc__', '__members__', '__module__', - 'SPRING', 'SUMMER', 'AUTUMN', 'WINTER']), - ) - - def test_dir_on_item(self): - Season = self.Season - self.assertEqual( - set(dir(Season.WINTER)), - set(['__class__', '__doc__', '__module__', 'name', 'value']), - ) - - def test_dir_on_sub_with_behavior_on_super(self): - # see issue22506 - class SuperEnum(Enum): - def invisible(self): - return "did you see me?" - class SubEnum(SuperEnum): - sample = 5 - self.assertEqual( - set(dir(SubEnum.sample)), - set(['__class__', '__doc__', '__module__', 'name', 'value', 'invisible']), - ) - - if pyver >= 2.7: # OrderedDict first available here - def test_members_is_ordereddict_if_ordered(self): - class Ordered(Enum): - __order__ = 'first second third' - first = 'bippity' - second = 'boppity' - third = 'boo' - self.assertTrue(type(Ordered.__members__) is OrderedDict) - - def test_members_is_ordereddict_if_not_ordered(self): - class Unordered(Enum): - this = 'that' - these = 'those' - self.assertTrue(type(Unordered.__members__) is OrderedDict) - - if pyver >= 3.0: # all objects are ordered in Python 2.x - def test_members_is_always_ordered(self): - class AlwaysOrdered(Enum): - first = 1 - second = 2 - third = 3 - self.assertTrue(type(AlwaysOrdered.__members__) is OrderedDict) - - def test_comparisons(self): - def bad_compare(): - Season.SPRING > 4 - Season = self.Season - self.assertNotEqual(Season.SPRING, 1) - self.assertRaises(TypeError, bad_compare) - - class Part(Enum): - SPRING = 1 - CLIP = 2 - BARREL = 3 - - self.assertNotEqual(Season.SPRING, Part.SPRING) - def bad_compare(): - Season.SPRING < Part.CLIP - self.assertRaises(TypeError, bad_compare) - - def test_enum_in_enum_out(self): - Season = self.Season - self.assertTrue(Season(Season.WINTER) is Season.WINTER) - - def test_enum_value(self): - Season = self.Season - self.assertEqual(Season.SPRING.value, 1) - - def test_intenum_value(self): - self.assertEqual(IntStooges.CURLY.value, 2) - - def test_enum(self): - Season = self.Season - lst = list(Season) - self.assertEqual(len(lst), len(Season)) - self.assertEqual(len(Season), 4, Season) - self.assertEqual( - [Season.SPRING, Season.SUMMER, Season.AUTUMN, Season.WINTER], lst) - - for i, season in enumerate('SPRING SUMMER AUTUMN WINTER'.split()): - i += 1 - e = Season(i) - self.assertEqual(e, getattr(Season, season)) - self.assertEqual(e.value, i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, season) - self.assertTrue(e in Season) - self.assertTrue(type(e) is Season) - self.assertTrue(isinstance(e, Season)) - self.assertEqual(str(e), 'Season.' + season) - self.assertEqual( - repr(e), - '' % (season, i), - ) - - def test_value_name(self): - Season = self.Season - self.assertEqual(Season.SPRING.name, 'SPRING') - self.assertEqual(Season.SPRING.value, 1) - def set_name(obj, new_value): - obj.name = new_value - def set_value(obj, new_value): - obj.value = new_value - self.assertRaises(AttributeError, set_name, Season.SPRING, 'invierno', ) - self.assertRaises(AttributeError, set_value, Season.SPRING, 2) - - def test_attribute_deletion(self): - class Season(Enum): - SPRING = 1 - SUMMER = 2 - AUTUMN = 3 - WINTER = 4 - - def spam(cls): - pass - - self.assertTrue(hasattr(Season, 'spam')) - del Season.spam - self.assertFalse(hasattr(Season, 'spam')) - - self.assertRaises(AttributeError, delattr, Season, 'SPRING') - self.assertRaises(AttributeError, delattr, Season, 'DRY') - self.assertRaises(AttributeError, delattr, Season.SPRING, 'name') - - def test_invalid_names(self): - def create_bad_class_1(): - class Wrong(Enum): - mro = 9 - def create_bad_class_2(): - class Wrong(Enum): - _reserved_ = 3 - self.assertRaises(ValueError, create_bad_class_1) - self.assertRaises(ValueError, create_bad_class_2) - - def test_contains(self): - Season = self.Season - self.assertTrue(Season.AUTUMN in Season) - self.assertTrue(3 not in Season) - - val = Season(3) - self.assertTrue(val in Season) - - class OtherEnum(Enum): - one = 1; two = 2 - self.assertTrue(OtherEnum.two not in Season) - - if pyver >= 2.6: # when `format` came into being - - def test_format_enum(self): - Season = self.Season - self.assertEqual('{0}'.format(Season.SPRING), - '{0}'.format(str(Season.SPRING))) - self.assertEqual( '{0:}'.format(Season.SPRING), - '{0:}'.format(str(Season.SPRING))) - self.assertEqual('{0:20}'.format(Season.SPRING), - '{0:20}'.format(str(Season.SPRING))) - self.assertEqual('{0:^20}'.format(Season.SPRING), - '{0:^20}'.format(str(Season.SPRING))) - self.assertEqual('{0:>20}'.format(Season.SPRING), - '{0:>20}'.format(str(Season.SPRING))) - self.assertEqual('{0:<20}'.format(Season.SPRING), - '{0:<20}'.format(str(Season.SPRING))) - - def test_format_enum_custom(self): - class TestFloat(float, Enum): - one = 1.0 - two = 2.0 - def __format__(self, spec): - return 'TestFloat success!' - self.assertEqual('{0}'.format(TestFloat.one), 'TestFloat success!') - - def assertFormatIsValue(self, spec, member): - self.assertEqual(spec.format(member), spec.format(member.value)) - - def test_format_enum_date(self): - Holiday = self.Holiday - self.assertFormatIsValue('{0}', Holiday.IDES_OF_MARCH) - self.assertFormatIsValue('{0:}', Holiday.IDES_OF_MARCH) - self.assertFormatIsValue('{0:20}', Holiday.IDES_OF_MARCH) - self.assertFormatIsValue('{0:^20}', Holiday.IDES_OF_MARCH) - self.assertFormatIsValue('{0:>20}', Holiday.IDES_OF_MARCH) - self.assertFormatIsValue('{0:<20}', Holiday.IDES_OF_MARCH) - self.assertFormatIsValue('{0:%Y %m}', Holiday.IDES_OF_MARCH) - self.assertFormatIsValue('{0:%Y %m %M:00}', Holiday.IDES_OF_MARCH) - - def test_format_enum_float(self): - Konstants = self.Konstants - self.assertFormatIsValue('{0}', Konstants.TAU) - self.assertFormatIsValue('{0:}', Konstants.TAU) - self.assertFormatIsValue('{0:20}', Konstants.TAU) - self.assertFormatIsValue('{0:^20}', Konstants.TAU) - self.assertFormatIsValue('{0:>20}', Konstants.TAU) - self.assertFormatIsValue('{0:<20}', Konstants.TAU) - self.assertFormatIsValue('{0:n}', Konstants.TAU) - self.assertFormatIsValue('{0:5.2}', Konstants.TAU) - self.assertFormatIsValue('{0:f}', Konstants.TAU) - - def test_format_enum_int(self): - Grades = self.Grades - self.assertFormatIsValue('{0}', Grades.C) - self.assertFormatIsValue('{0:}', Grades.C) - self.assertFormatIsValue('{0:20}', Grades.C) - self.assertFormatIsValue('{0:^20}', Grades.C) - self.assertFormatIsValue('{0:>20}', Grades.C) - self.assertFormatIsValue('{0:<20}', Grades.C) - self.assertFormatIsValue('{0:+}', Grades.C) - self.assertFormatIsValue('{0:08X}', Grades.C) - self.assertFormatIsValue('{0:b}', Grades.C) - - def test_format_enum_str(self): - Directional = self.Directional - self.assertFormatIsValue('{0}', Directional.WEST) - self.assertFormatIsValue('{0:}', Directional.WEST) - self.assertFormatIsValue('{0:20}', Directional.WEST) - self.assertFormatIsValue('{0:^20}', Directional.WEST) - self.assertFormatIsValue('{0:>20}', Directional.WEST) - self.assertFormatIsValue('{0:<20}', Directional.WEST) - - def test_hash(self): - Season = self.Season - dates = {} - dates[Season.WINTER] = '1225' - dates[Season.SPRING] = '0315' - dates[Season.SUMMER] = '0704' - dates[Season.AUTUMN] = '1031' - self.assertEqual(dates[Season.AUTUMN], '1031') - - def test_enum_duplicates(self): - __order__ = "SPRING SUMMER AUTUMN WINTER" - class Season(Enum): - SPRING = 1 - SUMMER = 2 - AUTUMN = FALL = 3 - WINTER = 4 - ANOTHER_SPRING = 1 - lst = list(Season) - self.assertEqual( - lst, - [Season.SPRING, Season.SUMMER, - Season.AUTUMN, Season.WINTER, - ]) - self.assertTrue(Season.FALL is Season.AUTUMN) - self.assertEqual(Season.FALL.value, 3) - self.assertEqual(Season.AUTUMN.value, 3) - self.assertTrue(Season(3) is Season.AUTUMN) - self.assertTrue(Season(1) is Season.SPRING) - self.assertEqual(Season.FALL.name, 'AUTUMN') - self.assertEqual( - set([k for k,v in Season.__members__.items() if v.name != k]), - set(['FALL', 'ANOTHER_SPRING']), - ) - - if pyver >= 3.0: - cls = vars() - result = {'Enum':Enum} - exec("""def test_duplicate_name(self): - with self.assertRaises(TypeError): - class Color(Enum): - red = 1 - green = 2 - blue = 3 - red = 4 - - with self.assertRaises(TypeError): - class Color(Enum): - red = 1 - green = 2 - blue = 3 - def red(self): - return 'red' - - with self.assertRaises(TypeError): - class Color(Enum): - @property - - def red(self): - return 'redder' - red = 1 - green = 2 - blue = 3""", - result) - cls['test_duplicate_name'] = result['test_duplicate_name'] - - def test_enum_with_value_name(self): - class Huh(Enum): - name = 1 - value = 2 - self.assertEqual( - list(Huh), - [Huh.name, Huh.value], - ) - self.assertTrue(type(Huh.name) is Huh) - self.assertEqual(Huh.name.name, 'name') - self.assertEqual(Huh.name.value, 1) - - def test_intenum_from_scratch(self): - class phy(int, Enum): - pi = 3 - tau = 2 * pi - self.assertTrue(phy.pi < phy.tau) - - def test_intenum_inherited(self): - class IntEnum(int, Enum): - pass - class phy(IntEnum): - pi = 3 - tau = 2 * pi - self.assertTrue(phy.pi < phy.tau) - - def test_floatenum_from_scratch(self): - class phy(float, Enum): - pi = 3.1415926 - tau = 2 * pi - self.assertTrue(phy.pi < phy.tau) - - def test_floatenum_inherited(self): - class FloatEnum(float, Enum): - pass - class phy(FloatEnum): - pi = 3.1415926 - tau = 2 * pi - self.assertTrue(phy.pi < phy.tau) - - def test_strenum_from_scratch(self): - class phy(str, Enum): - pi = 'Pi' - tau = 'Tau' - self.assertTrue(phy.pi < phy.tau) - - def test_strenum_inherited(self): - class StrEnum(str, Enum): - pass - class phy(StrEnum): - pi = 'Pi' - tau = 'Tau' - self.assertTrue(phy.pi < phy.tau) - - def test_intenum(self): - class WeekDay(IntEnum): - SUNDAY = 1 - MONDAY = 2 - TUESDAY = 3 - WEDNESDAY = 4 - THURSDAY = 5 - FRIDAY = 6 - SATURDAY = 7 - - self.assertEqual(['a', 'b', 'c'][WeekDay.MONDAY], 'c') - self.assertEqual([i for i in range(WeekDay.TUESDAY)], [0, 1, 2]) - - lst = list(WeekDay) - self.assertEqual(len(lst), len(WeekDay)) - self.assertEqual(len(WeekDay), 7) - target = 'SUNDAY MONDAY TUESDAY WEDNESDAY THURSDAY FRIDAY SATURDAY' - target = target.split() - for i, weekday in enumerate(target): - i += 1 - e = WeekDay(i) - self.assertEqual(e, i) - self.assertEqual(int(e), i) - self.assertEqual(e.name, weekday) - self.assertTrue(e in WeekDay) - self.assertEqual(lst.index(e)+1, i) - self.assertTrue(0 < e < 8) - self.assertTrue(type(e) is WeekDay) - self.assertTrue(isinstance(e, int)) - self.assertTrue(isinstance(e, Enum)) - - def test_intenum_duplicates(self): - class WeekDay(IntEnum): - __order__ = 'SUNDAY MONDAY TUESDAY WEDNESDAY THURSDAY FRIDAY SATURDAY' - SUNDAY = 1 - MONDAY = 2 - TUESDAY = TEUSDAY = 3 - WEDNESDAY = 4 - THURSDAY = 5 - FRIDAY = 6 - SATURDAY = 7 - self.assertTrue(WeekDay.TEUSDAY is WeekDay.TUESDAY) - self.assertEqual(WeekDay(3).name, 'TUESDAY') - self.assertEqual([k for k,v in WeekDay.__members__.items() - if v.name != k], ['TEUSDAY', ]) - - def test_pickle_enum(self): - if isinstance(Stooges, Exception): - raise Stooges - test_pickle_dump_load(self.assertTrue, Stooges.CURLY) - test_pickle_dump_load(self.assertTrue, Stooges) - - def test_pickle_int(self): - if isinstance(IntStooges, Exception): - raise IntStooges - test_pickle_dump_load(self.assertTrue, IntStooges.CURLY) - test_pickle_dump_load(self.assertTrue, IntStooges) - - def test_pickle_float(self): - if isinstance(FloatStooges, Exception): - raise FloatStooges - test_pickle_dump_load(self.assertTrue, FloatStooges.CURLY) - test_pickle_dump_load(self.assertTrue, FloatStooges) - - def test_pickle_enum_function(self): - if isinstance(Answer, Exception): - raise Answer - test_pickle_dump_load(self.assertTrue, Answer.him) - test_pickle_dump_load(self.assertTrue, Answer) - - def test_pickle_enum_function_with_module(self): - if isinstance(Question, Exception): - raise Question - test_pickle_dump_load(self.assertTrue, Question.who) - test_pickle_dump_load(self.assertTrue, Question) - - if pyver >= 3.4: - def test_class_nested_enum_and_pickle_protocol_four(self): - # would normally just have this directly in the class namespace - class NestedEnum(Enum): - twigs = 'common' - shiny = 'rare' - - self.__class__.NestedEnum = NestedEnum - self.NestedEnum.__qualname__ = '%s.NestedEnum' % self.__class__.__name__ - test_pickle_exception( - self.assertRaises, PicklingError, self.NestedEnum.twigs, - protocol=(0, 3)) - test_pickle_dump_load(self.assertTrue, self.NestedEnum.twigs, - protocol=(4, HIGHEST_PROTOCOL)) - - def test_exploding_pickle(self): - BadPickle = Enum('BadPickle', 'dill sweet bread-n-butter') - enum._make_class_unpicklable(BadPickle) - globals()['BadPickle'] = BadPickle - test_pickle_exception(self.assertRaises, TypeError, BadPickle.dill) - test_pickle_exception(self.assertRaises, PicklingError, BadPickle) - - def test_string_enum(self): - class SkillLevel(str, Enum): - master = 'what is the sound of one hand clapping?' - journeyman = 'why did the chicken cross the road?' - apprentice = 'knock, knock!' - self.assertEqual(SkillLevel.apprentice, 'knock, knock!') - - def test_getattr_getitem(self): - class Period(Enum): - morning = 1 - noon = 2 - evening = 3 - night = 4 - self.assertTrue(Period(2) is Period.noon) - self.assertTrue(getattr(Period, 'night') is Period.night) - self.assertTrue(Period['morning'] is Period.morning) - - def test_getattr_dunder(self): - Season = self.Season - self.assertTrue(getattr(Season, '__hash__')) - - def test_iteration_order(self): - class Season(Enum): - __order__ = 'SUMMER WINTER AUTUMN SPRING' - SUMMER = 2 - WINTER = 4 - AUTUMN = 3 - SPRING = 1 - self.assertEqual( - list(Season), - [Season.SUMMER, Season.WINTER, Season.AUTUMN, Season.SPRING], - ) - - def test_iteration_order_with_unorderable_values(self): - class Complex(Enum): - a = complex(7, 9) - b = complex(3.14, 2) - c = complex(1, -1) - d = complex(-77, 32) - self.assertEqual( - list(Complex), - [Complex.a, Complex.b, Complex.c, Complex.d], - ) - - def test_programatic_function_string(self): - SummerMonth = Enum('SummerMonth', 'june july august') - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_string_list(self): - SummerMonth = Enum('SummerMonth', ['june', 'july', 'august']) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_iterable(self): - SummerMonth = Enum( - 'SummerMonth', - (('june', 1), ('july', 2), ('august', 3)) - ) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_from_dict(self): - SummerMonth = Enum( - 'SummerMonth', - dict((('june', 1), ('july', 2), ('august', 3))) - ) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - if pyver < 3.0: - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_type(self): - SummerMonth = Enum('SummerMonth', 'june july august', type=int) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_type_from_subclass(self): - SummerMonth = IntEnum('SummerMonth', 'june july august') - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_unicode(self): - SummerMonth = Enum('SummerMonth', unicode('june july august')) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate(unicode('june july august').split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_unicode_list(self): - SummerMonth = Enum('SummerMonth', [unicode('june'), unicode('july'), unicode('august')]) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate(unicode('june july august').split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_unicode_iterable(self): - SummerMonth = Enum( - 'SummerMonth', - ((unicode('june'), 1), (unicode('july'), 2), (unicode('august'), 3)) - ) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate(unicode('june july august').split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_from_unicode_dict(self): - SummerMonth = Enum( - 'SummerMonth', - dict(((unicode('june'), 1), (unicode('july'), 2), (unicode('august'), 3))) - ) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - if pyver < 3.0: - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate(unicode('june july august').split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_unicode_type(self): - SummerMonth = Enum('SummerMonth', unicode('june july august'), type=int) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate(unicode('june july august').split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_unicode_type_from_subclass(self): - SummerMonth = IntEnum('SummerMonth', unicode('june july august')) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate(unicode('june july august').split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programmatic_function_unicode_class(self): - if pyver < 3.0: - class_names = unicode('SummerMonth'), 'S\xfcmm\xe9rM\xf6nth'.decode('latin1') - else: - class_names = 'SummerMonth', 'S\xfcmm\xe9rM\xf6nth' - for i, class_name in enumerate(class_names): - if pyver < 3.0 and i == 1: - self.assertRaises(TypeError, Enum, class_name, unicode('june july august')) - else: - SummerMonth = Enum(class_name, unicode('june july august')) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate(unicode('june july august').split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(e.value, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_subclassing(self): - if isinstance(Name, Exception): - raise Name - self.assertEqual(Name.BDFL, 'Guido van Rossum') - self.assertTrue(Name.BDFL, Name('Guido van Rossum')) - self.assertTrue(Name.BDFL is getattr(Name, 'BDFL')) - test_pickle_dump_load(self.assertTrue, Name.BDFL) - - def test_extending(self): - def bad_extension(): - class Color(Enum): - red = 1 - green = 2 - blue = 3 - class MoreColor(Color): - cyan = 4 - magenta = 5 - yellow = 6 - self.assertRaises(TypeError, bad_extension) - - def test_exclude_methods(self): - class whatever(Enum): - this = 'that' - these = 'those' - def really(self): - return 'no, not %s' % self.value - self.assertFalse(type(whatever.really) is whatever) - self.assertEqual(whatever.this.really(), 'no, not that') - - def test_wrong_inheritance_order(self): - def wrong_inherit(): - class Wrong(Enum, str): - NotHere = 'error before this point' - self.assertRaises(TypeError, wrong_inherit) - - def test_intenum_transitivity(self): - class number(IntEnum): - one = 1 - two = 2 - three = 3 - class numero(IntEnum): - uno = 1 - dos = 2 - tres = 3 - self.assertEqual(number.one, numero.uno) - self.assertEqual(number.two, numero.dos) - self.assertEqual(number.three, numero.tres) - - def test_introspection(self): - class Number(IntEnum): - one = 100 - two = 200 - self.assertTrue(Number.one._member_type_ is int) - self.assertTrue(Number._member_type_ is int) - class String(str, Enum): - yarn = 'soft' - rope = 'rough' - wire = 'hard' - self.assertTrue(String.yarn._member_type_ is str) - self.assertTrue(String._member_type_ is str) - class Plain(Enum): - vanilla = 'white' - one = 1 - self.assertTrue(Plain.vanilla._member_type_ is object) - self.assertTrue(Plain._member_type_ is object) - - def test_wrong_enum_in_call(self): - class Monochrome(Enum): - black = 0 - white = 1 - class Gender(Enum): - male = 0 - female = 1 - self.assertRaises(ValueError, Monochrome, Gender.male) - - def test_wrong_enum_in_mixed_call(self): - class Monochrome(IntEnum): - black = 0 - white = 1 - class Gender(Enum): - male = 0 - female = 1 - self.assertRaises(ValueError, Monochrome, Gender.male) - - def test_mixed_enum_in_call_1(self): - class Monochrome(IntEnum): - black = 0 - white = 1 - class Gender(IntEnum): - male = 0 - female = 1 - self.assertTrue(Monochrome(Gender.female) is Monochrome.white) - - def test_mixed_enum_in_call_2(self): - class Monochrome(Enum): - black = 0 - white = 1 - class Gender(IntEnum): - male = 0 - female = 1 - self.assertTrue(Monochrome(Gender.male) is Monochrome.black) - - def test_flufl_enum(self): - class Fluflnum(Enum): - def __int__(self): - return int(self.value) - class MailManOptions(Fluflnum): - option1 = 1 - option2 = 2 - option3 = 3 - self.assertEqual(int(MailManOptions.option1), 1) - - def test_no_such_enum_member(self): - class Color(Enum): - red = 1 - green = 2 - blue = 3 - self.assertRaises(ValueError, Color, 4) - self.assertRaises(KeyError, Color.__getitem__, 'chartreuse') - - def test_new_repr(self): - class Color(Enum): - red = 1 - green = 2 - blue = 3 - def __repr__(self): - return "don't you just love shades of %s?" % self.name - self.assertEqual( - repr(Color.blue), - "don't you just love shades of blue?", - ) - - def test_inherited_repr(self): - class MyEnum(Enum): - def __repr__(self): - return "My name is %s." % self.name - class MyIntEnum(int, MyEnum): - this = 1 - that = 2 - theother = 3 - self.assertEqual(repr(MyIntEnum.that), "My name is that.") - - def test_multiple_mixin_mro(self): - class auto_enum(EnumMeta): - def __new__(metacls, cls, bases, classdict): - original_dict = classdict - classdict = enum._EnumDict() - for k, v in original_dict.items(): - classdict[k] = v - temp = type(classdict)() - names = set(classdict._member_names) - i = 0 - for k in classdict._member_names: - v = classdict[k] - if v == (): - v = i - else: - i = v - i += 1 - temp[k] = v - for k, v in classdict.items(): - if k not in names: - temp[k] = v - return super(auto_enum, metacls).__new__( - metacls, cls, bases, temp) - - AutoNumberedEnum = auto_enum('AutoNumberedEnum', (Enum,), {}) - - AutoIntEnum = auto_enum('AutoIntEnum', (IntEnum,), {}) - - class TestAutoNumber(AutoNumberedEnum): - a = () - b = 3 - c = () - - class TestAutoInt(AutoIntEnum): - a = () - b = 3 - c = () - - def test_subclasses_with_getnewargs(self): - class NamedInt(int): - __qualname__ = 'NamedInt' # needed for pickle protocol 4 - def __new__(cls, *args): - _args = args - if len(args) < 1: - raise TypeError("name and value must be specified") - name, args = args[0], args[1:] - self = int.__new__(cls, *args) - self._intname = name - self._args = _args - return self - def __getnewargs__(self): - return self._args - @property - def __name__(self): - return self._intname - def __repr__(self): - # repr() is updated to include the name and type info - return "%s(%r, %s)" % (type(self).__name__, - self.__name__, - int.__repr__(self)) - def __str__(self): - # str() is unchanged, even if it relies on the repr() fallback - base = int - base_str = base.__str__ - if base_str.__objclass__ is object: - return base.__repr__(self) - return base_str(self) - # for simplicity, we only define one operator that - # propagates expressions - def __add__(self, other): - temp = int(self) + int( other) - if isinstance(self, NamedInt) and isinstance(other, NamedInt): - return NamedInt( - '(%s + %s)' % (self.__name__, other.__name__), - temp ) - else: - return temp - - class NEI(NamedInt, Enum): - __qualname__ = 'NEI' # needed for pickle protocol 4 - x = ('the-x', 1) - y = ('the-y', 2) - - self.assertTrue(NEI.__new__ is Enum.__new__) - self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") - globals()['NamedInt'] = NamedInt - globals()['NEI'] = NEI - NI5 = NamedInt('test', 5) - self.assertEqual(NI5, 5) - test_pickle_dump_load(self.assertTrue, NI5, 5) - self.assertEqual(NEI.y.value, 2) - test_pickle_dump_load(self.assertTrue, NEI.y) - - if pyver >= 3.4: - def test_subclasses_with_getnewargs_ex(self): - class NamedInt(int): - __qualname__ = 'NamedInt' # needed for pickle protocol 4 - def __new__(cls, *args): - _args = args - if len(args) < 2: - raise TypeError("name and value must be specified") - name, args = args[0], args[1:] - self = int.__new__(cls, *args) - self._intname = name - self._args = _args - return self - def __getnewargs_ex__(self): - return self._args, {} - @property - def __name__(self): - return self._intname - def __repr__(self): - # repr() is updated to include the name and type info - return "{}({!r}, {})".format(type(self).__name__, - self.__name__, - int.__repr__(self)) - def __str__(self): - # str() is unchanged, even if it relies on the repr() fallback - base = int - base_str = base.__str__ - if base_str.__objclass__ is object: - return base.__repr__(self) - return base_str(self) - # for simplicity, we only define one operator that - # propagates expressions - def __add__(self, other): - temp = int(self) + int( other) - if isinstance(self, NamedInt) and isinstance(other, NamedInt): - return NamedInt( - '({0} + {1})'.format(self.__name__, other.__name__), - temp ) - else: - return temp - - class NEI(NamedInt, Enum): - __qualname__ = 'NEI' # needed for pickle protocol 4 - x = ('the-x', 1) - y = ('the-y', 2) - - - self.assertIs(NEI.__new__, Enum.__new__) - self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") - globals()['NamedInt'] = NamedInt - globals()['NEI'] = NEI - NI5 = NamedInt('test', 5) - self.assertEqual(NI5, 5) - test_pickle_dump_load(self.assertEqual, NI5, 5, protocol=(4, HIGHEST_PROTOCOL)) - self.assertEqual(NEI.y.value, 2) - test_pickle_dump_load(self.assertTrue, NEI.y, protocol=(4, HIGHEST_PROTOCOL)) - - def test_subclasses_with_reduce(self): - class NamedInt(int): - __qualname__ = 'NamedInt' # needed for pickle protocol 4 - def __new__(cls, *args): - _args = args - if len(args) < 1: - raise TypeError("name and value must be specified") - name, args = args[0], args[1:] - self = int.__new__(cls, *args) - self._intname = name - self._args = _args - return self - def __reduce__(self): - return self.__class__, self._args - @property - def __name__(self): - return self._intname - def __repr__(self): - # repr() is updated to include the name and type info - return "%s(%r, %s)" % (type(self).__name__, - self.__name__, - int.__repr__(self)) - def __str__(self): - # str() is unchanged, even if it relies on the repr() fallback - base = int - base_str = base.__str__ - if base_str.__objclass__ is object: - return base.__repr__(self) - return base_str(self) - # for simplicity, we only define one operator that - # propagates expressions - def __add__(self, other): - temp = int(self) + int( other) - if isinstance(self, NamedInt) and isinstance(other, NamedInt): - return NamedInt( - '(%s + %s)' % (self.__name__, other.__name__), - temp ) - else: - return temp - - class NEI(NamedInt, Enum): - __qualname__ = 'NEI' # needed for pickle protocol 4 - x = ('the-x', 1) - y = ('the-y', 2) - - - self.assertTrue(NEI.__new__ is Enum.__new__) - self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") - globals()['NamedInt'] = NamedInt - globals()['NEI'] = NEI - NI5 = NamedInt('test', 5) - self.assertEqual(NI5, 5) - test_pickle_dump_load(self.assertEqual, NI5, 5) - self.assertEqual(NEI.y.value, 2) - test_pickle_dump_load(self.assertTrue, NEI.y) - - def test_subclasses_with_reduce_ex(self): - class NamedInt(int): - __qualname__ = 'NamedInt' # needed for pickle protocol 4 - def __new__(cls, *args): - _args = args - if len(args) < 1: - raise TypeError("name and value must be specified") - name, args = args[0], args[1:] - self = int.__new__(cls, *args) - self._intname = name - self._args = _args - return self - def __reduce_ex__(self, proto): - return self.__class__, self._args - @property - def __name__(self): - return self._intname - def __repr__(self): - # repr() is updated to include the name and type info - return "%s(%r, %s)" % (type(self).__name__, - self.__name__, - int.__repr__(self)) - def __str__(self): - # str() is unchanged, even if it relies on the repr() fallback - base = int - base_str = base.__str__ - if base_str.__objclass__ is object: - return base.__repr__(self) - return base_str(self) - # for simplicity, we only define one operator that - # propagates expressions - def __add__(self, other): - temp = int(self) + int( other) - if isinstance(self, NamedInt) and isinstance(other, NamedInt): - return NamedInt( - '(%s + %s)' % (self.__name__, other.__name__), - temp ) - else: - return temp - - class NEI(NamedInt, Enum): - __qualname__ = 'NEI' # needed for pickle protocol 4 - x = ('the-x', 1) - y = ('the-y', 2) - - - self.assertTrue(NEI.__new__ is Enum.__new__) - self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") - globals()['NamedInt'] = NamedInt - globals()['NEI'] = NEI - NI5 = NamedInt('test', 5) - self.assertEqual(NI5, 5) - test_pickle_dump_load(self.assertEqual, NI5, 5) - self.assertEqual(NEI.y.value, 2) - test_pickle_dump_load(self.assertTrue, NEI.y) - - def test_subclasses_without_direct_pickle_support(self): - class NamedInt(int): - __qualname__ = 'NamedInt' - def __new__(cls, *args): - _args = args - name, args = args[0], args[1:] - if len(args) == 0: - raise TypeError("name and value must be specified") - self = int.__new__(cls, *args) - self._intname = name - self._args = _args - return self - @property - def __name__(self): - return self._intname - def __repr__(self): - # repr() is updated to include the name and type info - return "%s(%r, %s)" % (type(self).__name__, - self.__name__, - int.__repr__(self)) - def __str__(self): - # str() is unchanged, even if it relies on the repr() fallback - base = int - base_str = base.__str__ - if base_str.__objclass__ is object: - return base.__repr__(self) - return base_str(self) - # for simplicity, we only define one operator that - # propagates expressions - def __add__(self, other): - temp = int(self) + int( other) - if isinstance(self, NamedInt) and isinstance(other, NamedInt): - return NamedInt( - '(%s + %s)' % (self.__name__, other.__name__), - temp ) - else: - return temp - - class NEI(NamedInt, Enum): - __qualname__ = 'NEI' - x = ('the-x', 1) - y = ('the-y', 2) - - self.assertTrue(NEI.__new__ is Enum.__new__) - self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") - globals()['NamedInt'] = NamedInt - globals()['NEI'] = NEI - NI5 = NamedInt('test', 5) - self.assertEqual(NI5, 5) - self.assertEqual(NEI.y.value, 2) - test_pickle_exception(self.assertRaises, TypeError, NEI.x) - test_pickle_exception(self.assertRaises, PicklingError, NEI) - - def test_subclasses_without_direct_pickle_support_using_name(self): - class NamedInt(int): - __qualname__ = 'NamedInt' - def __new__(cls, *args): - _args = args - name, args = args[0], args[1:] - if len(args) == 0: - raise TypeError("name and value must be specified") - self = int.__new__(cls, *args) - self._intname = name - self._args = _args - return self - @property - def __name__(self): - return self._intname - def __repr__(self): - # repr() is updated to include the name and type info - return "%s(%r, %s)" % (type(self).__name__, - self.__name__, - int.__repr__(self)) - def __str__(self): - # str() is unchanged, even if it relies on the repr() fallback - base = int - base_str = base.__str__ - if base_str.__objclass__ is object: - return base.__repr__(self) - return base_str(self) - # for simplicity, we only define one operator that - # propagates expressions - def __add__(self, other): - temp = int(self) + int( other) - if isinstance(self, NamedInt) and isinstance(other, NamedInt): - return NamedInt( - '(%s + %s)' % (self.__name__, other.__name__), - temp ) - else: - return temp - - class NEI(NamedInt, Enum): - __qualname__ = 'NEI' - x = ('the-x', 1) - y = ('the-y', 2) - def __reduce_ex__(self, proto): - return getattr, (self.__class__, self._name_) - - self.assertTrue(NEI.__new__ is Enum.__new__) - self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") - globals()['NamedInt'] = NamedInt - globals()['NEI'] = NEI - NI5 = NamedInt('test', 5) - self.assertEqual(NI5, 5) - self.assertEqual(NEI.y.value, 2) - test_pickle_dump_load(self.assertTrue, NEI.y) - test_pickle_dump_load(self.assertTrue, NEI) - - def test_tuple_subclass(self): - class SomeTuple(tuple, Enum): - __qualname__ = 'SomeTuple' - first = (1, 'for the money') - second = (2, 'for the show') - third = (3, 'for the music') - self.assertTrue(type(SomeTuple.first) is SomeTuple) - self.assertTrue(isinstance(SomeTuple.second, tuple)) - self.assertEqual(SomeTuple.third, (3, 'for the music')) - globals()['SomeTuple'] = SomeTuple - test_pickle_dump_load(self.assertTrue, SomeTuple.first) - - def test_duplicate_values_give_unique_enum_items(self): - class AutoNumber(Enum): - __order__ = 'enum_m enum_d enum_y' - enum_m = () - enum_d = () - enum_y = () - def __new__(cls): - value = len(cls.__members__) + 1 - obj = object.__new__(cls) - obj._value_ = value - return obj - def __int__(self): - return int(self._value_) - self.assertEqual(int(AutoNumber.enum_d), 2) - self.assertEqual(AutoNumber.enum_y.value, 3) - self.assertTrue(AutoNumber(1) is AutoNumber.enum_m) - self.assertEqual( - list(AutoNumber), - [AutoNumber.enum_m, AutoNumber.enum_d, AutoNumber.enum_y], - ) - - def test_inherited_new_from_enhanced_enum(self): - class AutoNumber2(Enum): - def __new__(cls): - value = len(cls.__members__) + 1 - obj = object.__new__(cls) - obj._value_ = value - return obj - def __int__(self): - return int(self._value_) - class Color(AutoNumber2): - __order__ = 'red green blue' - red = () - green = () - blue = () - self.assertEqual(len(Color), 3, "wrong number of elements: %d (should be %d)" % (len(Color), 3)) - self.assertEqual(list(Color), [Color.red, Color.green, Color.blue]) - if pyver >= 3.0: - self.assertEqual(list(map(int, Color)), [1, 2, 3]) - - def test_inherited_new_from_mixed_enum(self): - class AutoNumber3(IntEnum): - def __new__(cls): - value = len(cls.__members__) + 1 - obj = int.__new__(cls, value) - obj._value_ = value - return obj - class Color(AutoNumber3): - red = () - green = () - blue = () - self.assertEqual(len(Color), 3, "wrong number of elements: %d (should be %d)" % (len(Color), 3)) - Color.red - Color.green - Color.blue - - def test_ordered_mixin(self): - class OrderedEnum(Enum): - def __ge__(self, other): - if self.__class__ is other.__class__: - return self._value_ >= other._value_ - return NotImplemented - def __gt__(self, other): - if self.__class__ is other.__class__: - return self._value_ > other._value_ - return NotImplemented - def __le__(self, other): - if self.__class__ is other.__class__: - return self._value_ <= other._value_ - return NotImplemented - def __lt__(self, other): - if self.__class__ is other.__class__: - return self._value_ < other._value_ - return NotImplemented - class Grade(OrderedEnum): - __order__ = 'A B C D F' - A = 5 - B = 4 - C = 3 - D = 2 - F = 1 - self.assertEqual(list(Grade), [Grade.A, Grade.B, Grade.C, Grade.D, Grade.F]) - self.assertTrue(Grade.A > Grade.B) - self.assertTrue(Grade.F <= Grade.C) - self.assertTrue(Grade.D < Grade.A) - self.assertTrue(Grade.B >= Grade.B) - - def test_extending2(self): - def bad_extension(): - class Shade(Enum): - def shade(self): - print(self.name) - class Color(Shade): - red = 1 - green = 2 - blue = 3 - class MoreColor(Color): - cyan = 4 - magenta = 5 - yellow = 6 - self.assertRaises(TypeError, bad_extension) - - def test_extending3(self): - class Shade(Enum): - def shade(self): - return self.name - class Color(Shade): - def hex(self): - return '%s hexlified!' % self.value - class MoreColor(Color): - cyan = 4 - magenta = 5 - yellow = 6 - self.assertEqual(MoreColor.magenta.hex(), '5 hexlified!') - - def test_no_duplicates(self): - def bad_duplicates(): - class UniqueEnum(Enum): - def __init__(self, *args): - cls = self.__class__ - if any(self.value == e.value for e in cls): - a = self.name - e = cls(self.value).name - raise ValueError( - "aliases not allowed in UniqueEnum: %r --> %r" - % (a, e) - ) - class Color(UniqueEnum): - red = 1 - green = 2 - blue = 3 - class Color(UniqueEnum): - red = 1 - green = 2 - blue = 3 - grene = 2 - self.assertRaises(ValueError, bad_duplicates) - - def test_reversed(self): - self.assertEqual( - list(reversed(self.Season)), - [self.Season.WINTER, self.Season.AUTUMN, self.Season.SUMMER, - self.Season.SPRING] - ) - - def test_init(self): - class Planet(Enum): - MERCURY = (3.303e+23, 2.4397e6) - VENUS = (4.869e+24, 6.0518e6) - EARTH = (5.976e+24, 6.37814e6) - MARS = (6.421e+23, 3.3972e6) - JUPITER = (1.9e+27, 7.1492e7) - SATURN = (5.688e+26, 6.0268e7) - URANUS = (8.686e+25, 2.5559e7) - NEPTUNE = (1.024e+26, 2.4746e7) - def __init__(self, mass, radius): - self.mass = mass # in kilograms - self.radius = radius # in meters - @property - def surface_gravity(self): - # universal gravitational constant (m3 kg-1 s-2) - G = 6.67300E-11 - return G * self.mass / (self.radius * self.radius) - self.assertEqual(round(Planet.EARTH.surface_gravity, 2), 9.80) - self.assertEqual(Planet.EARTH.value, (5.976e+24, 6.37814e6)) - - def test_nonhash_value(self): - class AutoNumberInAList(Enum): - def __new__(cls): - value = [len(cls.__members__) + 1] - obj = object.__new__(cls) - obj._value_ = value - return obj - class ColorInAList(AutoNumberInAList): - __order__ = 'red green blue' - red = () - green = () - blue = () - self.assertEqual(list(ColorInAList), [ColorInAList.red, ColorInAList.green, ColorInAList.blue]) - self.assertEqual(ColorInAList.red.value, [1]) - self.assertEqual(ColorInAList([1]), ColorInAList.red) - - def test_conflicting_types_resolved_in_new(self): - class LabelledIntEnum(int, Enum): - def __new__(cls, *args): - value, label = args - obj = int.__new__(cls, value) - obj.label = label - obj._value_ = value - return obj - - class LabelledList(LabelledIntEnum): - unprocessed = (1, "Unprocessed") - payment_complete = (2, "Payment Complete") - - self.assertEqual(list(LabelledList), [LabelledList.unprocessed, LabelledList.payment_complete]) - self.assertEqual(LabelledList.unprocessed, 1) - self.assertEqual(LabelledList(1), LabelledList.unprocessed) - -class TestUnique(unittest.TestCase): - """2.4 doesn't allow class decorators, use function syntax.""" - - def test_unique_clean(self): - class Clean(Enum): - one = 1 - two = 'dos' - tres = 4.0 - unique(Clean) - class Cleaner(IntEnum): - single = 1 - double = 2 - triple = 3 - unique(Cleaner) - - def test_unique_dirty(self): - try: - class Dirty(Enum): - __order__ = 'one two tres' - one = 1 - two = 'dos' - tres = 1 - unique(Dirty) - except ValueError: - exc = sys.exc_info()[1] - message = exc.args[0] - self.assertTrue('tres -> one' in message) - - try: - class Dirtier(IntEnum): - __order__ = 'single double triple turkey' - single = 1 - double = 1 - triple = 3 - turkey = 3 - unique(Dirtier) - except ValueError: - exc = sys.exc_info()[1] - message = exc.args[0] - self.assertTrue('double -> single' in message) - self.assertTrue('turkey -> triple' in message) - - -class TestMe(unittest.TestCase): - - pass - -if __name__ == '__main__': - unittest.main() diff --git a/lib/feedparser.py b/lib/feedparser.py deleted file mode 100644 index 15fdc95b..00000000 --- a/lib/feedparser.py +++ /dev/null @@ -1,3909 +0,0 @@ -#!/usr/bin/env python -"""Universal feed parser - -Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds - -Visit http://feedparser.org/ for the latest version -Visit http://feedparser.org/docs/ for the latest documentation - -Required: Python 2.4 or later -Recommended: CJKCodecs and iconv_codec -""" - -__version__ = "5.0.1" -__license__ = """Copyright (c) 2002-2008, Mark Pilgrim, All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS' -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE.""" -__author__ = "Mark Pilgrim " -__contributors__ = ["Jason Diamond ", - "John Beimler ", - "Fazal Majid ", - "Aaron Swartz ", - "Kevin Marks ", - "Sam Ruby ", - "Ade Oshineye ", - "Martin Pool ", - "Kurt McKee "] -_debug = 0 - -# HTTP "User-Agent" header to send to servers when downloading feeds. -# If you are embedding feedparser in a larger application, you should -# change this to your application name and URL. -USER_AGENT = "UniversalFeedParser/%s +http://feedparser.org/" % __version__ - -# HTTP "Accept" header to send to servers when downloading feeds. If you don't -# want to send an Accept header, set this to None. -ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1" - -# List of preferred XML parsers, by SAX driver name. These will be tried first, -# but if they're not installed, Python will keep searching through its own list -# of pre-installed parsers until it finds one that supports everything we need. -PREFERRED_XML_PARSERS = ["drv_libxml2"] - -# If you want feedparser to automatically run HTML markup through HTML Tidy, set -# this to 1. Requires mxTidy -# or utidylib . -TIDY_MARKUP = 0 - -# List of Python interfaces for HTML Tidy, in order of preference. Only useful -# if TIDY_MARKUP = 1 -PREFERRED_TIDY_INTERFACES = ["uTidy", "mxTidy"] - -# If you want feedparser to automatically resolve all relative URIs, set this -# to 1. -RESOLVE_RELATIVE_URIS = 1 - -# If you want feedparser to automatically sanitize all potentially unsafe -# HTML content, set this to 1. -SANITIZE_HTML = 1 - -# ---------- Python 3 modules (make it work if possible) ---------- -try: - import rfc822 -except ImportError: - from email import _parseaddr as rfc822 - -try: - # Python 3.1 introduces bytes.maketrans and simultaneously - # deprecates string.maketrans; use bytes.maketrans if possible - _maketrans = bytes.maketrans -except (NameError, AttributeError): - import string - _maketrans = string.maketrans - -# base64 support for Atom feeds that contain embedded binary data -try: - import base64, binascii - # Python 3.1 deprecates decodestring in favor of decodebytes - _base64decode = getattr(base64, 'decodebytes', base64.decodestring) -except: - base64 = binascii = None - -def _s2bytes(s): - # Convert a UTF-8 str to bytes if the interpreter is Python 3 - try: - return bytes(s, 'utf8') - except (NameError, TypeError): - # In Python 2.5 and below, bytes doesn't exist (NameError) - # In Python 2.6 and above, bytes and str are the same (TypeError) - return s - -def _l2bytes(l): - # Convert a list of ints to bytes if the interpreter is Python 3 - try: - if bytes is not str: - # In Python 2.6 and above, this call won't raise an exception - # but it will return bytes([65]) as '[65]' instead of 'A' - return bytes(l) - raise NameError - except NameError: - return ''.join(map(chr, l)) - -# If you want feedparser to allow all URL schemes, set this to () -# List culled from Python's urlparse documentation at: -# http://docs.python.org/library/urlparse.html -# as well as from "URI scheme" at Wikipedia: -# https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme -# Many more will likely need to be added! -ACCEPTABLE_URI_SCHEMES = ( - 'file', 'ftp', 'gopher', 'h323', 'hdl', 'http', 'https', 'imap', 'mailto', - 'mms', 'news', 'nntp', 'prospero', 'rsync', 'rtsp', 'rtspu', 'sftp', - 'shttp', 'sip', 'sips', 'snews', 'svn', 'svn+ssh', 'telnet', 'wais', - # Additional common-but-unofficial schemes - 'aim', 'callto', 'cvs', 'facetime', 'feed', 'git', 'gtalk', 'irc', 'ircs', - 'irc6', 'itms', 'mms', 'msnim', 'skype', 'ssh', 'smb', 'svn', 'ymsg', -) -#ACCEPTABLE_URI_SCHEMES = () - -# ---------- required modules (should come with any Python distribution) ---------- -import sgmllib, re, sys, copy, urlparse, time, types, cgi, urllib, urllib2, datetime -try: - from io import BytesIO as _StringIO -except ImportError: - try: - from cStringIO import StringIO as _StringIO - except: - from StringIO import StringIO as _StringIO - -# ---------- optional modules (feedparser will work without these, but with reduced functionality) ---------- - -# gzip is included with most Python distributions, but may not be available if you compiled your own -try: - import gzip -except: - gzip = None -try: - import zlib -except: - zlib = None - -# If a real XML parser is available, feedparser will attempt to use it. feedparser has -# been tested with the built-in SAX parser, PyXML, and libxml2. On platforms where the -# Python distribution does not come with an XML parser (such as Mac OS X 10.2 and some -# versions of FreeBSD), feedparser will quietly fall back on regex-based parsing. -try: - import xml.sax - xml.sax.make_parser(PREFERRED_XML_PARSERS) # test for valid parsers - from xml.sax.saxutils import escape as _xmlescape - _XML_AVAILABLE = 1 -except: - _XML_AVAILABLE = 0 - def _xmlescape(data,entities={}): - data = data.replace('&', '&') - data = data.replace('>', '>') - data = data.replace('<', '<') - for char, entity in entities: - data = data.replace(char, entity) - return data - -# cjkcodecs and iconv_codec provide support for more character encodings. -# Both are available from http://cjkpython.i18n.org/ -try: - import cjkcodecs.aliases -except: - pass -try: - import iconv_codec -except: - pass - -# chardet library auto-detects character encodings -# Download from http://chardet.feedparser.org/ -try: - import chardet - if _debug: - import chardet.constants - chardet.constants._debug = 1 -except: - chardet = None - -# reversable htmlentitydefs mappings for Python 2.2 -try: - from htmlentitydefs import name2codepoint, codepoint2name -except: - import htmlentitydefs - name2codepoint={} - codepoint2name={} - for (name,codepoint) in htmlentitydefs.entitydefs.iteritems(): - if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1])) - name2codepoint[name]=ord(codepoint) - codepoint2name[ord(codepoint)]=name - -# BeautifulSoup parser used for parsing microformats from embedded HTML content -# http://www.crummy.com/software/BeautifulSoup/ -# feedparser is tested with BeautifulSoup 3.0.x, but it might work with the -# older 2.x series. If it doesn't, and you can figure out why, I'll accept a -# patch and modify the compatibility statement accordingly. -try: - import BeautifulSoup -except: - BeautifulSoup = None - -# ---------- don't touch these ---------- -class ThingsNobodyCaresAboutButMe(Exception): pass -class CharacterEncodingOverride(ThingsNobodyCaresAboutButMe): pass -class CharacterEncodingUnknown(ThingsNobodyCaresAboutButMe): pass -class NonXMLContentType(ThingsNobodyCaresAboutButMe): pass -class UndeclaredNamespace(Exception): pass - -sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') -sgmllib.special = re.compile(']|"[^"]*"(?=>|/|\s|\w+=)|'[^']*'(?=>|/|\s|\w+=))*(?=[<>])|.*?(?=[<>])''') - def search(self,string,index=0): - match = self.endbracket.match(string,index) - if match is not None: - # Returning a new object in the calling thread's context - # resolves a thread-safety. - return EndBracketMatch(match) - return None - class EndBracketMatch: - def __init__(self, match): - self.match = match - def start(self, n): - return self.match.end(n) - sgmllib.endbracket = EndBracketRegEx() - -SUPPORTED_VERSIONS = {'': 'unknown', - 'rss090': 'RSS 0.90', - 'rss091n': 'RSS 0.91 (Netscape)', - 'rss091u': 'RSS 0.91 (Userland)', - 'rss092': 'RSS 0.92', - 'rss093': 'RSS 0.93', - 'rss094': 'RSS 0.94', - 'rss20': 'RSS 2.0', - 'rss10': 'RSS 1.0', - 'rss': 'RSS (unknown version)', - 'atom01': 'Atom 0.1', - 'atom02': 'Atom 0.2', - 'atom03': 'Atom 0.3', - 'atom10': 'Atom 1.0', - 'atom': 'Atom (unknown version)', - 'cdf': 'CDF', - 'hotrss': 'Hot RSS' - } - -try: - UserDict = dict -except NameError: - # Python 2.1 does not have dict - from UserDict import UserDict - def dict(aList): - rc = {} - for k, v in aList: - rc[k] = v - return rc - -class FeedParserDict(UserDict): - keymap = {'channel': 'feed', - 'items': 'entries', - 'guid': 'id', - 'date': 'updated', - 'date_parsed': 'updated_parsed', - 'description': ['summary', 'subtitle'], - 'url': ['href'], - 'modified': 'updated', - 'modified_parsed': 'updated_parsed', - 'issued': 'published', - 'issued_parsed': 'published_parsed', - 'copyright': 'rights', - 'copyright_detail': 'rights_detail', - 'tagline': 'subtitle', - 'tagline_detail': 'subtitle_detail'} - def __getitem__(self, key): - if key == 'category': - return UserDict.__getitem__(self, 'tags')[0]['term'] - if key == 'enclosures': - norel = lambda link: FeedParserDict([(name,value) for (name,value) in link.items() if name!='rel']) - return [norel(link) for link in UserDict.__getitem__(self, 'links') if link['rel']=='enclosure'] - if key == 'license': - for link in UserDict.__getitem__(self, 'links'): - if link['rel']=='license' and link.has_key('href'): - return link['href'] - if key == 'categories': - return [(tag['scheme'], tag['term']) for tag in UserDict.__getitem__(self, 'tags')] - realkey = self.keymap.get(key, key) - if type(realkey) == types.ListType: - for k in realkey: - if UserDict.__contains__(self, k): - return UserDict.__getitem__(self, k) - if UserDict.__contains__(self, key): - return UserDict.__getitem__(self, key) - return UserDict.__getitem__(self, realkey) - - def __setitem__(self, key, value): - for k in self.keymap.keys(): - if key == k: - key = self.keymap[k] - if type(key) == types.ListType: - key = key[0] - return UserDict.__setitem__(self, key, value) - - def get(self, key, default=None): - if self.has_key(key): - return self[key] - else: - return default - - def setdefault(self, key, value): - if not self.has_key(key): - self[key] = value - return self[key] - - def has_key(self, key): - try: - return hasattr(self, key) or UserDict.__contains__(self, key) - except AttributeError: - return False - # This alias prevents the 2to3 tool from changing the semantics of the - # __contains__ function below and exhausting the maximum recursion depth - __has_key = has_key - - def __getattr__(self, key): - try: - return self.__dict__[key] - except KeyError: - pass - try: - assert not key.startswith('_') - return self.__getitem__(key) - except: - raise AttributeError, "object has no attribute '%s'" % key - - def __setattr__(self, key, value): - if key.startswith('_') or key == 'data': - self.__dict__[key] = value - else: - return self.__setitem__(key, value) - - def __contains__(self, key): - return self.__has_key(key) - -def zopeCompatibilityHack(): - global FeedParserDict - del FeedParserDict - def FeedParserDict(aDict=None): - rc = {} - if aDict: - rc.update(aDict) - return rc - -_ebcdic_to_ascii_map = None -def _ebcdic_to_ascii(s): - global _ebcdic_to_ascii_map - if not _ebcdic_to_ascii_map: - emap = ( - 0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, - 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, - 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, - 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, - 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, - 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, - 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, - 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, - 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,201, - 202,106,107,108,109,110,111,112,113,114,203,204,205,206,207,208, - 209,126,115,116,117,118,119,120,121,122,210,211,212,213,214,215, - 216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231, - 123,65,66,67,68,69,70,71,72,73,232,233,234,235,236,237, - 125,74,75,76,77,78,79,80,81,82,238,239,240,241,242,243, - 92,159,83,84,85,86,87,88,89,90,244,245,246,247,248,249, - 48,49,50,51,52,53,54,55,56,57,250,251,252,253,254,255 - ) - _ebcdic_to_ascii_map = _maketrans( \ - _l2bytes(range(256)), _l2bytes(emap)) - return s.translate(_ebcdic_to_ascii_map) - -_cp1252 = { - unichr(128): unichr(8364), # euro sign - unichr(130): unichr(8218), # single low-9 quotation mark - unichr(131): unichr( 402), # latin small letter f with hook - unichr(132): unichr(8222), # double low-9 quotation mark - unichr(133): unichr(8230), # horizontal ellipsis - unichr(134): unichr(8224), # dagger - unichr(135): unichr(8225), # double dagger - unichr(136): unichr( 710), # modifier letter circumflex accent - unichr(137): unichr(8240), # per mille sign - unichr(138): unichr( 352), # latin capital letter s with caron - unichr(139): unichr(8249), # single left-pointing angle quotation mark - unichr(140): unichr( 338), # latin capital ligature oe - unichr(142): unichr( 381), # latin capital letter z with caron - unichr(145): unichr(8216), # left single quotation mark - unichr(146): unichr(8217), # right single quotation mark - unichr(147): unichr(8220), # left double quotation mark - unichr(148): unichr(8221), # right double quotation mark - unichr(149): unichr(8226), # bullet - unichr(150): unichr(8211), # en dash - unichr(151): unichr(8212), # em dash - unichr(152): unichr( 732), # small tilde - unichr(153): unichr(8482), # trade mark sign - unichr(154): unichr( 353), # latin small letter s with caron - unichr(155): unichr(8250), # single right-pointing angle quotation mark - unichr(156): unichr( 339), # latin small ligature oe - unichr(158): unichr( 382), # latin small letter z with caron - unichr(159): unichr( 376)} # latin capital letter y with diaeresis - -_urifixer = re.compile('^([A-Za-z][A-Za-z0-9+-.]*://)(/*)(.*?)') -def _urljoin(base, uri): - uri = _urifixer.sub(r'\1\3', uri) - try: - return urlparse.urljoin(base, uri) - except: - uri = urlparse.urlunparse([urllib.quote(part) for part in urlparse.urlparse(uri)]) - return urlparse.urljoin(base, uri) - -class _FeedParserMixin: - namespaces = {'': '', - 'http://backend.userland.com/rss': '', - 'http://blogs.law.harvard.edu/tech/rss': '', - 'http://purl.org/rss/1.0/': '', - 'http://my.netscape.com/rdf/simple/0.9/': '', - 'http://example.com/newformat#': '', - 'http://example.com/necho': '', - 'http://purl.org/echo/': '', - 'uri/of/echo/namespace#': '', - 'http://purl.org/pie/': '', - 'http://purl.org/atom/ns#': '', - 'http://www.w3.org/2005/Atom': '', - 'http://purl.org/rss/1.0/modules/rss091#': '', - - 'http://webns.net/mvcb/': 'admin', - 'http://purl.org/rss/1.0/modules/aggregation/': 'ag', - 'http://purl.org/rss/1.0/modules/annotate/': 'annotate', - 'http://media.tangent.org/rss/1.0/': 'audio', - 'http://backend.userland.com/blogChannelModule': 'blogChannel', - 'http://web.resource.org/cc/': 'cc', - 'http://backend.userland.com/creativeCommonsRssModule': 'creativeCommons', - 'http://purl.org/rss/1.0/modules/company': 'co', - 'http://purl.org/rss/1.0/modules/content/': 'content', - 'http://my.theinfo.org/changed/1.0/rss/': 'cp', - 'http://purl.org/dc/elements/1.1/': 'dc', - 'http://purl.org/dc/terms/': 'dcterms', - 'http://purl.org/rss/1.0/modules/email/': 'email', - 'http://purl.org/rss/1.0/modules/event/': 'ev', - 'http://rssnamespace.org/feedburner/ext/1.0': 'feedburner', - 'http://freshmeat.net/rss/fm/': 'fm', - 'http://xmlns.com/foaf/0.1/': 'foaf', - 'http://www.w3.org/2003/01/geo/wgs84_pos#': 'geo', - 'http://postneo.com/icbm/': 'icbm', - 'http://purl.org/rss/1.0/modules/image/': 'image', - 'http://www.itunes.com/DTDs/PodCast-1.0.dtd': 'itunes', - 'http://example.com/DTDs/PodCast-1.0.dtd': 'itunes', - 'http://purl.org/rss/1.0/modules/link/': 'l', - 'http://search.yahoo.com/mrss': 'media', - #Version 1.1.2 of the Media RSS spec added the trailing slash on the namespace - 'http://search.yahoo.com/mrss/': 'media', - 'http://madskills.com/public/xml/rss/module/pingback/': 'pingback', - 'http://prismstandard.org/namespaces/1.2/basic/': 'prism', - 'http://www.w3.org/1999/02/22-rdf-syntax-ns#': 'rdf', - 'http://www.w3.org/2000/01/rdf-schema#': 'rdfs', - 'http://purl.org/rss/1.0/modules/reference/': 'ref', - 'http://purl.org/rss/1.0/modules/richequiv/': 'reqv', - 'http://purl.org/rss/1.0/modules/search/': 'search', - 'http://purl.org/rss/1.0/modules/slash/': 'slash', - 'http://schemas.xmlsoap.org/soap/envelope/': 'soap', - 'http://purl.org/rss/1.0/modules/servicestatus/': 'ss', - 'http://hacks.benhammersley.com/rss/streaming/': 'str', - 'http://purl.org/rss/1.0/modules/subscription/': 'sub', - 'http://purl.org/rss/1.0/modules/syndication/': 'sy', - 'http://schemas.pocketsoap.com/rss/myDescModule/': 'szf', - 'http://purl.org/rss/1.0/modules/taxonomy/': 'taxo', - 'http://purl.org/rss/1.0/modules/threading/': 'thr', - 'http://purl.org/rss/1.0/modules/textinput/': 'ti', - 'http://madskills.com/public/xml/rss/module/trackback/':'trackback', - 'http://wellformedweb.org/commentAPI/': 'wfw', - 'http://purl.org/rss/1.0/modules/wiki/': 'wiki', - 'http://www.w3.org/1999/xhtml': 'xhtml', - 'http://www.w3.org/1999/xlink': 'xlink', - 'http://www.w3.org/XML/1998/namespace': 'xml' -} - _matchnamespaces = {} - - can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'icon', 'logo'] - can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description'] - can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description'] - html_types = ['text/html', 'application/xhtml+xml'] - - def __init__(self, baseuri=None, baselang=None, encoding='utf-8'): - if _debug: sys.stderr.write('initializing FeedParser\n') - if not self._matchnamespaces: - for k, v in self.namespaces.items(): - self._matchnamespaces[k.lower()] = v - self.feeddata = FeedParserDict() # feed-level data - self.encoding = encoding # character encoding - self.entries = [] # list of entry-level data - self.version = '' # feed type/version, see SUPPORTED_VERSIONS - self.namespacesInUse = {} # dictionary of namespaces defined by the feed - - # the following are used internally to track state; - # this is really out of control and should be refactored - self.infeed = 0 - self.inentry = 0 - self.incontent = 0 - self.intextinput = 0 - self.inimage = 0 - self.inauthor = 0 - self.incontributor = 0 - self.inpublisher = 0 - self.insource = 0 - self.sourcedata = FeedParserDict() - self.contentparams = FeedParserDict() - self._summaryKey = None - self.namespacemap = {} - self.elementstack = [] - self.basestack = [] - self.langstack = [] - self.baseuri = baseuri or '' - self.lang = baselang or None - self.svgOK = 0 - self.hasTitle = 0 - if baselang: - self.feeddata['language'] = baselang.replace('_','-') - - def unknown_starttag(self, tag, attrs): - if _debug: sys.stderr.write('start %s with %s\n' % (tag, attrs)) - # normalize attrs - attrs = [(k.lower(), v) for k, v in attrs] - attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs] - # the sgml parser doesn't handle entities in attributes, but - # strict xml parsers do -- account for this difference - if isinstance(self, _LooseFeedParser): - attrs = [(k, v.replace('&', '&')) for k, v in attrs] - - # track xml:base and xml:lang - attrsD = dict(attrs) - baseuri = attrsD.get('xml:base', attrsD.get('base')) or self.baseuri - if type(baseuri) != type(u''): - try: - baseuri = unicode(baseuri, self.encoding) - except: - baseuri = unicode(baseuri, 'iso-8859-1') - # ensure that self.baseuri is always an absolute URI that - # uses a whitelisted URI scheme (e.g. not `javscript:`) - if self.baseuri: - self.baseuri = _makeSafeAbsoluteURI(self.baseuri, baseuri) or self.baseuri - else: - self.baseuri = _urljoin(self.baseuri, baseuri) - lang = attrsD.get('xml:lang', attrsD.get('lang')) - if lang == '': - # xml:lang could be explicitly set to '', we need to capture that - lang = None - elif lang is None: - # if no xml:lang is specified, use parent lang - lang = self.lang - if lang: - if tag in ('feed', 'rss', 'rdf:RDF'): - self.feeddata['language'] = lang.replace('_','-') - self.lang = lang - self.basestack.append(self.baseuri) - self.langstack.append(lang) - - # track namespaces - for prefix, uri in attrs: - if prefix.startswith('xmlns:'): - self.trackNamespace(prefix[6:], uri) - elif prefix == 'xmlns': - self.trackNamespace(None, uri) - - # track inline content - if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): - if tag in ['xhtml:div', 'div']: return # typepad does this 10/2007 - # element declared itself as escaped markup, but it isn't really - self.contentparams['type'] = 'application/xhtml+xml' - if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml': - if tag.find(':') <> -1: - prefix, tag = tag.split(':', 1) - namespace = self.namespacesInUse.get(prefix, '') - if tag=='math' and namespace=='http://www.w3.org/1998/Math/MathML': - attrs.append(('xmlns',namespace)) - if tag=='svg' and namespace=='http://www.w3.org/2000/svg': - attrs.append(('xmlns',namespace)) - if tag == 'svg': self.svgOK += 1 - return self.handle_data('<%s%s>' % (tag, self.strattrs(attrs)), escape=0) - - # match namespaces - if tag.find(':') <> -1: - prefix, suffix = tag.split(':', 1) - else: - prefix, suffix = '', tag - prefix = self.namespacemap.get(prefix, prefix) - if prefix: - prefix = prefix + '_' - - # special hack for better tracking of empty textinput/image elements in illformed feeds - if (not prefix) and tag not in ('title', 'link', 'description', 'name'): - self.intextinput = 0 - if (not prefix) and tag not in ('title', 'link', 'description', 'url', 'href', 'width', 'height'): - self.inimage = 0 - - # call special handler (if defined) or default handler - methodname = '_start_' + prefix + suffix - try: - method = getattr(self, methodname) - return method(attrsD) - except AttributeError: - # Since there's no handler or something has gone wrong we explicitly add the element and its attributes - unknown_tag = prefix + suffix - if len(attrsD) == 0: - # No attributes so merge it into the encosing dictionary - return self.push(unknown_tag, 1) - else: - # Has attributes so create it in its own dictionary - context = self._getContext() - context[unknown_tag] = attrsD - - def unknown_endtag(self, tag): - if _debug: sys.stderr.write('end %s\n' % tag) - # match namespaces - if tag.find(':') <> -1: - prefix, suffix = tag.split(':', 1) - else: - prefix, suffix = '', tag - prefix = self.namespacemap.get(prefix, prefix) - if prefix: - prefix = prefix + '_' - if suffix == 'svg' and self.svgOK: self.svgOK -= 1 - - # call special handler (if defined) or default handler - methodname = '_end_' + prefix + suffix - try: - if self.svgOK: raise AttributeError() - method = getattr(self, methodname) - method() - except AttributeError: - self.pop(prefix + suffix) - - # track inline content - if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): - # element declared itself as escaped markup, but it isn't really - if tag in ['xhtml:div', 'div']: return # typepad does this 10/2007 - self.contentparams['type'] = 'application/xhtml+xml' - if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml': - tag = tag.split(':')[-1] - self.handle_data('' % tag, escape=0) - - # track xml:base and xml:lang going out of scope - if self.basestack: - self.basestack.pop() - if self.basestack and self.basestack[-1]: - self.baseuri = self.basestack[-1] - if self.langstack: - self.langstack.pop() - if self.langstack: # and (self.langstack[-1] is not None): - self.lang = self.langstack[-1] - - def handle_charref(self, ref): - # called for each character reference, e.g. for ' ', ref will be '160' - if not self.elementstack: return - ref = ref.lower() - if ref in ('34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e'): - text = '&#%s;' % ref - else: - if ref[0] == 'x': - c = int(ref[1:], 16) - else: - c = int(ref) - text = unichr(c).encode('utf-8') - self.elementstack[-1][2].append(text) - - def handle_entityref(self, ref): - # called for each entity reference, e.g. for '©', ref will be 'copy' - if not self.elementstack: return - if _debug: sys.stderr.write('entering handle_entityref with %s\n' % ref) - if ref in ('lt', 'gt', 'quot', 'amp', 'apos'): - text = '&%s;' % ref - elif ref in self.entities.keys(): - text = self.entities[ref] - if text.startswith('&#') and text.endswith(';'): - return self.handle_entityref(text) - else: - try: name2codepoint[ref] - except KeyError: text = '&%s;' % ref - else: text = unichr(name2codepoint[ref]).encode('utf-8') - self.elementstack[-1][2].append(text) - - def handle_data(self, text, escape=1): - # called for each block of plain text, i.e. outside of any tag and - # not containing any character or entity references - if not self.elementstack: return - if escape and self.contentparams.get('type') == 'application/xhtml+xml': - text = _xmlescape(text) - self.elementstack[-1][2].append(text) - - def handle_comment(self, text): - # called for each comment, e.g. - pass - - def handle_pi(self, text): - # called for each processing instruction, e.g. - pass - - def handle_decl(self, text): - pass - - def parse_declaration(self, i): - # override internal declaration handler to handle CDATA blocks - if _debug: sys.stderr.write('entering parse_declaration\n') - if self.rawdata[i:i+9] == '', i) - if k == -1: - # CDATA block began but didn't finish - k = len(self.rawdata) - return k - self.handle_data(_xmlescape(self.rawdata[i+9:k]), 0) - return k+3 - else: - k = self.rawdata.find('>', i) - if k >= 0: - return k+1 - else: - # We have an incomplete CDATA block. - return k - - def mapContentType(self, contentType): - contentType = contentType.lower() - if contentType == 'text' or contentType == 'plain': - contentType = 'text/plain' - elif contentType == 'html': - contentType = 'text/html' - elif contentType == 'xhtml': - contentType = 'application/xhtml+xml' - return contentType - - def trackNamespace(self, prefix, uri): - loweruri = uri.lower() - if (prefix, loweruri) == (None, 'http://my.netscape.com/rdf/simple/0.9/') and not self.version: - self.version = 'rss090' - if loweruri == 'http://purl.org/rss/1.0/' and not self.version: - self.version = 'rss10' - if loweruri == 'http://www.w3.org/2005/atom' and not self.version: - self.version = 'atom10' - if loweruri.find('backend.userland.com/rss') <> -1: - # match any backend.userland.com namespace - uri = 'http://backend.userland.com/rss' - loweruri = uri - if self._matchnamespaces.has_key(loweruri): - self.namespacemap[prefix] = self._matchnamespaces[loweruri] - self.namespacesInUse[self._matchnamespaces[loweruri]] = uri - else: - self.namespacesInUse[prefix or ''] = uri - - def resolveURI(self, uri): - return _urljoin(self.baseuri or '', uri) - - def decodeEntities(self, element, data): - return data - - def strattrs(self, attrs): - return ''.join([' %s="%s"' % (t[0],_xmlescape(t[1],{'"':'"'})) for t in attrs]) - - def push(self, element, expectingText): - self.elementstack.append([element, expectingText, []]) - - def pop(self, element, stripWhitespace=1): - if not self.elementstack: return - if self.elementstack[-1][0] != element: return - - element, expectingText, pieces = self.elementstack.pop() - - if self.version == 'atom10' and self.contentparams.get('type','text') == 'application/xhtml+xml': - # remove enclosing child element, but only if it is a
and - # only if all the remaining content is nested underneath it. - # This means that the divs would be retained in the following: - #
foo
bar
- while pieces and len(pieces)>1 and not pieces[-1].strip(): - del pieces[-1] - while pieces and len(pieces)>1 and not pieces[0].strip(): - del pieces[0] - if pieces and (pieces[0] == '
' or pieces[0].startswith('
': - depth = 0 - for piece in pieces[:-1]: - if piece.startswith(''): - depth += 1 - else: - pieces = pieces[1:-1] - - # Ensure each piece is a str for Python 3 - for (i, v) in enumerate(pieces): - if not isinstance(v, basestring): - pieces[i] = v.decode('utf-8') - - output = ''.join(pieces) - if stripWhitespace: - output = output.strip() - if not expectingText: return output - - # decode base64 content - if base64 and self.contentparams.get('base64', 0): - try: - output = _base64decode(output) - except binascii.Error: - pass - except binascii.Incomplete: - pass - except TypeError: - # In Python 3, base64 takes and outputs bytes, not str - # This may not be the most correct way to accomplish this - output = _base64decode(output.encode('utf-8')).decode('utf-8') - - # resolve relative URIs - if (element in self.can_be_relative_uri) and output: - output = self.resolveURI(output) - - # decode entities within embedded markup - if not self.contentparams.get('base64', 0): - output = self.decodeEntities(element, output) - - if self.lookslikehtml(output): - self.contentparams['type']='text/html' - - # remove temporary cruft from contentparams - try: - del self.contentparams['mode'] - except KeyError: - pass - try: - del self.contentparams['base64'] - except KeyError: - pass - - is_htmlish = self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types - # resolve relative URIs within embedded markup - if is_htmlish and RESOLVE_RELATIVE_URIS: - if element in self.can_contain_relative_uris: - output = _resolveRelativeURIs(output, self.baseuri, self.encoding, self.contentparams.get('type', 'text/html')) - - # parse microformats - # (must do this before sanitizing because some microformats - # rely on elements that we sanitize) - if is_htmlish and element in ['content', 'description', 'summary']: - mfresults = _parseMicroformats(output, self.baseuri, self.encoding) - if mfresults: - for tag in mfresults.get('tags', []): - self._addTag(tag['term'], tag['scheme'], tag['label']) - for enclosure in mfresults.get('enclosures', []): - self._start_enclosure(enclosure) - for xfn in mfresults.get('xfn', []): - self._addXFN(xfn['relationships'], xfn['href'], xfn['name']) - vcard = mfresults.get('vcard') - if vcard: - self._getContext()['vcard'] = vcard - - # sanitize embedded markup - if is_htmlish and SANITIZE_HTML: - if element in self.can_contain_dangerous_markup: - output = _sanitizeHTML(output, self.encoding, self.contentparams.get('type', 'text/html')) - - if self.encoding and type(output) != type(u''): - try: - output = unicode(output, self.encoding) - except: - pass - - # address common error where people take data that is already - # utf-8, presume that it is iso-8859-1, and re-encode it. - if self.encoding in ('utf-8', 'utf-8_INVALID_PYTHON_3') and type(output) == type(u''): - try: - output = unicode(output.encode('iso-8859-1'), 'utf-8') - except: - pass - - # map win-1252 extensions to the proper code points - if type(output) == type(u''): - output = u''.join([c in _cp1252.keys() and _cp1252[c] or c for c in output]) - - # categories/tags/keywords/whatever are handled in _end_category - if element == 'category': - return output - - if element == 'title' and self.hasTitle: - return output - - # store output in appropriate place(s) - if self.inentry and not self.insource: - if element == 'content': - self.entries[-1].setdefault(element, []) - contentparams = copy.deepcopy(self.contentparams) - contentparams['value'] = output - self.entries[-1][element].append(contentparams) - elif element == 'link': - if not self.inimage: - # query variables in urls in link elements are improperly - # converted from `?a=1&b=2` to `?a=1&b;=2` as if they're - # unhandled character references. fix this special case. - output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output) - self.entries[-1][element] = output - if output: - self.entries[-1]['links'][-1]['href'] = output - else: - if element == 'description': - element = 'summary' - self.entries[-1][element] = output - if self.incontent: - contentparams = copy.deepcopy(self.contentparams) - contentparams['value'] = output - self.entries[-1][element + '_detail'] = contentparams - elif (self.infeed or self.insource):# and (not self.intextinput) and (not self.inimage): - context = self._getContext() - if element == 'description': - element = 'subtitle' - context[element] = output - if element == 'link': - # fix query variables; see above for the explanation - output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output) - context[element] = output - context['links'][-1]['href'] = output - elif self.incontent: - contentparams = copy.deepcopy(self.contentparams) - contentparams['value'] = output - context[element + '_detail'] = contentparams - return output - - def pushContent(self, tag, attrsD, defaultContentType, expectingText): - self.incontent += 1 - if self.lang: self.lang=self.lang.replace('_','-') - self.contentparams = FeedParserDict({ - 'type': self.mapContentType(attrsD.get('type', defaultContentType)), - 'language': self.lang, - 'base': self.baseuri}) - self.contentparams['base64'] = self._isBase64(attrsD, self.contentparams) - self.push(tag, expectingText) - - def popContent(self, tag): - value = self.pop(tag) - self.incontent -= 1 - self.contentparams.clear() - return value - - # a number of elements in a number of RSS variants are nominally plain - # text, but this is routinely ignored. This is an attempt to detect - # the most common cases. As false positives often result in silent - # data loss, this function errs on the conservative side. - def lookslikehtml(self, s): - if self.version.startswith('atom'): return - if self.contentparams.get('type','text/html') != 'text/plain': return - - # must have a close tag or a entity reference to qualify - if not (re.search(r'',s) or re.search("&#?\w+;",s)): return - - # all tags must be in a restricted subset of valid HTML tags - if filter(lambda t: t.lower() not in _HTMLSanitizer.acceptable_elements, - re.findall(r' -1: - prefix = name[:colonpos] - suffix = name[colonpos+1:] - prefix = self.namespacemap.get(prefix, prefix) - name = prefix + ':' + suffix - return name - - def _getAttribute(self, attrsD, name): - return attrsD.get(self._mapToStandardPrefix(name)) - - def _isBase64(self, attrsD, contentparams): - if attrsD.get('mode', '') == 'base64': - return 1 - if self.contentparams['type'].startswith('text/'): - return 0 - if self.contentparams['type'].endswith('+xml'): - return 0 - if self.contentparams['type'].endswith('/xml'): - return 0 - return 1 - - def _itsAnHrefDamnIt(self, attrsD): - href = attrsD.get('url', attrsD.get('uri', attrsD.get('href', None))) - if href: - try: - del attrsD['url'] - except KeyError: - pass - try: - del attrsD['uri'] - except KeyError: - pass - attrsD['href'] = href - return attrsD - - def _save(self, key, value, overwrite=False): - context = self._getContext() - if overwrite: - context[key] = value - else: - context.setdefault(key, value) - - def _start_rss(self, attrsD): - versionmap = {'0.91': 'rss091u', - '0.92': 'rss092', - '0.93': 'rss093', - '0.94': 'rss094'} - #If we're here then this is an RSS feed. - #If we don't have a version or have a version that starts with something - #other than RSS then there's been a mistake. Correct it. - if not self.version or not self.version.startswith('rss'): - attr_version = attrsD.get('version', '') - version = versionmap.get(attr_version) - if version: - self.version = version - elif attr_version.startswith('2.'): - self.version = 'rss20' - else: - self.version = 'rss' - - def _start_dlhottitles(self, attrsD): - self.version = 'hotrss' - - def _start_channel(self, attrsD): - self.infeed = 1 - self._cdf_common(attrsD) - _start_feedinfo = _start_channel - - def _cdf_common(self, attrsD): - if attrsD.has_key('lastmod'): - self._start_modified({}) - self.elementstack[-1][-1] = attrsD['lastmod'] - self._end_modified() - if attrsD.has_key('href'): - self._start_link({}) - self.elementstack[-1][-1] = attrsD['href'] - self._end_link() - - def _start_feed(self, attrsD): - self.infeed = 1 - versionmap = {'0.1': 'atom01', - '0.2': 'atom02', - '0.3': 'atom03'} - if not self.version: - attr_version = attrsD.get('version') - version = versionmap.get(attr_version) - if version: - self.version = version - else: - self.version = 'atom' - - def _end_channel(self): - self.infeed = 0 - _end_feed = _end_channel - - def _start_image(self, attrsD): - context = self._getContext() - if not self.inentry: - context.setdefault('image', FeedParserDict()) - self.inimage = 1 - self.hasTitle = 0 - self.push('image', 0) - - def _end_image(self): - self.pop('image') - self.inimage = 0 - - def _start_textinput(self, attrsD): - context = self._getContext() - context.setdefault('textinput', FeedParserDict()) - self.intextinput = 1 - self.hasTitle = 0 - self.push('textinput', 0) - _start_textInput = _start_textinput - - def _end_textinput(self): - self.pop('textinput') - self.intextinput = 0 - _end_textInput = _end_textinput - - def _start_author(self, attrsD): - self.inauthor = 1 - self.push('author', 1) - # Append a new FeedParserDict when expecting an author - context = self._getContext() - context.setdefault('authors', []) - context['authors'].append(FeedParserDict()) - _start_managingeditor = _start_author - _start_dc_author = _start_author - _start_dc_creator = _start_author - _start_itunes_author = _start_author - - def _end_author(self): - self.pop('author') - self.inauthor = 0 - self._sync_author_detail() - _end_managingeditor = _end_author - _end_dc_author = _end_author - _end_dc_creator = _end_author - _end_itunes_author = _end_author - - def _start_itunes_owner(self, attrsD): - self.inpublisher = 1 - self.push('publisher', 0) - - def _end_itunes_owner(self): - self.pop('publisher') - self.inpublisher = 0 - self._sync_author_detail('publisher') - - def _start_contributor(self, attrsD): - self.incontributor = 1 - context = self._getContext() - context.setdefault('contributors', []) - context['contributors'].append(FeedParserDict()) - self.push('contributor', 0) - - def _end_contributor(self): - self.pop('contributor') - self.incontributor = 0 - - def _start_dc_contributor(self, attrsD): - self.incontributor = 1 - context = self._getContext() - context.setdefault('contributors', []) - context['contributors'].append(FeedParserDict()) - self.push('name', 0) - - def _end_dc_contributor(self): - self._end_name() - self.incontributor = 0 - - def _start_name(self, attrsD): - self.push('name', 0) - _start_itunes_name = _start_name - - def _end_name(self): - value = self.pop('name') - if self.inpublisher: - self._save_author('name', value, 'publisher') - elif self.inauthor: - self._save_author('name', value) - elif self.incontributor: - self._save_contributor('name', value) - elif self.intextinput: - context = self._getContext() - context['name'] = value - _end_itunes_name = _end_name - - def _start_width(self, attrsD): - self.push('width', 0) - - def _end_width(self): - value = self.pop('width') - try: - value = int(value) - except: - value = 0 - if self.inimage: - context = self._getContext() - context['width'] = value - - def _start_height(self, attrsD): - self.push('height', 0) - - def _end_height(self): - value = self.pop('height') - try: - value = int(value) - except: - value = 0 - if self.inimage: - context = self._getContext() - context['height'] = value - - def _start_url(self, attrsD): - self.push('href', 1) - _start_homepage = _start_url - _start_uri = _start_url - - def _end_url(self): - value = self.pop('href') - if self.inauthor: - self._save_author('href', value) - elif self.incontributor: - self._save_contributor('href', value) - _end_homepage = _end_url - _end_uri = _end_url - - def _start_email(self, attrsD): - self.push('email', 0) - _start_itunes_email = _start_email - - def _end_email(self): - value = self.pop('email') - if self.inpublisher: - self._save_author('email', value, 'publisher') - elif self.inauthor: - self._save_author('email', value) - elif self.incontributor: - self._save_contributor('email', value) - _end_itunes_email = _end_email - - def _getContext(self): - if self.insource: - context = self.sourcedata - elif self.inimage and self.feeddata.has_key('image'): - context = self.feeddata['image'] - elif self.intextinput: - context = self.feeddata['textinput'] - elif self.inentry: - context = self.entries[-1] - else: - context = self.feeddata - return context - - def _save_author(self, key, value, prefix='author'): - context = self._getContext() - context.setdefault(prefix + '_detail', FeedParserDict()) - context[prefix + '_detail'][key] = value - self._sync_author_detail() - context.setdefault('authors', [FeedParserDict()]) - context['authors'][-1][key] = value - - def _save_contributor(self, key, value): - context = self._getContext() - context.setdefault('contributors', [FeedParserDict()]) - context['contributors'][-1][key] = value - - def _sync_author_detail(self, key='author'): - context = self._getContext() - detail = context.get('%s_detail' % key) - if detail: - name = detail.get('name') - email = detail.get('email') - if name and email: - context[key] = '%s (%s)' % (name, email) - elif name: - context[key] = name - elif email: - context[key] = email - else: - author, email = context.get(key), None - if not author: return - emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))(\?subject=\S+)?''', author) - if emailmatch: - email = emailmatch.group(0) - # probably a better way to do the following, but it passes all the tests - author = author.replace(email, '') - author = author.replace('()', '') - author = author.replace('<>', '') - author = author.replace('<>', '') - author = author.strip() - if author and (author[0] == '('): - author = author[1:] - if author and (author[-1] == ')'): - author = author[:-1] - author = author.strip() - if author or email: - context.setdefault('%s_detail' % key, FeedParserDict()) - if author: - context['%s_detail' % key]['name'] = author - if email: - context['%s_detail' % key]['email'] = email - - def _start_subtitle(self, attrsD): - self.pushContent('subtitle', attrsD, 'text/plain', 1) - _start_tagline = _start_subtitle - _start_itunes_subtitle = _start_subtitle - - def _end_subtitle(self): - self.popContent('subtitle') - _end_tagline = _end_subtitle - _end_itunes_subtitle = _end_subtitle - - def _start_rights(self, attrsD): - self.pushContent('rights', attrsD, 'text/plain', 1) - _start_dc_rights = _start_rights - _start_copyright = _start_rights - - def _end_rights(self): - self.popContent('rights') - _end_dc_rights = _end_rights - _end_copyright = _end_rights - - def _start_item(self, attrsD): - self.entries.append(FeedParserDict()) - self.push('item', 0) - self.inentry = 1 - self.guidislink = 0 - self.hasTitle = 0 - id = self._getAttribute(attrsD, 'rdf:about') - if id: - context = self._getContext() - context['id'] = id - self._cdf_common(attrsD) - _start_entry = _start_item - _start_product = _start_item - - def _end_item(self): - self.pop('item') - self.inentry = 0 - _end_entry = _end_item - - def _start_dc_language(self, attrsD): - self.push('language', 1) - _start_language = _start_dc_language - - def _end_dc_language(self): - self.lang = self.pop('language') - _end_language = _end_dc_language - - def _start_dc_publisher(self, attrsD): - self.push('publisher', 1) - _start_webmaster = _start_dc_publisher - - def _end_dc_publisher(self): - self.pop('publisher') - self._sync_author_detail('publisher') - _end_webmaster = _end_dc_publisher - - def _start_published(self, attrsD): - self.push('published', 1) - _start_dcterms_issued = _start_published - _start_issued = _start_published - - def _end_published(self): - value = self.pop('published') - self._save('published_parsed', _parse_date(value), overwrite=True) - _end_dcterms_issued = _end_published - _end_issued = _end_published - - def _start_updated(self, attrsD): - self.push('updated', 1) - _start_modified = _start_updated - _start_dcterms_modified = _start_updated - _start_pubdate = _start_updated - _start_dc_date = _start_updated - _start_lastbuilddate = _start_updated - - def _end_updated(self): - value = self.pop('updated') - parsed_value = _parse_date(value) - self._save('updated_parsed', parsed_value, overwrite=True) - _end_modified = _end_updated - _end_dcterms_modified = _end_updated - _end_pubdate = _end_updated - _end_dc_date = _end_updated - _end_lastbuilddate = _end_updated - - def _start_created(self, attrsD): - self.push('created', 1) - _start_dcterms_created = _start_created - - def _end_created(self): - value = self.pop('created') - self._save('created_parsed', _parse_date(value), overwrite=True) - _end_dcterms_created = _end_created - - def _start_expirationdate(self, attrsD): - self.push('expired', 1) - - def _end_expirationdate(self): - self._save('expired_parsed', _parse_date(self.pop('expired')), overwrite=True) - - def _start_cc_license(self, attrsD): - context = self._getContext() - value = self._getAttribute(attrsD, 'rdf:resource') - attrsD = FeedParserDict() - attrsD['rel']='license' - if value: attrsD['href']=value - context.setdefault('links', []).append(attrsD) - - def _start_creativecommons_license(self, attrsD): - self.push('license', 1) - _start_creativeCommons_license = _start_creativecommons_license - - def _end_creativecommons_license(self): - value = self.pop('license') - context = self._getContext() - attrsD = FeedParserDict() - attrsD['rel']='license' - if value: attrsD['href']=value - context.setdefault('links', []).append(attrsD) - del context['license'] - _end_creativeCommons_license = _end_creativecommons_license - - def _addXFN(self, relationships, href, name): - context = self._getContext() - xfn = context.setdefault('xfn', []) - value = FeedParserDict({'relationships': relationships, 'href': href, 'name': name}) - if value not in xfn: - xfn.append(value) - - def _addTag(self, term, scheme, label): - context = self._getContext() - tags = context.setdefault('tags', []) - if (not term) and (not scheme) and (not label): return - value = FeedParserDict({'term': term, 'scheme': scheme, 'label': label}) - if value not in tags: - tags.append(value) - - def _start_category(self, attrsD): - if _debug: sys.stderr.write('entering _start_category with %s\n' % repr(attrsD)) - term = attrsD.get('term') - scheme = attrsD.get('scheme', attrsD.get('domain')) - label = attrsD.get('label') - self._addTag(term, scheme, label) - self.push('category', 1) - _start_dc_subject = _start_category - _start_keywords = _start_category - - def _start_media_category(self, attrsD): - attrsD.setdefault('scheme', 'http://search.yahoo.com/mrss/category_schema') - self._start_category(attrsD) - - def _end_itunes_keywords(self): - for term in self.pop('itunes_keywords').split(): - self._addTag(term, 'http://www.itunes.com/', None) - - def _start_itunes_category(self, attrsD): - self._addTag(attrsD.get('text'), 'http://www.itunes.com/', None) - self.push('category', 1) - - def _end_category(self): - value = self.pop('category') - if not value: return - context = self._getContext() - tags = context['tags'] - if value and len(tags) and not tags[-1]['term']: - tags[-1]['term'] = value - else: - self._addTag(value, None, None) - _end_dc_subject = _end_category - _end_keywords = _end_category - _end_itunes_category = _end_category - _end_media_category = _end_category - - def _start_cloud(self, attrsD): - self._getContext()['cloud'] = FeedParserDict(attrsD) - - def _start_link(self, attrsD): - attrsD.setdefault('rel', 'alternate') - if attrsD['rel'] == 'self': - attrsD.setdefault('type', 'application/atom+xml') - else: - attrsD.setdefault('type', 'text/html') - context = self._getContext() - attrsD = self._itsAnHrefDamnIt(attrsD) - if attrsD.has_key('href'): - attrsD['href'] = self.resolveURI(attrsD['href']) - expectingText = self.infeed or self.inentry or self.insource - context.setdefault('links', []) - if not (self.inentry and self.inimage): - context['links'].append(FeedParserDict(attrsD)) - if attrsD.has_key('href'): - expectingText = 0 - if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types): - context['link'] = attrsD['href'] - else: - self.push('link', expectingText) - _start_producturl = _start_link - - def _end_link(self): - value = self.pop('link') - context = self._getContext() - _end_producturl = _end_link - - def _start_guid(self, attrsD): - self.guidislink = (attrsD.get('ispermalink', 'true') == 'true') - self.push('id', 1) - - def _end_guid(self): - value = self.pop('id') - self._save('guidislink', self.guidislink and not self._getContext().has_key('link')) - if self.guidislink: - # guid acts as link, but only if 'ispermalink' is not present or is 'true', - # and only if the item doesn't already have a link element - self._save('link', value) - - def _start_title(self, attrsD): - if self.svgOK: return self.unknown_starttag('title', attrsD.items()) - self.pushContent('title', attrsD, 'text/plain', self.infeed or self.inentry or self.insource) - _start_dc_title = _start_title - _start_media_title = _start_title - - def _end_title(self): - if self.svgOK: return - value = self.popContent('title') - if not value: return - context = self._getContext() - self.hasTitle = 1 - _end_dc_title = _end_title - - def _end_media_title(self): - hasTitle = self.hasTitle - self._end_title() - self.hasTitle = hasTitle - - def _start_description(self, attrsD): - context = self._getContext() - if context.has_key('summary'): - self._summaryKey = 'content' - self._start_content(attrsD) - else: - self.pushContent('description', attrsD, 'text/html', self.infeed or self.inentry or self.insource) - _start_dc_description = _start_description - - def _start_abstract(self, attrsD): - self.pushContent('description', attrsD, 'text/plain', self.infeed or self.inentry or self.insource) - - def _end_description(self): - if self._summaryKey == 'content': - self._end_content() - else: - value = self.popContent('description') - self._summaryKey = None - _end_abstract = _end_description - _end_dc_description = _end_description - - def _start_info(self, attrsD): - self.pushContent('info', attrsD, 'text/plain', 1) - _start_feedburner_browserfriendly = _start_info - - def _end_info(self): - self.popContent('info') - _end_feedburner_browserfriendly = _end_info - - def _start_generator(self, attrsD): - if attrsD: - attrsD = self._itsAnHrefDamnIt(attrsD) - if attrsD.has_key('href'): - attrsD['href'] = self.resolveURI(attrsD['href']) - self._getContext()['generator_detail'] = FeedParserDict(attrsD) - self.push('generator', 1) - - def _end_generator(self): - value = self.pop('generator') - context = self._getContext() - if context.has_key('generator_detail'): - context['generator_detail']['name'] = value - - def _start_admin_generatoragent(self, attrsD): - self.push('generator', 1) - value = self._getAttribute(attrsD, 'rdf:resource') - if value: - self.elementstack[-1][2].append(value) - self.pop('generator') - self._getContext()['generator_detail'] = FeedParserDict({'href': value}) - - def _start_admin_errorreportsto(self, attrsD): - self.push('errorreportsto', 1) - value = self._getAttribute(attrsD, 'rdf:resource') - if value: - self.elementstack[-1][2].append(value) - self.pop('errorreportsto') - - def _start_summary(self, attrsD): - context = self._getContext() - if context.has_key('summary'): - self._summaryKey = 'content' - self._start_content(attrsD) - else: - self._summaryKey = 'summary' - self.pushContent(self._summaryKey, attrsD, 'text/plain', 1) - _start_itunes_summary = _start_summary - - def _end_summary(self): - if self._summaryKey == 'content': - self._end_content() - else: - self.popContent(self._summaryKey or 'summary') - self._summaryKey = None - _end_itunes_summary = _end_summary - - def _start_enclosure(self, attrsD): - attrsD = self._itsAnHrefDamnIt(attrsD) - context = self._getContext() - attrsD['rel']='enclosure' - context.setdefault('links', []).append(FeedParserDict(attrsD)) - - def _start_source(self, attrsD): - if 'url' in attrsD: - # This means that we're processing a source element from an RSS 2.0 feed - self.sourcedata['href'] = attrsD[u'url'] - self.push('source', 1) - self.insource = 1 - self.hasTitle = 0 - - def _end_source(self): - self.insource = 0 - value = self.pop('source') - if value: - self.sourcedata['title'] = value - self._getContext()['source'] = copy.deepcopy(self.sourcedata) - self.sourcedata.clear() - - def _start_content(self, attrsD): - self.pushContent('content', attrsD, 'text/plain', 1) - src = attrsD.get('src') - if src: - self.contentparams['src'] = src - self.push('content', 1) - - def _start_prodlink(self, attrsD): - self.pushContent('content', attrsD, 'text/html', 1) - - def _start_body(self, attrsD): - self.pushContent('content', attrsD, 'application/xhtml+xml', 1) - _start_xhtml_body = _start_body - - def _start_content_encoded(self, attrsD): - self.pushContent('content', attrsD, 'text/html', 1) - _start_fullitem = _start_content_encoded - - def _end_content(self): - copyToSummary = self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types) - value = self.popContent('content') - if copyToSummary: - self._save('summary', value) - - _end_body = _end_content - _end_xhtml_body = _end_content - _end_content_encoded = _end_content - _end_fullitem = _end_content - _end_prodlink = _end_content - - def _start_itunes_image(self, attrsD): - self.push('itunes_image', 0) - if attrsD.get('href'): - self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')}) - _start_itunes_link = _start_itunes_image - - def _end_itunes_block(self): - value = self.pop('itunes_block', 0) - self._getContext()['itunes_block'] = (value == 'yes') and 1 or 0 - - def _end_itunes_explicit(self): - value = self.pop('itunes_explicit', 0) - # Convert 'yes' -> True, 'clean' to False, and any other value to None - # False and None both evaluate as False, so the difference can be ignored - # by applications that only need to know if the content is explicit. - self._getContext()['itunes_explicit'] = (None, False, True)[(value == 'yes' and 2) or value == 'clean' or 0] - - def _start_media_content(self, attrsD): - context = self._getContext() - context.setdefault('media_content', []) - context['media_content'].append(attrsD) - - def _start_media_thumbnail(self, attrsD): - context = self._getContext() - context.setdefault('media_thumbnail', []) - self.push('url', 1) # new - context['media_thumbnail'].append(attrsD) - - def _end_media_thumbnail(self): - url = self.pop('url') - context = self._getContext() - if url != None and len(url.strip()) != 0: - if not context['media_thumbnail'][-1].has_key('url'): - context['media_thumbnail'][-1]['url'] = url - - def _start_media_player(self, attrsD): - self.push('media_player', 0) - self._getContext()['media_player'] = FeedParserDict(attrsD) - - def _end_media_player(self): - value = self.pop('media_player') - context = self._getContext() - context['media_player']['content'] = value - - def _start_newlocation(self, attrsD): - self.push('newlocation', 1) - - def _end_newlocation(self): - url = self.pop('newlocation') - context = self._getContext() - # don't set newlocation if the context isn't right - if context is not self.feeddata: - return - context['newlocation'] = _makeSafeAbsoluteURI(self.baseuri, url.strip()) - -if _XML_AVAILABLE: - class _StrictFeedParser(_FeedParserMixin, xml.sax.handler.ContentHandler): - def __init__(self, baseuri, baselang, encoding): - if _debug: sys.stderr.write('trying StrictFeedParser\n') - xml.sax.handler.ContentHandler.__init__(self) - _FeedParserMixin.__init__(self, baseuri, baselang, encoding) - self.bozo = 0 - self.exc = None - self.decls = {} - - def startPrefixMapping(self, prefix, uri): - self.trackNamespace(prefix, uri) - if uri == 'http://www.w3.org/1999/xlink': - self.decls['xmlns:'+prefix] = uri - - def startElementNS(self, name, qname, attrs): - namespace, localname = name - lowernamespace = str(namespace or '').lower() - if lowernamespace.find('backend.userland.com/rss') <> -1: - # match any backend.userland.com namespace - namespace = 'http://backend.userland.com/rss' - lowernamespace = namespace - if qname and qname.find(':') > 0: - givenprefix = qname.split(':')[0] - else: - givenprefix = None - prefix = self._matchnamespaces.get(lowernamespace, givenprefix) - if givenprefix and (prefix is None or (prefix == '' and lowernamespace == '')) and not self.namespacesInUse.has_key(givenprefix): - raise UndeclaredNamespace, "'%s' is not associated with a namespace" % givenprefix - localname = str(localname).lower() - - # qname implementation is horribly broken in Python 2.1 (it - # doesn't report any), and slightly broken in Python 2.2 (it - # doesn't report the xml: namespace). So we match up namespaces - # with a known list first, and then possibly override them with - # the qnames the SAX parser gives us (if indeed it gives us any - # at all). Thanks to MatejC for helping me test this and - # tirelessly telling me that it didn't work yet. - attrsD, self.decls = self.decls, {} - if localname=='math' and namespace=='http://www.w3.org/1998/Math/MathML': - attrsD['xmlns']=namespace - if localname=='svg' and namespace=='http://www.w3.org/2000/svg': - attrsD['xmlns']=namespace - - if prefix: - localname = prefix.lower() + ':' + localname - elif namespace and not qname: #Expat - for name,value in self.namespacesInUse.items(): - if name and value == namespace: - localname = name + ':' + localname - break - if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname)) - - for (namespace, attrlocalname), attrvalue in attrs._attrs.items(): - lowernamespace = (namespace or '').lower() - prefix = self._matchnamespaces.get(lowernamespace, '') - if prefix: - attrlocalname = prefix + ':' + attrlocalname - attrsD[str(attrlocalname).lower()] = attrvalue - for qname in attrs.getQNames(): - attrsD[str(qname).lower()] = attrs.getValueByQName(qname) - self.unknown_starttag(localname, attrsD.items()) - - def characters(self, text): - self.handle_data(text) - - def endElementNS(self, name, qname): - namespace, localname = name - lowernamespace = str(namespace or '').lower() - if qname and qname.find(':') > 0: - givenprefix = qname.split(':')[0] - else: - givenprefix = '' - prefix = self._matchnamespaces.get(lowernamespace, givenprefix) - if prefix: - localname = prefix + ':' + localname - elif namespace and not qname: #Expat - for name,value in self.namespacesInUse.items(): - if name and value == namespace: - localname = name + ':' + localname - break - localname = str(localname).lower() - self.unknown_endtag(localname) - - def error(self, exc): - self.bozo = 1 - self.exc = exc - - def fatalError(self, exc): - self.error(exc) - raise exc - -class _BaseHTMLProcessor(sgmllib.SGMLParser): - special = re.compile('''[<>'"]''') - bare_ampersand = re.compile("&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)") - elements_no_end_tag = [ - 'area', 'base', 'basefont', 'br', 'col', 'command', 'embed', 'frame', - 'hr', 'img', 'input', 'isindex', 'keygen', 'link', 'meta', 'param', - 'source', 'track', 'wbr' - ] - - def __init__(self, encoding, _type): - self.encoding = encoding - self._type = _type - if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding) - sgmllib.SGMLParser.__init__(self) - - def reset(self): - self.pieces = [] - sgmllib.SGMLParser.reset(self) - - def _shorttag_replace(self, match): - tag = match.group(1) - if tag in self.elements_no_end_tag: - return '<' + tag + ' />' - else: - return '<' + tag + '>' - - def parse_starttag(self,i): - j=sgmllib.SGMLParser.parse_starttag(self, i) - if self._type == 'application/xhtml+xml': - if j>2 and self.rawdata[j-2:j]=='/>': - self.unknown_endtag(self.lasttag) - return j - - def feed(self, data): - data = re.compile(r'', self._shorttag_replace, data) # bug [ 1399464 ] Bad regexp for _shorttag_replace - data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data) - data = data.replace(''', "'") - data = data.replace('"', '"') - try: - bytes - if bytes is str: - raise NameError - self.encoding = self.encoding + '_INVALID_PYTHON_3' - except NameError: - if self.encoding and type(data) == type(u''): - data = data.encode(self.encoding) - sgmllib.SGMLParser.feed(self, data) - sgmllib.SGMLParser.close(self) - - def normalize_attrs(self, attrs): - if not attrs: return attrs - # utility method to be called by descendants - attrs = dict([(k.lower(), v) for k, v in attrs]).items() - attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs] - attrs.sort() - return attrs - - def unknown_starttag(self, tag, attrs): - # called for each start tag - # attrs is a list of (attr, value) tuples - # e.g. for
, tag='pre', attrs=[('class', 'screen')]
-        if _debug: sys.stderr.write('_BaseHTMLProcessor, unknown_starttag, tag=%s\n' % tag)
-        uattrs = []
-        strattrs=''
-        if attrs:
-            for key, value in attrs:
-                value=value.replace('>','>').replace('<','<').replace('"','"')
-                value = self.bare_ampersand.sub("&", value)
-                # thanks to Kevin Marks for this breathtaking hack to deal with (valid) high-bit attribute values in UTF-8 feeds
-                if type(value) != type(u''):
-                    try:
-                        value = unicode(value, self.encoding)
-                    except:
-                        value = unicode(value, 'iso-8859-1')
-                try:
-                    # Currently, in Python 3 the key is already a str, and cannot be decoded again
-                    uattrs.append((unicode(key, self.encoding), value))
-                except TypeError:
-                    uattrs.append((key, value))
-            strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs])
-            if self.encoding:
-                try:
-                    strattrs=strattrs.encode(self.encoding)
-                except:
-                    pass
-        if tag in self.elements_no_end_tag:
-            self.pieces.append('<%(tag)s%(strattrs)s />' % locals())
-        else:
-            self.pieces.append('<%(tag)s%(strattrs)s>' % locals())
-
-    def unknown_endtag(self, tag):
-        # called for each end tag, e.g. for 
, tag will be 'pre' - # Reconstruct the original end tag. - if tag not in self.elements_no_end_tag: - self.pieces.append("" % locals()) - - def handle_charref(self, ref): - # called for each character reference, e.g. for ' ', ref will be '160' - # Reconstruct the original character reference. - if ref.startswith('x'): - value = unichr(int(ref[1:],16)) - else: - value = unichr(int(ref)) - - if value in _cp1252.keys(): - self.pieces.append('&#%s;' % hex(ord(_cp1252[value]))[1:]) - else: - self.pieces.append('&#%(ref)s;' % locals()) - - def handle_entityref(self, ref): - # called for each entity reference, e.g. for '©', ref will be 'copy' - # Reconstruct the original entity reference. - if name2codepoint.has_key(ref): - self.pieces.append('&%(ref)s;' % locals()) - else: - self.pieces.append('&%(ref)s' % locals()) - - def handle_data(self, text): - # called for each block of plain text, i.e. outside of any tag and - # not containing any character or entity references - # Store the original text verbatim. - if _debug: sys.stderr.write('_BaseHTMLProcessor, handle_data, text=%s\n' % text) - self.pieces.append(text) - - def handle_comment(self, text): - # called for each HTML comment, e.g. - # Reconstruct the original comment. - self.pieces.append('' % locals()) - - def handle_pi(self, text): - # called for each processing instruction, e.g. - # Reconstruct original processing instruction. - self.pieces.append('' % locals()) - - def handle_decl(self, text): - # called for the DOCTYPE, if present, e.g. - # - # Reconstruct original DOCTYPE - self.pieces.append('' % locals()) - - _new_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9:]*\s*').match - def _scan_name(self, i, declstartpos): - rawdata = self.rawdata - n = len(rawdata) - if i == n: - return None, -1 - m = self._new_declname_match(rawdata, i) - if m: - s = m.group() - name = s.strip() - if (i + len(s)) == n: - return None, -1 # end of buffer - return name.lower(), m.end() - else: - self.handle_data(rawdata) -# self.updatepos(declstartpos, i) - return None, -1 - - def convert_charref(self, name): - return '&#%s;' % name - - def convert_entityref(self, name): - return '&%s;' % name - - def output(self): - '''Return processed HTML as a single string''' - return ''.join([str(p) for p in self.pieces]) - - def parse_declaration(self, i): - try: - return sgmllib.SGMLParser.parse_declaration(self, i) - except sgmllib.SGMLParseError: - # escape the doctype declaration and continue parsing - self.handle_data('<') - return i+1 - -class _LooseFeedParser(_FeedParserMixin, _BaseHTMLProcessor): - def __init__(self, baseuri, baselang, encoding, entities): - sgmllib.SGMLParser.__init__(self) - _FeedParserMixin.__init__(self, baseuri, baselang, encoding) - _BaseHTMLProcessor.__init__(self, encoding, 'application/xhtml+xml') - self.entities=entities - - def decodeEntities(self, element, data): - data = data.replace('<', '<') - data = data.replace('<', '<') - data = data.replace('<', '<') - data = data.replace('>', '>') - data = data.replace('>', '>') - data = data.replace('>', '>') - data = data.replace('&', '&') - data = data.replace('&', '&') - data = data.replace('"', '"') - data = data.replace('"', '"') - data = data.replace(''', ''') - data = data.replace(''', ''') - if self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): - data = data.replace('<', '<') - data = data.replace('>', '>') - data = data.replace('&', '&') - data = data.replace('"', '"') - data = data.replace(''', "'") - return data - - def strattrs(self, attrs): - return ''.join([' %s="%s"' % (n,v.replace('"','"')) for n,v in attrs]) - -class _MicroformatsParser: - STRING = 1 - DATE = 2 - URI = 3 - NODE = 4 - EMAIL = 5 - - known_xfn_relationships = ['contact', 'acquaintance', 'friend', 'met', 'co-worker', 'coworker', 'colleague', 'co-resident', 'coresident', 'neighbor', 'child', 'parent', 'sibling', 'brother', 'sister', 'spouse', 'wife', 'husband', 'kin', 'relative', 'muse', 'crush', 'date', 'sweetheart', 'me'] - known_binary_extensions = ['zip','rar','exe','gz','tar','tgz','tbz2','bz2','z','7z','dmg','img','sit','sitx','hqx','deb','rpm','bz2','jar','rar','iso','bin','msi','mp2','mp3','ogg','ogm','mp4','m4v','m4a','avi','wma','wmv'] - - def __init__(self, data, baseuri, encoding): - self.document = BeautifulSoup.BeautifulSoup(data) - self.baseuri = baseuri - self.encoding = encoding - if type(data) == type(u''): - data = data.encode(encoding) - self.tags = [] - self.enclosures = [] - self.xfn = [] - self.vcard = None - - def vcardEscape(self, s): - if type(s) in (type(''), type(u'')): - s = s.replace(',', '\\,').replace(';', '\\;').replace('\n', '\\n') - return s - - def vcardFold(self, s): - s = re.sub(';+$', '', s) - sFolded = '' - iMax = 75 - sPrefix = '' - while len(s) > iMax: - sFolded += sPrefix + s[:iMax] + '\n' - s = s[iMax:] - sPrefix = ' ' - iMax = 74 - sFolded += sPrefix + s - return sFolded - - def normalize(self, s): - return re.sub(r'\s+', ' ', s).strip() - - def unique(self, aList): - results = [] - for element in aList: - if element not in results: - results.append(element) - return results - - def toISO8601(self, dt): - return time.strftime('%Y-%m-%dT%H:%M:%SZ', dt) - - def getPropertyValue(self, elmRoot, sProperty, iPropertyType=4, bAllowMultiple=0, bAutoEscape=0): - all = lambda x: 1 - sProperty = sProperty.lower() - bFound = 0 - bNormalize = 1 - propertyMatch = {'class': re.compile(r'\b%s\b' % sProperty)} - if bAllowMultiple and (iPropertyType != self.NODE): - snapResults = [] - containers = elmRoot(['ul', 'ol'], propertyMatch) - for container in containers: - snapResults.extend(container('li')) - bFound = (len(snapResults) != 0) - if not bFound: - snapResults = elmRoot(all, propertyMatch) - bFound = (len(snapResults) != 0) - if (not bFound) and (sProperty == 'value'): - snapResults = elmRoot('pre') - bFound = (len(snapResults) != 0) - bNormalize = not bFound - if not bFound: - snapResults = [elmRoot] - bFound = (len(snapResults) != 0) - arFilter = [] - if sProperty == 'vcard': - snapFilter = elmRoot(all, propertyMatch) - for node in snapFilter: - if node.findParent(all, propertyMatch): - arFilter.append(node) - arResults = [] - for node in snapResults: - if node not in arFilter: - arResults.append(node) - bFound = (len(arResults) != 0) - if not bFound: - if bAllowMultiple: return [] - elif iPropertyType == self.STRING: return '' - elif iPropertyType == self.DATE: return None - elif iPropertyType == self.URI: return '' - elif iPropertyType == self.NODE: return None - else: return None - arValues = [] - for elmResult in arResults: - sValue = None - if iPropertyType == self.NODE: - if bAllowMultiple: - arValues.append(elmResult) - continue - else: - return elmResult - sNodeName = elmResult.name.lower() - if (iPropertyType == self.EMAIL) and (sNodeName == 'a'): - sValue = (elmResult.get('href') or '').split('mailto:').pop().split('?')[0] - if sValue: - sValue = bNormalize and self.normalize(sValue) or sValue.strip() - if (not sValue) and (sNodeName == 'abbr'): - sValue = elmResult.get('title') - if sValue: - sValue = bNormalize and self.normalize(sValue) or sValue.strip() - if (not sValue) and (iPropertyType == self.URI): - if sNodeName == 'a': sValue = elmResult.get('href') - elif sNodeName == 'img': sValue = elmResult.get('src') - elif sNodeName == 'object': sValue = elmResult.get('data') - if sValue: - sValue = bNormalize and self.normalize(sValue) or sValue.strip() - if (not sValue) and (sNodeName == 'img'): - sValue = elmResult.get('alt') - if sValue: - sValue = bNormalize and self.normalize(sValue) or sValue.strip() - if not sValue: - sValue = elmResult.renderContents() - sValue = re.sub(r'<\S[^>]*>', '', sValue) - sValue = sValue.replace('\r\n', '\n') - sValue = sValue.replace('\r', '\n') - if sValue: - sValue = bNormalize and self.normalize(sValue) or sValue.strip() - if not sValue: continue - if iPropertyType == self.DATE: - sValue = _parse_date_iso8601(sValue) - if bAllowMultiple: - arValues.append(bAutoEscape and self.vcardEscape(sValue) or sValue) - else: - return bAutoEscape and self.vcardEscape(sValue) or sValue - return arValues - - def findVCards(self, elmRoot, bAgentParsing=0): - sVCards = '' - - if not bAgentParsing: - arCards = self.getPropertyValue(elmRoot, 'vcard', bAllowMultiple=1) - else: - arCards = [elmRoot] - - for elmCard in arCards: - arLines = [] - - def processSingleString(sProperty): - sValue = self.getPropertyValue(elmCard, sProperty, self.STRING, bAutoEscape=1).decode(self.encoding) - if sValue: - arLines.append(self.vcardFold(sProperty.upper() + ':' + sValue)) - return sValue or u'' - - def processSingleURI(sProperty): - sValue = self.getPropertyValue(elmCard, sProperty, self.URI) - if sValue: - sContentType = '' - sEncoding = '' - sValueKey = '' - if sValue.startswith('data:'): - sEncoding = ';ENCODING=b' - sContentType = sValue.split(';')[0].split('/').pop() - sValue = sValue.split(',', 1).pop() - else: - elmValue = self.getPropertyValue(elmCard, sProperty) - if elmValue: - if sProperty != 'url': - sValueKey = ';VALUE=uri' - sContentType = elmValue.get('type', '').strip().split('/').pop().strip() - sContentType = sContentType.upper() - if sContentType == 'OCTET-STREAM': - sContentType = '' - if sContentType: - sContentType = ';TYPE=' + sContentType.upper() - arLines.append(self.vcardFold(sProperty.upper() + sEncoding + sContentType + sValueKey + ':' + sValue)) - - def processTypeValue(sProperty, arDefaultType, arForceType=None): - arResults = self.getPropertyValue(elmCard, sProperty, bAllowMultiple=1) - for elmResult in arResults: - arType = self.getPropertyValue(elmResult, 'type', self.STRING, 1, 1) - if arForceType: - arType = self.unique(arForceType + arType) - if not arType: - arType = arDefaultType - sValue = self.getPropertyValue(elmResult, 'value', self.EMAIL, 0) - if sValue: - arLines.append(self.vcardFold(sProperty.upper() + ';TYPE=' + ','.join(arType) + ':' + sValue)) - - # AGENT - # must do this before all other properties because it is destructive - # (removes nested class="vcard" nodes so they don't interfere with - # this vcard's other properties) - arAgent = self.getPropertyValue(elmCard, 'agent', bAllowMultiple=1) - for elmAgent in arAgent: - if re.compile(r'\bvcard\b').search(elmAgent.get('class')): - sAgentValue = self.findVCards(elmAgent, 1) + '\n' - sAgentValue = sAgentValue.replace('\n', '\\n') - sAgentValue = sAgentValue.replace(';', '\\;') - if sAgentValue: - arLines.append(self.vcardFold('AGENT:' + sAgentValue)) - # Completely remove the agent element from the parse tree - elmAgent.extract() - else: - sAgentValue = self.getPropertyValue(elmAgent, 'value', self.URI, bAutoEscape=1); - if sAgentValue: - arLines.append(self.vcardFold('AGENT;VALUE=uri:' + sAgentValue)) - - # FN (full name) - sFN = processSingleString('fn') - - # N (name) - elmName = self.getPropertyValue(elmCard, 'n') - if elmName: - sFamilyName = self.getPropertyValue(elmName, 'family-name', self.STRING, bAutoEscape=1) - sGivenName = self.getPropertyValue(elmName, 'given-name', self.STRING, bAutoEscape=1) - arAdditionalNames = self.getPropertyValue(elmName, 'additional-name', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'additional-names', self.STRING, 1, 1) - arHonorificPrefixes = self.getPropertyValue(elmName, 'honorific-prefix', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'honorific-prefixes', self.STRING, 1, 1) - arHonorificSuffixes = self.getPropertyValue(elmName, 'honorific-suffix', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'honorific-suffixes', self.STRING, 1, 1) - arLines.append(self.vcardFold('N:' + sFamilyName + ';' + - sGivenName + ';' + - ','.join(arAdditionalNames) + ';' + - ','.join(arHonorificPrefixes) + ';' + - ','.join(arHonorificSuffixes))) - elif sFN: - # implied "N" optimization - # http://microformats.org/wiki/hcard#Implied_.22N.22_Optimization - arNames = self.normalize(sFN).split() - if len(arNames) == 2: - bFamilyNameFirst = (arNames[0].endswith(',') or - len(arNames[1]) == 1 or - ((len(arNames[1]) == 2) and (arNames[1].endswith('.')))) - if bFamilyNameFirst: - arLines.append(self.vcardFold('N:' + arNames[0] + ';' + arNames[1])) - else: - arLines.append(self.vcardFold('N:' + arNames[1] + ';' + arNames[0])) - - # SORT-STRING - sSortString = self.getPropertyValue(elmCard, 'sort-string', self.STRING, bAutoEscape=1) - if sSortString: - arLines.append(self.vcardFold('SORT-STRING:' + sSortString)) - - # NICKNAME - arNickname = self.getPropertyValue(elmCard, 'nickname', self.STRING, 1, 1) - if arNickname: - arLines.append(self.vcardFold('NICKNAME:' + ','.join(arNickname))) - - # PHOTO - processSingleURI('photo') - - # BDAY - dtBday = self.getPropertyValue(elmCard, 'bday', self.DATE) - if dtBday: - arLines.append(self.vcardFold('BDAY:' + self.toISO8601(dtBday))) - - # ADR (address) - arAdr = self.getPropertyValue(elmCard, 'adr', bAllowMultiple=1) - for elmAdr in arAdr: - arType = self.getPropertyValue(elmAdr, 'type', self.STRING, 1, 1) - if not arType: - arType = ['intl','postal','parcel','work'] # default adr types, see RFC 2426 section 3.2.1 - sPostOfficeBox = self.getPropertyValue(elmAdr, 'post-office-box', self.STRING, 0, 1) - sExtendedAddress = self.getPropertyValue(elmAdr, 'extended-address', self.STRING, 0, 1) - sStreetAddress = self.getPropertyValue(elmAdr, 'street-address', self.STRING, 0, 1) - sLocality = self.getPropertyValue(elmAdr, 'locality', self.STRING, 0, 1) - sRegion = self.getPropertyValue(elmAdr, 'region', self.STRING, 0, 1) - sPostalCode = self.getPropertyValue(elmAdr, 'postal-code', self.STRING, 0, 1) - sCountryName = self.getPropertyValue(elmAdr, 'country-name', self.STRING, 0, 1) - arLines.append(self.vcardFold('ADR;TYPE=' + ','.join(arType) + ':' + - sPostOfficeBox + ';' + - sExtendedAddress + ';' + - sStreetAddress + ';' + - sLocality + ';' + - sRegion + ';' + - sPostalCode + ';' + - sCountryName)) - - # LABEL - processTypeValue('label', ['intl','postal','parcel','work']) - - # TEL (phone number) - processTypeValue('tel', ['voice']) - - # EMAIL - processTypeValue('email', ['internet'], ['internet']) - - # MAILER - processSingleString('mailer') - - # TZ (timezone) - processSingleString('tz') - - # GEO (geographical information) - elmGeo = self.getPropertyValue(elmCard, 'geo') - if elmGeo: - sLatitude = self.getPropertyValue(elmGeo, 'latitude', self.STRING, 0, 1) - sLongitude = self.getPropertyValue(elmGeo, 'longitude', self.STRING, 0, 1) - arLines.append(self.vcardFold('GEO:' + sLatitude + ';' + sLongitude)) - - # TITLE - processSingleString('title') - - # ROLE - processSingleString('role') - - # LOGO - processSingleURI('logo') - - # ORG (organization) - elmOrg = self.getPropertyValue(elmCard, 'org') - if elmOrg: - sOrganizationName = self.getPropertyValue(elmOrg, 'organization-name', self.STRING, 0, 1) - if not sOrganizationName: - # implied "organization-name" optimization - # http://microformats.org/wiki/hcard#Implied_.22organization-name.22_Optimization - sOrganizationName = self.getPropertyValue(elmCard, 'org', self.STRING, 0, 1) - if sOrganizationName: - arLines.append(self.vcardFold('ORG:' + sOrganizationName)) - else: - arOrganizationUnit = self.getPropertyValue(elmOrg, 'organization-unit', self.STRING, 1, 1) - arLines.append(self.vcardFold('ORG:' + sOrganizationName + ';' + ';'.join(arOrganizationUnit))) - - # CATEGORY - arCategory = self.getPropertyValue(elmCard, 'category', self.STRING, 1, 1) + self.getPropertyValue(elmCard, 'categories', self.STRING, 1, 1) - if arCategory: - arLines.append(self.vcardFold('CATEGORIES:' + ','.join(arCategory))) - - # NOTE - processSingleString('note') - - # REV - processSingleString('rev') - - # SOUND - processSingleURI('sound') - - # UID - processSingleString('uid') - - # URL - processSingleURI('url') - - # CLASS - processSingleString('class') - - # KEY - processSingleURI('key') - - if arLines: - arLines = [u'BEGIN:vCard',u'VERSION:3.0'] + arLines + [u'END:vCard'] - sVCards += u'\n'.join(arLines) + u'\n' - - return sVCards.strip() - - def isProbablyDownloadable(self, elm): - attrsD = elm.attrMap - if not attrsD.has_key('href'): return 0 - linktype = attrsD.get('type', '').strip() - if linktype.startswith('audio/') or \ - linktype.startswith('video/') or \ - (linktype.startswith('application/') and not linktype.endswith('xml')): - return 1 - path = urlparse.urlparse(attrsD['href'])[2] - if path.find('.') == -1: return 0 - fileext = path.split('.').pop().lower() - return fileext in self.known_binary_extensions - - def findTags(self): - all = lambda x: 1 - for elm in self.document(all, {'rel': re.compile(r'\btag\b')}): - href = elm.get('href') - if not href: continue - urlscheme, domain, path, params, query, fragment = \ - urlparse.urlparse(_urljoin(self.baseuri, href)) - segments = path.split('/') - tag = segments.pop() - if not tag: - tag = segments.pop() - tagscheme = urlparse.urlunparse((urlscheme, domain, '/'.join(segments), '', '', '')) - if not tagscheme.endswith('/'): - tagscheme += '/' - self.tags.append(FeedParserDict({"term": tag, "scheme": tagscheme, "label": elm.string or ''})) - - def findEnclosures(self): - all = lambda x: 1 - enclosure_match = re.compile(r'\benclosure\b') - for elm in self.document(all, {'href': re.compile(r'.+')}): - if not enclosure_match.search(elm.get('rel', '')) and not self.isProbablyDownloadable(elm): continue - if elm.attrMap not in self.enclosures: - self.enclosures.append(elm.attrMap) - if elm.string and not elm.get('title'): - self.enclosures[-1]['title'] = elm.string - - def findXFN(self): - all = lambda x: 1 - for elm in self.document(all, {'rel': re.compile('.+'), 'href': re.compile('.+')}): - rels = elm.get('rel', '').split() - xfn_rels = [] - for rel in rels: - if rel in self.known_xfn_relationships: - xfn_rels.append(rel) - if xfn_rels: - self.xfn.append({"relationships": xfn_rels, "href": elm.get('href', ''), "name": elm.string}) - -def _parseMicroformats(htmlSource, baseURI, encoding): - if not BeautifulSoup: return - if _debug: sys.stderr.write('entering _parseMicroformats\n') - try: - p = _MicroformatsParser(htmlSource, baseURI, encoding) - except UnicodeEncodeError: - # sgmllib throws this exception when performing lookups of tags - # with non-ASCII characters in them. - return - p.vcard = p.findVCards(p.document) - p.findTags() - p.findEnclosures() - p.findXFN() - return {"tags": p.tags, "enclosures": p.enclosures, "xfn": p.xfn, "vcard": p.vcard} - -class _RelativeURIResolver(_BaseHTMLProcessor): - relative_uris = [('a', 'href'), - ('applet', 'codebase'), - ('area', 'href'), - ('blockquote', 'cite'), - ('body', 'background'), - ('del', 'cite'), - ('form', 'action'), - ('frame', 'longdesc'), - ('frame', 'src'), - ('iframe', 'longdesc'), - ('iframe', 'src'), - ('head', 'profile'), - ('img', 'longdesc'), - ('img', 'src'), - ('img', 'usemap'), - ('input', 'src'), - ('input', 'usemap'), - ('ins', 'cite'), - ('link', 'href'), - ('object', 'classid'), - ('object', 'codebase'), - ('object', 'data'), - ('object', 'usemap'), - ('q', 'cite'), - ('script', 'src')] - - def __init__(self, baseuri, encoding, _type): - _BaseHTMLProcessor.__init__(self, encoding, _type) - self.baseuri = baseuri - - def resolveURI(self, uri): - return _makeSafeAbsoluteURI(_urljoin(self.baseuri, uri.strip())) - - def unknown_starttag(self, tag, attrs): - if _debug: - sys.stderr.write('tag: [%s] with attributes: [%s]\n' % (tag, str(attrs))) - attrs = self.normalize_attrs(attrs) - attrs = [(key, ((tag, key) in self.relative_uris) and self.resolveURI(value) or value) for key, value in attrs] - _BaseHTMLProcessor.unknown_starttag(self, tag, attrs) - -def _resolveRelativeURIs(htmlSource, baseURI, encoding, _type): - if _debug: - sys.stderr.write('entering _resolveRelativeURIs\n') - - p = _RelativeURIResolver(baseURI, encoding, _type) - p.feed(htmlSource) - return p.output() - -def _makeSafeAbsoluteURI(base, rel=None): - # bail if ACCEPTABLE_URI_SCHEMES is empty - if not ACCEPTABLE_URI_SCHEMES: - return _urljoin(base, rel or u'') - if not base: - return rel or u'' - if not rel: - scheme = urlparse.urlparse(base)[0] - if not scheme or scheme in ACCEPTABLE_URI_SCHEMES: - return base - return u'' - uri = _urljoin(base, rel) - if uri.strip().split(':', 1)[0] not in ACCEPTABLE_URI_SCHEMES: - return u'' - return uri - -class _HTMLSanitizer(_BaseHTMLProcessor): - acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', - 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button', - 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', - 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', - 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset', - 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1', - 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', - 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter', - 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option', - 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select', - 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong', - 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', - 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video', 'noscript'] - - acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey', - 'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis', - 'background', 'balance', 'bgcolor', 'bgproperties', 'border', - 'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding', - 'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff', - 'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color', 'cols', - 'colspan', 'compact', 'contenteditable', 'controls', 'coords', 'data', - 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default', 'delay', - 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end', 'face', 'for', - 'form', 'frame', 'galleryimg', 'gutter', 'headers', 'height', 'hidefocus', - 'hidden', 'high', 'href', 'hreflang', 'hspace', 'icon', 'id', 'inputmode', - 'ismap', 'keytype', 'label', 'leftspacing', 'lang', 'list', 'longdesc', - 'loop', 'loopcount', 'loopend', 'loopstart', 'low', 'lowsrc', 'max', - 'maxlength', 'media', 'method', 'min', 'multiple', 'name', 'nohref', - 'noshade', 'nowrap', 'open', 'optimum', 'pattern', 'ping', 'point-size', - 'prompt', 'pqg', 'radiogroup', 'readonly', 'rel', 'repeat-max', - 'repeat-min', 'replace', 'required', 'rev', 'rightspacing', 'rows', - 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', - 'start', 'step', 'summary', 'suppress', 'tabindex', 'target', 'template', - 'title', 'toppadding', 'type', 'unselectable', 'usemap', 'urn', 'valign', - 'value', 'variable', 'volume', 'vspace', 'vrml', 'width', 'wrap', - 'xml:lang'] - - unacceptable_elements_with_end_tag = ['script', 'applet', 'style'] - - acceptable_css_properties = ['azimuth', 'background-color', - 'border-bottom-color', 'border-collapse', 'border-color', - 'border-left-color', 'border-right-color', 'border-top-color', 'clear', - 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font', - 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight', - 'height', 'letter-spacing', 'line-height', 'overflow', 'pause', - 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness', - 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation', - 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent', - 'unicode-bidi', 'vertical-align', 'voice-family', 'volume', - 'white-space', 'width'] - - # survey of common keywords found in feeds - acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue', - 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed', - 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left', - 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive', - 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top', - 'transparent', 'underline', 'white', 'yellow'] - - valid_css_values = re.compile('^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|' + - '\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$') - - mathml_elements = ['annotation', 'annotation-xml', 'maction', 'math', - 'merror', 'mfenced', 'mfrac', 'mi', 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', - 'mphantom', 'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', - 'msub', 'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', - 'munderover', 'none', 'semantics'] - - mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign', - 'columnalign', 'close', 'columnlines', 'columnspacing', 'columnspan', 'depth', - 'display', 'displaystyle', 'encoding', 'equalcolumns', 'equalrows', - 'fence', 'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', - 'lspace', 'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', - 'maxsize', 'minsize', 'open', 'other', 'rowalign', 'rowalign', 'rowalign', - 'rowlines', 'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection', - 'separator', 'separators', 'stretchy', 'width', 'width', 'xlink:href', - 'xlink:show', 'xlink:type', 'xmlns', 'xmlns:xlink'] - - # svgtiny - foreignObject + linearGradient + radialGradient + stop - svg_elements = ['a', 'animate', 'animateColor', 'animateMotion', - 'animateTransform', 'circle', 'defs', 'desc', 'ellipse', 'foreignObject', - 'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', - 'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph', 'mpath', - 'path', 'polygon', 'polyline', 'radialGradient', 'rect', 'set', 'stop', - 'svg', 'switch', 'text', 'title', 'tspan', 'use'] - - # svgtiny + class + opacity + offset + xmlns + xmlns:xlink - svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic', - 'arabic-form', 'ascent', 'attributeName', 'attributeType', - 'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height', - 'class', 'color', 'color-rendering', 'content', 'cx', 'cy', 'd', 'dx', - 'dy', 'descent', 'display', 'dur', 'end', 'fill', 'fill-opacity', - 'fill-rule', 'font-family', 'font-size', 'font-stretch', 'font-style', - 'font-variant', 'font-weight', 'from', 'fx', 'fy', 'g1', 'g2', - 'glyph-name', 'gradientUnits', 'hanging', 'height', 'horiz-adv-x', - 'horiz-origin-x', 'id', 'ideographic', 'k', 'keyPoints', 'keySplines', - 'keyTimes', 'lang', 'mathematical', 'marker-end', 'marker-mid', - 'marker-start', 'markerHeight', 'markerUnits', 'markerWidth', 'max', - 'min', 'name', 'offset', 'opacity', 'orient', 'origin', - 'overline-position', 'overline-thickness', 'panose-1', 'path', - 'pathLength', 'points', 'preserveAspectRatio', 'r', 'refX', 'refY', - 'repeatCount', 'repeatDur', 'requiredExtensions', 'requiredFeatures', - 'restart', 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', - 'stop-color', 'stop-opacity', 'strikethrough-position', - 'strikethrough-thickness', 'stroke', 'stroke-dasharray', - 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin', - 'stroke-miterlimit', 'stroke-opacity', 'stroke-width', 'systemLanguage', - 'target', 'text-anchor', 'to', 'transform', 'type', 'u1', 'u2', - 'underline-position', 'underline-thickness', 'unicode', 'unicode-range', - 'units-per-em', 'values', 'version', 'viewBox', 'visibility', 'width', - 'widths', 'x', 'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole', - 'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type', - 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y', 'y1', - 'y2', 'zoomAndPan'] - - svg_attr_map = None - svg_elem_map = None - - acceptable_svg_properties = [ 'fill', 'fill-opacity', 'fill-rule', - 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin', - 'stroke-opacity'] - - def reset(self): - _BaseHTMLProcessor.reset(self) - self.unacceptablestack = 0 - self.mathmlOK = 0 - self.svgOK = 0 - - def unknown_starttag(self, tag, attrs): - acceptable_attributes = self.acceptable_attributes - keymap = {} - if not tag in self.acceptable_elements or self.svgOK: - if tag in self.unacceptable_elements_with_end_tag: - self.unacceptablestack += 1 - - # add implicit namespaces to html5 inline svg/mathml - if self._type.endswith('html'): - if not dict(attrs).get('xmlns'): - if tag=='svg': - attrs.append( ('xmlns','http://www.w3.org/2000/svg') ) - if tag=='math': - attrs.append( ('xmlns','http://www.w3.org/1998/Math/MathML') ) - - # not otherwise acceptable, perhaps it is MathML or SVG? - if tag=='math' and ('xmlns','http://www.w3.org/1998/Math/MathML') in attrs: - self.mathmlOK += 1 - if tag=='svg' and ('xmlns','http://www.w3.org/2000/svg') in attrs: - self.svgOK += 1 - - # chose acceptable attributes based on tag class, else bail - if self.mathmlOK and tag in self.mathml_elements: - acceptable_attributes = self.mathml_attributes - elif self.svgOK and tag in self.svg_elements: - # for most vocabularies, lowercasing is a good idea. Many - # svg elements, however, are camel case - if not self.svg_attr_map: - lower=[attr.lower() for attr in self.svg_attributes] - mix=[a for a in self.svg_attributes if a not in lower] - self.svg_attributes = lower - self.svg_attr_map = dict([(a.lower(),a) for a in mix]) - - lower=[attr.lower() for attr in self.svg_elements] - mix=[a for a in self.svg_elements if a not in lower] - self.svg_elements = lower - self.svg_elem_map = dict([(a.lower(),a) for a in mix]) - acceptable_attributes = self.svg_attributes - tag = self.svg_elem_map.get(tag,tag) - keymap = self.svg_attr_map - elif not tag in self.acceptable_elements: - return - - # declare xlink namespace, if needed - if self.mathmlOK or self.svgOK: - if filter(lambda (n,v): n.startswith('xlink:'),attrs): - if not ('xmlns:xlink','http://www.w3.org/1999/xlink') in attrs: - attrs.append(('xmlns:xlink','http://www.w3.org/1999/xlink')) - - clean_attrs = [] - for key, value in self.normalize_attrs(attrs): - if key in acceptable_attributes: - key=keymap.get(key,key) - # make sure the uri uses an acceptable uri scheme - if key == u'href': - value = _makeSafeAbsoluteURI(value) - clean_attrs.append((key,value)) - elif key=='style': - clean_value = self.sanitize_style(value) - if clean_value: clean_attrs.append((key,clean_value)) - _BaseHTMLProcessor.unknown_starttag(self, tag, clean_attrs) - - def unknown_endtag(self, tag): - if not tag in self.acceptable_elements: - if tag in self.unacceptable_elements_with_end_tag: - self.unacceptablestack -= 1 - if self.mathmlOK and tag in self.mathml_elements: - if tag == 'math' and self.mathmlOK: self.mathmlOK -= 1 - elif self.svgOK and tag in self.svg_elements: - tag = self.svg_elem_map.get(tag,tag) - if tag == 'svg' and self.svgOK: self.svgOK -= 1 - else: - return - _BaseHTMLProcessor.unknown_endtag(self, tag) - - def handle_pi(self, text): - pass - - def handle_decl(self, text): - pass - - def handle_data(self, text): - if not self.unacceptablestack: - _BaseHTMLProcessor.handle_data(self, text) - - def sanitize_style(self, style): - # disallow urls - style=re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ',style) - - # gauntlet - if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return '' - # This replaced a regexp that used re.match and was prone to pathological back-tracking. - if re.sub("\s*[-\w]+\s*:\s*[^:;]*;?", '', style).strip(): return '' - - clean = [] - for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style): - if not value: continue - if prop.lower() in self.acceptable_css_properties: - clean.append(prop + ': ' + value + ';') - elif prop.split('-')[0].lower() in ['background','border','margin','padding']: - for keyword in value.split(): - if not keyword in self.acceptable_css_keywords and \ - not self.valid_css_values.match(keyword): - break - else: - clean.append(prop + ': ' + value + ';') - elif self.svgOK and prop.lower() in self.acceptable_svg_properties: - clean.append(prop + ': ' + value + ';') - - return ' '.join(clean) - - def parse_comment(self, i, report=1): - ret = _BaseHTMLProcessor.parse_comment(self, i, report) - if ret >= 0: - return ret - # if ret == -1, this may be a malicious attempt to circumvent - # sanitization, or a page-destroying unclosed comment - match = re.compile(r'--[^>]*>').search(self.rawdata, i+4) - if match: - return match.end() - # unclosed comment; deliberately fail to handle_data() - return len(self.rawdata) - - -def _sanitizeHTML(htmlSource, encoding, _type): - p = _HTMLSanitizer(encoding, _type) - htmlSource = htmlSource.replace(''): - data = data.split('>', 1)[1] - if data.count('= '2.3.3' - assert base64 != None - user, passw = _base64decode(req.headers['Authorization'].split(' ')[1]).split(':') - realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0] - self.add_password(realm, host, user, passw) - retry = self.http_error_auth_reqed('www-authenticate', host, req, headers) - self.reset_retry_count() - return retry - except: - return self.http_error_default(req, fp, code, msg, headers) - -def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers): - """URL, filename, or string --> stream - - This function lets you define parsers that take any input source - (URL, pathname to local or network file, or actual data as a string) - and deal with it in a uniform manner. Returned object is guaranteed - to have all the basic stdio read methods (read, readline, readlines). - Just .close() the object when you're done with it. - - If the etag argument is supplied, it will be used as the value of an - If-None-Match request header. - - If the modified argument is supplied, it can be a tuple of 9 integers - (as returned by gmtime() in the standard Python time module) or a date - string in any format supported by feedparser. Regardless, it MUST - be in GMT (Greenwich Mean Time). It will be reformatted into an - RFC 1123-compliant date and used as the value of an If-Modified-Since - request header. - - If the agent argument is supplied, it will be used as the value of a - User-Agent request header. - - If the referrer argument is supplied, it will be used as the value of a - Referer[sic] request header. - - If handlers is supplied, it is a list of handlers used to build a - urllib2 opener. - - if request_headers is supplied it is a dictionary of HTTP request headers - that will override the values generated by FeedParser. - """ - - if hasattr(url_file_stream_or_string, 'read'): - return url_file_stream_or_string - - if url_file_stream_or_string == '-': - return sys.stdin - - if urlparse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp', 'file', 'feed'): - # Deal with the feed URI scheme - if url_file_stream_or_string.startswith('feed:http'): - url_file_stream_or_string = url_file_stream_or_string[5:] - elif url_file_stream_or_string.startswith('feed:'): - url_file_stream_or_string = 'http:' + url_file_stream_or_string[5:] - if not agent: - agent = USER_AGENT - # test for inline user:password for basic auth - auth = None - if base64: - urltype, rest = urllib.splittype(url_file_stream_or_string) - realhost, rest = urllib.splithost(rest) - if realhost: - user_passwd, realhost = urllib.splituser(realhost) - if user_passwd: - url_file_stream_or_string = '%s://%s%s' % (urltype, realhost, rest) - auth = base64.standard_b64encode(user_passwd).strip() - - # iri support - try: - if isinstance(url_file_stream_or_string,unicode): - url_file_stream_or_string = url_file_stream_or_string.encode('idna').decode('utf-8') - else: - url_file_stream_or_string = url_file_stream_or_string.decode('utf-8').encode('idna').decode('utf-8') - except: - pass - - # try to open with urllib2 (to use optional headers) - request = _build_urllib2_request(url_file_stream_or_string, agent, etag, modified, referrer, auth, request_headers) - opener = apply(urllib2.build_opener, tuple(handlers + [_FeedURLHandler()])) - opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent - try: - return opener.open(request, timeout=15) - finally: - opener.close() # JohnD - - # try to open with native open function (if url_file_stream_or_string is a filename) - try: - return open(url_file_stream_or_string, 'rb') - except: - pass - - # treat url_file_stream_or_string as string - return _StringIO(str(url_file_stream_or_string)) - -def _build_urllib2_request(url, agent, etag, modified, referrer, auth, request_headers): - request = urllib2.Request(url) - request.add_header('User-Agent', agent) - if etag: - request.add_header('If-None-Match', etag) - if type(modified) == type(''): - modified = _parse_date(modified) - elif isinstance(modified, datetime.datetime): - modified = modified.utctimetuple() - if modified: - # format into an RFC 1123-compliant timestamp. We can't use - # time.strftime() since the %a and %b directives can be affected - # by the current locale, but RFC 2616 states that dates must be - # in English. - short_weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] - months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - request.add_header('If-Modified-Since', '%s, %02d %s %04d %02d:%02d:%02d GMT' % (short_weekdays[modified[6]], modified[2], months[modified[1] - 1], modified[0], modified[3], modified[4], modified[5])) - if referrer: - request.add_header('Referer', referrer) - if gzip and zlib: - request.add_header('Accept-encoding', 'gzip, deflate') - elif gzip: - request.add_header('Accept-encoding', 'gzip') - elif zlib: - request.add_header('Accept-encoding', 'deflate') - else: - request.add_header('Accept-encoding', '') - if auth: - request.add_header('Authorization', 'Basic %s' % auth) - if ACCEPT_HEADER: - request.add_header('Accept', ACCEPT_HEADER) - # use this for whatever -- cookies, special headers, etc - # [('Cookie','Something'),('x-special-header','Another Value')] - for header_name, header_value in request_headers.items(): - request.add_header(header_name, header_value) - request.add_header('A-IM', 'feed') # RFC 3229 support - return request - -_date_handlers = [] -def registerDateHandler(func): - '''Register a date handler function (takes string, returns 9-tuple date in GMT)''' - _date_handlers.insert(0, func) - -# ISO-8601 date parsing routines written by Fazal Majid. -# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601 -# parser is beyond the scope of feedparser and would be a worthwhile addition -# to the Python library. -# A single regular expression cannot parse ISO 8601 date formats into groups -# as the standard is highly irregular (for instance is 030104 2003-01-04 or -# 0301-04-01), so we use templates instead. -# Please note the order in templates is significant because we need a -# greedy match. -_iso8601_tmpl = ['YYYY-?MM-?DD', 'YYYY-0MM?-?DD', 'YYYY-MM', 'YYYY-?OOO', - 'YY-?MM-?DD', 'YY-?OOO', 'YYYY', - '-YY-?MM', '-OOO', '-YY', - '--MM-?DD', '--MM', - '---DD', - 'CC', ''] -_iso8601_re = [ - tmpl.replace( - 'YYYY', r'(?P\d{4})').replace( - 'YY', r'(?P\d\d)').replace( - 'MM', r'(?P[01]\d)').replace( - 'DD', r'(?P[0123]\d)').replace( - 'OOO', r'(?P[0123]\d\d)').replace( - 'CC', r'(?P\d\d$)') - + r'(T?(?P\d{2}):(?P\d{2})' - + r'(:(?P\d{2}))?' - + r'(\.(?P\d+))?' - + r'(?P[+-](?P\d{2})(:(?P\d{2}))?|Z)?)?' - for tmpl in _iso8601_tmpl] -try: - del tmpl -except NameError: - pass -_iso8601_matches = [re.compile(regex).match for regex in _iso8601_re] -try: - del regex -except NameError: - pass -def _parse_date_iso8601(dateString): - '''Parse a variety of ISO-8601-compatible formats like 20040105''' - m = None - for _iso8601_match in _iso8601_matches: - m = _iso8601_match(dateString) - if m: break - if not m: return - if m.span() == (0, 0): return - params = m.groupdict() - ordinal = params.get('ordinal', 0) - if ordinal: - ordinal = int(ordinal) - else: - ordinal = 0 - year = params.get('year', '--') - if not year or year == '--': - year = time.gmtime()[0] - elif len(year) == 2: - # ISO 8601 assumes current century, i.e. 93 -> 2093, NOT 1993 - year = 100 * int(time.gmtime()[0] / 100) + int(year) - else: - year = int(year) - month = params.get('month', '-') - if not month or month == '-': - # ordinals are NOT normalized by mktime, we simulate them - # by setting month=1, day=ordinal - if ordinal: - month = 1 - else: - month = time.gmtime()[1] - month = int(month) - day = params.get('day', 0) - if not day: - # see above - if ordinal: - day = ordinal - elif params.get('century', 0) or \ - params.get('year', 0) or params.get('month', 0): - day = 1 - else: - day = time.gmtime()[2] - else: - day = int(day) - # special case of the century - is the first year of the 21st century - # 2000 or 2001 ? The debate goes on... - if 'century' in params.keys(): - year = (int(params['century']) - 1) * 100 + 1 - # in ISO 8601 most fields are optional - for field in ['hour', 'minute', 'second', 'tzhour', 'tzmin']: - if not params.get(field, None): - params[field] = 0 - hour = int(params.get('hour', 0)) - minute = int(params.get('minute', 0)) - second = int(float(params.get('second', 0))) - # weekday is normalized by mktime(), we can ignore it - weekday = 0 - daylight_savings_flag = -1 - tm = [year, month, day, hour, minute, second, weekday, - ordinal, daylight_savings_flag] - # ISO 8601 time zone adjustments - tz = params.get('tz') - if tz and tz != 'Z': - if tz[0] == '-': - tm[3] += int(params.get('tzhour', 0)) - tm[4] += int(params.get('tzmin', 0)) - elif tz[0] == '+': - tm[3] -= int(params.get('tzhour', 0)) - tm[4] -= int(params.get('tzmin', 0)) - else: - return None - # Python's time.mktime() is a wrapper around the ANSI C mktime(3c) - # which is guaranteed to normalize d/m/y/h/m/s. - # Many implementations have bugs, but we'll pretend they don't. - return time.localtime(time.mktime(tuple(tm))) -registerDateHandler(_parse_date_iso8601) - -# 8-bit date handling routines written by ytrewq1. -_korean_year = u'\ub144' # b3e2 in euc-kr -_korean_month = u'\uc6d4' # bff9 in euc-kr -_korean_day = u'\uc77c' # c0cf in euc-kr -_korean_am = u'\uc624\uc804' # bfc0 c0fc in euc-kr -_korean_pm = u'\uc624\ud6c4' # bfc0 c8c4 in euc-kr - -_korean_onblog_date_re = \ - re.compile('(\d{4})%s\s+(\d{2})%s\s+(\d{2})%s\s+(\d{2}):(\d{2}):(\d{2})' % \ - (_korean_year, _korean_month, _korean_day)) -_korean_nate_date_re = \ - re.compile(u'(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})' % \ - (_korean_am, _korean_pm)) -def _parse_date_onblog(dateString): - '''Parse a string according to the OnBlog 8-bit date format''' - m = _korean_onblog_date_re.match(dateString) - if not m: return - w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \ - {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\ - 'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\ - 'zonediff': '+09:00'} - if _debug: sys.stderr.write('OnBlog date parsed as: %s\n' % w3dtfdate) - return _parse_date_w3dtf(w3dtfdate) -registerDateHandler(_parse_date_onblog) - -def _parse_date_nate(dateString): - '''Parse a string according to the Nate 8-bit date format''' - m = _korean_nate_date_re.match(dateString) - if not m: return - hour = int(m.group(5)) - ampm = m.group(4) - if (ampm == _korean_pm): - hour += 12 - hour = str(hour) - if len(hour) == 1: - hour = '0' + hour - w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \ - {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\ - 'hour': hour, 'minute': m.group(6), 'second': m.group(7),\ - 'zonediff': '+09:00'} - if _debug: sys.stderr.write('Nate date parsed as: %s\n' % w3dtfdate) - return _parse_date_w3dtf(w3dtfdate) -registerDateHandler(_parse_date_nate) - -_mssql_date_re = \ - re.compile('(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})(\.\d+)?') -def _parse_date_mssql(dateString): - '''Parse a string according to the MS SQL date format''' - m = _mssql_date_re.match(dateString) - if not m: return - w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \ - {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\ - 'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\ - 'zonediff': '+09:00'} - if _debug: sys.stderr.write('MS SQL date parsed as: %s\n' % w3dtfdate) - return _parse_date_w3dtf(w3dtfdate) -registerDateHandler(_parse_date_mssql) - -# Unicode strings for Greek date strings -_greek_months = \ - { \ - u'\u0399\u03b1\u03bd': u'Jan', # c9e1ed in iso-8859-7 - u'\u03a6\u03b5\u03b2': u'Feb', # d6e5e2 in iso-8859-7 - u'\u039c\u03ac\u03ce': u'Mar', # ccdcfe in iso-8859-7 - u'\u039c\u03b1\u03ce': u'Mar', # cce1fe in iso-8859-7 - u'\u0391\u03c0\u03c1': u'Apr', # c1f0f1 in iso-8859-7 - u'\u039c\u03ac\u03b9': u'May', # ccdce9 in iso-8859-7 - u'\u039c\u03b1\u03ca': u'May', # cce1fa in iso-8859-7 - u'\u039c\u03b1\u03b9': u'May', # cce1e9 in iso-8859-7 - u'\u0399\u03bf\u03cd\u03bd': u'Jun', # c9effded in iso-8859-7 - u'\u0399\u03bf\u03bd': u'Jun', # c9efed in iso-8859-7 - u'\u0399\u03bf\u03cd\u03bb': u'Jul', # c9effdeb in iso-8859-7 - u'\u0399\u03bf\u03bb': u'Jul', # c9f9eb in iso-8859-7 - u'\u0391\u03cd\u03b3': u'Aug', # c1fde3 in iso-8859-7 - u'\u0391\u03c5\u03b3': u'Aug', # c1f5e3 in iso-8859-7 - u'\u03a3\u03b5\u03c0': u'Sep', # d3e5f0 in iso-8859-7 - u'\u039f\u03ba\u03c4': u'Oct', # cfeaf4 in iso-8859-7 - u'\u039d\u03bf\u03ad': u'Nov', # cdefdd in iso-8859-7 - u'\u039d\u03bf\u03b5': u'Nov', # cdefe5 in iso-8859-7 - u'\u0394\u03b5\u03ba': u'Dec', # c4e5ea in iso-8859-7 - } - -_greek_wdays = \ - { \ - u'\u039a\u03c5\u03c1': u'Sun', # caf5f1 in iso-8859-7 - u'\u0394\u03b5\u03c5': u'Mon', # c4e5f5 in iso-8859-7 - u'\u03a4\u03c1\u03b9': u'Tue', # d4f1e9 in iso-8859-7 - u'\u03a4\u03b5\u03c4': u'Wed', # d4e5f4 in iso-8859-7 - u'\u03a0\u03b5\u03bc': u'Thu', # d0e5ec in iso-8859-7 - u'\u03a0\u03b1\u03c1': u'Fri', # d0e1f1 in iso-8859-7 - u'\u03a3\u03b1\u03b2': u'Sat', # d3e1e2 in iso-8859-7 - } - -_greek_date_format_re = \ - re.compile(u'([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)') - -def _parse_date_greek(dateString): - '''Parse a string according to a Greek 8-bit date format.''' - m = _greek_date_format_re.match(dateString) - if not m: return - try: - wday = _greek_wdays[m.group(1)] - month = _greek_months[m.group(3)] - except: - return - rfc822date = '%(wday)s, %(day)s %(month)s %(year)s %(hour)s:%(minute)s:%(second)s %(zonediff)s' % \ - {'wday': wday, 'day': m.group(2), 'month': month, 'year': m.group(4),\ - 'hour': m.group(5), 'minute': m.group(6), 'second': m.group(7),\ - 'zonediff': m.group(8)} - if _debug: sys.stderr.write('Greek date parsed as: %s\n' % rfc822date) - return _parse_date_rfc822(rfc822date) -registerDateHandler(_parse_date_greek) - -# Unicode strings for Hungarian date strings -_hungarian_months = \ - { \ - u'janu\u00e1r': u'01', # e1 in iso-8859-2 - u'febru\u00e1ri': u'02', # e1 in iso-8859-2 - u'm\u00e1rcius': u'03', # e1 in iso-8859-2 - u'\u00e1prilis': u'04', # e1 in iso-8859-2 - u'm\u00e1ujus': u'05', # e1 in iso-8859-2 - u'j\u00fanius': u'06', # fa in iso-8859-2 - u'j\u00falius': u'07', # fa in iso-8859-2 - u'augusztus': u'08', - u'szeptember': u'09', - u'okt\u00f3ber': u'10', # f3 in iso-8859-2 - u'november': u'11', - u'december': u'12', - } - -_hungarian_date_format_re = \ - re.compile(u'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})((\+|-)(\d{,2}:\d{2}))') - -def _parse_date_hungarian(dateString): - '''Parse a string according to a Hungarian 8-bit date format.''' - m = _hungarian_date_format_re.match(dateString) - if not m: return - try: - month = _hungarian_months[m.group(2)] - day = m.group(3) - if len(day) == 1: - day = '0' + day - hour = m.group(4) - if len(hour) == 1: - hour = '0' + hour - except: - return - w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s%(zonediff)s' % \ - {'year': m.group(1), 'month': month, 'day': day,\ - 'hour': hour, 'minute': m.group(5),\ - 'zonediff': m.group(6)} - if _debug: sys.stderr.write('Hungarian date parsed as: %s\n' % w3dtfdate) - return _parse_date_w3dtf(w3dtfdate) -registerDateHandler(_parse_date_hungarian) - -# W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by -# Drake and licensed under the Python license. Removed all range checking -# for month, day, hour, minute, and second, since mktime will normalize -# these later -def _parse_date_w3dtf(dateString): - def __extract_date(m): - year = int(m.group('year')) - if year < 100: - year = 100 * int(time.gmtime()[0] / 100) + int(year) - if year < 1000: - return 0, 0, 0 - julian = m.group('julian') - if julian: - julian = int(julian) - month = julian / 30 + 1 - day = julian % 30 + 1 - jday = None - while jday != julian: - t = time.mktime((year, month, day, 0, 0, 0, 0, 0, 0)) - jday = time.gmtime(t)[-2] - diff = abs(jday - julian) - if jday > julian: - if diff < day: - day = day - diff - else: - month = month - 1 - day = 31 - elif jday < julian: - if day + diff < 28: - day = day + diff - else: - month = month + 1 - return year, month, day - month = m.group('month') - day = 1 - if month is None: - month = 1 - else: - month = int(month) - day = m.group('day') - if day: - day = int(day) - else: - day = 1 - return year, month, day - - def __extract_time(m): - if not m: - return 0, 0, 0 - hours = m.group('hours') - if not hours: - return 0, 0, 0 - hours = int(hours) - minutes = int(m.group('minutes')) - seconds = m.group('seconds') - if seconds: - seconds = int(seconds) - else: - seconds = 0 - return hours, minutes, seconds - - def __extract_tzd(m): - '''Return the Time Zone Designator as an offset in seconds from UTC.''' - if not m: - return 0 - tzd = m.group('tzd') - if not tzd: - return 0 - if tzd == 'Z': - return 0 - hours = int(m.group('tzdhours')) - minutes = m.group('tzdminutes') - if minutes: - minutes = int(minutes) - else: - minutes = 0 - offset = (hours*60 + minutes) * 60 - if tzd[0] == '+': - return -offset - return offset - - __date_re = ('(?P\d\d\d\d)' - '(?:(?P-|)' - '(?:(?P\d\d)(?:(?P=dsep)(?P\d\d))?' - '|(?P\d\d\d)))?') - __tzd_re = '(?P[-+](?P\d\d)(?::?(?P\d\d))|Z)' - __tzd_rx = re.compile(__tzd_re) - __time_re = ('(?P\d\d)(?P:|)(?P\d\d)' - '(?:(?P=tsep)(?P\d\d)(?:[.,]\d+)?)?' - + __tzd_re) - __datetime_re = '%s(?:T%s)?' % (__date_re, __time_re) - __datetime_rx = re.compile(__datetime_re) - m = __datetime_rx.match(dateString) - if (m is None) or (m.group() != dateString): return - gmt = __extract_date(m) + __extract_time(m) + (0, 0, 0) - if gmt[0] == 0: return - return time.gmtime(time.mktime(gmt) + __extract_tzd(m) - time.timezone) -registerDateHandler(_parse_date_w3dtf) - -def _parse_date_rfc822(dateString): - '''Parse an RFC822, RFC1123, RFC2822, or asctime-style date''' - data = dateString.split() - if data[0][-1] in (',', '.') or data[0].lower() in rfc822._daynames: - del data[0] - if len(data) == 4: - s = data[3] - i = s.find('+') - if i > 0: - data[3:] = [s[:i], s[i+1:]] - else: - data.append('') - dateString = " ".join(data) - # Account for the Etc/GMT timezone by stripping 'Etc/' - elif len(data) == 5 and data[4].lower().startswith('etc/'): - data[4] = data[4][4:] - dateString = " ".join(data) - if len(data) < 5: - dateString += ' 00:00:00 GMT' - tm = rfc822.parsedate_tz(dateString) - if tm: - return time.gmtime(rfc822.mktime_tz(tm)) -# rfc822.py defines several time zones, but we define some extra ones. -# 'ET' is equivalent to 'EST', etc. -_additional_timezones = {'AT': -400, 'ET': -500, 'CT': -600, 'MT': -700, 'PT': -800} -rfc822._timezones.update(_additional_timezones) -registerDateHandler(_parse_date_rfc822) - -def _parse_date_perforce(aDateString): - """parse a date in yyyy/mm/dd hh:mm:ss TTT format""" - # Fri, 2006/09/15 08:19:53 EDT - _my_date_pattern = re.compile( \ - r'(\w{,3}), (\d{,4})/(\d{,2})/(\d{2}) (\d{,2}):(\d{2}):(\d{2}) (\w{,3})') - - dow, year, month, day, hour, minute, second, tz = \ - _my_date_pattern.search(aDateString).groups() - months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - dateString = "%s, %s %s %s %s:%s:%s %s" % (dow, day, months[int(month) - 1], year, hour, minute, second, tz) - tm = rfc822.parsedate_tz(dateString) - if tm: - return time.gmtime(rfc822.mktime_tz(tm)) -registerDateHandler(_parse_date_perforce) - -def _parse_date(dateString): - '''Parses a variety of date formats into a 9-tuple in GMT''' - for handler in _date_handlers: - try: - date9tuple = handler(dateString) - if not date9tuple: continue - if len(date9tuple) != 9: - if _debug: sys.stderr.write('date handler function must return 9-tuple\n') - raise ValueError - map(int, date9tuple) - return date9tuple - except Exception, e: - if _debug: sys.stderr.write('%s raised %s\n' % (handler.__name__, repr(e))) - pass - return None - -def _getCharacterEncoding(http_headers, xml_data): - '''Get the character encoding of the XML document - - http_headers is a dictionary - xml_data is a raw string (not Unicode) - - This is so much trickier than it sounds, it's not even funny. - According to RFC 3023 ('XML Media Types'), if the HTTP Content-Type - is application/xml, application/*+xml, - application/xml-external-parsed-entity, or application/xml-dtd, - the encoding given in the charset parameter of the HTTP Content-Type - takes precedence over the encoding given in the XML prefix within the - document, and defaults to 'utf-8' if neither are specified. But, if - the HTTP Content-Type is text/xml, text/*+xml, or - text/xml-external-parsed-entity, the encoding given in the XML prefix - within the document is ALWAYS IGNORED and only the encoding given in - the charset parameter of the HTTP Content-Type header should be - respected, and it defaults to 'us-ascii' if not specified. - - Furthermore, discussion on the atom-syntax mailing list with the - author of RFC 3023 leads me to the conclusion that any document - served with a Content-Type of text/* and no charset parameter - must be treated as us-ascii. (We now do this.) And also that it - must always be flagged as non-well-formed. (We now do this too.) - - If Content-Type is unspecified (input was local file or non-HTTP source) - or unrecognized (server just got it totally wrong), then go by the - encoding given in the XML prefix of the document and default to - 'iso-8859-1' as per the HTTP specification (RFC 2616). - - Then, assuming we didn't find a character encoding in the HTTP headers - (and the HTTP Content-type allowed us to look in the body), we need - to sniff the first few bytes of the XML data and try to determine - whether the encoding is ASCII-compatible. Section F of the XML - specification shows the way here: - http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info - - If the sniffed encoding is not ASCII-compatible, we need to make it - ASCII compatible so that we can sniff further into the XML declaration - to find the encoding attribute, which will tell us the true encoding. - - Of course, none of this guarantees that we will be able to parse the - feed in the declared character encoding (assuming it was declared - correctly, which many are not). CJKCodecs and iconv_codec help a lot; - you should definitely install them if you can. - http://cjkpython.i18n.org/ - ''' - - def _parseHTTPContentType(content_type): - '''takes HTTP Content-Type header and returns (content type, charset) - - If no charset is specified, returns (content type, '') - If no content type is specified, returns ('', '') - Both return parameters are guaranteed to be lowercase strings - ''' - content_type = content_type or '' - content_type, params = cgi.parse_header(content_type) - return content_type, params.get('charset', '').replace("'", '') - - sniffed_xml_encoding = '' - xml_encoding = '' - true_encoding = '' - http_content_type, http_encoding = _parseHTTPContentType(http_headers.get('content-type', http_headers.get('Content-type'))) - # Must sniff for non-ASCII-compatible character encodings before - # searching for XML declaration. This heuristic is defined in - # section F of the XML specification: - # http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info - try: - if xml_data[:4] == _l2bytes([0x4c, 0x6f, 0xa7, 0x94]): - # EBCDIC - xml_data = _ebcdic_to_ascii(xml_data) - elif xml_data[:4] == _l2bytes([0x00, 0x3c, 0x00, 0x3f]): - # UTF-16BE - sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xfe, 0xff])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])): - # UTF-16BE with BOM - sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') - elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x3f, 0x00]): - # UTF-16LE - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xff, 0xfe])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])): - # UTF-16LE with BOM - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') - elif xml_data[:4] == _l2bytes([0x00, 0x00, 0x00, 0x3c]): - # UTF-32BE - sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') - elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x00, 0x00]): - # UTF-32LE - sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') - elif xml_data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]): - # UTF-32BE with BOM - sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') - elif xml_data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]): - # UTF-32LE with BOM - sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') - elif xml_data[:3] == _l2bytes([0xef, 0xbb, 0xbf]): - # UTF-8 with BOM - sniffed_xml_encoding = 'utf-8' - xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') - else: - # ASCII-compatible - pass - xml_encoding_match = re.compile(_s2bytes('^<\?.*encoding=[\'"](.*?)[\'"].*\?>')).match(xml_data) - except: - xml_encoding_match = None - if xml_encoding_match: - xml_encoding = xml_encoding_match.groups()[0].decode('utf-8').lower() - if sniffed_xml_encoding and (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', 'iso-10646-ucs-4', 'ucs-4', 'csucs4', 'utf-16', 'utf-32', 'utf_16', 'utf_32', 'utf16', 'u16')): - xml_encoding = sniffed_xml_encoding - acceptable_content_type = 0 - application_content_types = ('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity') - text_content_types = ('text/xml', 'text/xml-external-parsed-entity') - if (http_content_type in application_content_types) or \ - (http_content_type.startswith('application/') and http_content_type.endswith('+xml')): - acceptable_content_type = 1 - true_encoding = http_encoding or xml_encoding or 'utf-8' - elif (http_content_type in text_content_types) or \ - (http_content_type.startswith('text/')) and http_content_type.endswith('+xml'): - acceptable_content_type = 1 - true_encoding = http_encoding or 'us-ascii' - elif http_content_type.startswith('text/'): - true_encoding = http_encoding or 'us-ascii' - elif http_headers and (not (http_headers.has_key('content-type') or http_headers.has_key('Content-type'))): - true_encoding = xml_encoding or 'iso-8859-1' - else: - true_encoding = xml_encoding or 'utf-8' - # some feeds claim to be gb2312 but are actually gb18030. - # apparently MSIE and Firefox both do the following switch: - if true_encoding.lower() == 'gb2312': - true_encoding = 'gb18030' - return true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type - -def _toUTF8(data, encoding): - '''Changes an XML data stream on the fly to specify a new encoding - - data is a raw sequence of bytes (not Unicode) that is presumed to be in %encoding already - encoding is a string recognized by encodings.aliases - ''' - if _debug: sys.stderr.write('entering _toUTF8, trying encoding %s\n' % encoding) - # strip Byte Order Mark (if present) - if (len(data) >= 4) and (data[:2] == _l2bytes([0xfe, 0xff])) and (data[2:4] != _l2bytes([0x00, 0x00])): - if _debug: - sys.stderr.write('stripping BOM\n') - if encoding != 'utf-16be': - sys.stderr.write('trying utf-16be instead\n') - encoding = 'utf-16be' - data = data[2:] - elif (len(data) >= 4) and (data[:2] == _l2bytes([0xff, 0xfe])) and (data[2:4] != _l2bytes([0x00, 0x00])): - if _debug: - sys.stderr.write('stripping BOM\n') - if encoding != 'utf-16le': - sys.stderr.write('trying utf-16le instead\n') - encoding = 'utf-16le' - data = data[2:] - elif data[:3] == _l2bytes([0xef, 0xbb, 0xbf]): - if _debug: - sys.stderr.write('stripping BOM\n') - if encoding != 'utf-8': - sys.stderr.write('trying utf-8 instead\n') - encoding = 'utf-8' - data = data[3:] - elif data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]): - if _debug: - sys.stderr.write('stripping BOM\n') - if encoding != 'utf-32be': - sys.stderr.write('trying utf-32be instead\n') - encoding = 'utf-32be' - data = data[4:] - elif data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]): - if _debug: - sys.stderr.write('stripping BOM\n') - if encoding != 'utf-32le': - sys.stderr.write('trying utf-32le instead\n') - encoding = 'utf-32le' - data = data[4:] - newdata = unicode(data, encoding) - if _debug: sys.stderr.write('successfully converted %s data to unicode\n' % encoding) - declmatch = re.compile('^<\?xml[^>]*?>') - newdecl = '''''' - if declmatch.search(newdata): - newdata = declmatch.sub(newdecl, newdata) - else: - newdata = newdecl + u'\n' + newdata - return newdata.encode('utf-8') - -def _stripDoctype(data): - '''Strips DOCTYPE from XML document, returns (rss_version, stripped_data) - - rss_version may be 'rss091n' or None - stripped_data is the same XML document, minus the DOCTYPE - ''' - start = re.search(_s2bytes('<\w'), data) - start = start and start.start() or -1 - head,data = data[:start+1], data[start+1:] - - entity_pattern = re.compile(_s2bytes(r'^\s*]*?)>'), re.MULTILINE) - entity_results=entity_pattern.findall(head) - head = entity_pattern.sub(_s2bytes(''), head) - doctype_pattern = re.compile(_s2bytes(r'^\s*]*?)>'), re.MULTILINE) - doctype_results = doctype_pattern.findall(head) - doctype = doctype_results and doctype_results[0] or _s2bytes('') - if doctype.lower().count(_s2bytes('netscape')): - version = 'rss091n' - else: - version = None - - # only allow in 'safe' inline entity definitions - replacement=_s2bytes('') - if len(doctype_results)==1 and entity_results: - safe_pattern=re.compile(_s2bytes('\s+(\w+)\s+"(&#\w+;|[^&"]*)"')) - safe_entities=filter(lambda e: safe_pattern.match(e),entity_results) - if safe_entities: - replacement=_s2bytes('\n \n]>') - data = doctype_pattern.sub(replacement, head) + data - - return version, data, dict(replacement and [(k.decode('utf-8'), v.decode('utf-8')) for k, v in safe_pattern.findall(replacement)]) - -def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={}): - '''Parse a feed from a URL, file, stream, or string. - - request_headers, if given, is a dict from http header name to value to add - to the request; this overrides internally generated values. - ''' - result = FeedParserDict() - result['feed'] = FeedParserDict() - result['entries'] = [] - if _XML_AVAILABLE: - result['bozo'] = 0 - if not isinstance(handlers, list): - handlers = [handlers] - try: - f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers) - data = f.read() - except Exception, e: - result['bozo'] = 1 - result['bozo_exception'] = e - data = None - f = None - - if hasattr(f, 'headers'): - result['headers'] = dict(f.headers) - # overwrite existing headers using response_headers - if 'headers' in result: - result['headers'].update(response_headers) - elif response_headers: - result['headers'] = copy.deepcopy(response_headers) - - # if feed is gzip-compressed, decompress it - if f and data and 'headers' in result: - if gzip and result['headers'].get('content-encoding') == 'gzip': - try: - data = gzip.GzipFile(fileobj=_StringIO(data)).read() - except Exception, e: - # Some feeds claim to be gzipped but they're not, so - # we get garbage. Ideally, we should re-request the - # feed without the 'Accept-encoding: gzip' header, - # but we don't. - result['bozo'] = 1 - result['bozo_exception'] = e - data = '' - elif zlib and result['headers'].get('content-encoding') == 'deflate': - try: - data = zlib.decompress(data, -zlib.MAX_WBITS) - except Exception, e: - result['bozo'] = 1 - result['bozo_exception'] = e - data = '' - - # save HTTP headers - if 'headers' in result: - if 'etag' in result['headers'] or 'ETag' in result['headers']: - etag = result['headers'].get('etag', result['headers'].get('ETag')) - if etag: - result['etag'] = etag - if 'last-modified' in result['headers'] or 'Last-Modified' in result['headers']: - modified = result['headers'].get('last-modified', result['headers'].get('Last-Modified')) - if modified: - result['modified'] = _parse_date(modified) - if hasattr(f, 'url'): - result['href'] = f.url - result['status'] = 200 - if hasattr(f, 'status'): - result['status'] = f.status - if hasattr(f, 'close'): - f.close() - - # there are four encodings to keep track of: - # - http_encoding is the encoding declared in the Content-Type HTTP header - # - xml_encoding is the encoding declared in the [:][ :.] -GNTP_INFO_LINE = re.compile( - 'GNTP/(?P\d+\.\d+) (?PREGISTER|NOTIFY|SUBSCRIBE|\-OK|\-ERROR)' + - ' (?P[A-Z0-9]+(:(?P[A-F0-9]+))?) ?' + - '((?P[A-Z0-9]+):(?P[A-F0-9]+).(?P[A-F0-9]+))?\r\n', - re.IGNORECASE -) - -GNTP_INFO_LINE_SHORT = re.compile( - 'GNTP/(?P\d+\.\d+) (?PREGISTER|NOTIFY|SUBSCRIBE|\-OK|\-ERROR)', - re.IGNORECASE -) - -GNTP_HEADER = re.compile('([\w-]+):(.+)') - -GNTP_EOL = gntp.shim.b('\r\n') -GNTP_SEP = gntp.shim.b(': ') - - -class _GNTPBuffer(gntp.shim.StringIO): - """GNTP Buffer class""" - def writeln(self, value=None): - if value: - self.write(gntp.shim.b(value)) - self.write(GNTP_EOL) - - def writeheader(self, key, value): - if not isinstance(value, str): - value = str(value) - self.write(gntp.shim.b(key)) - self.write(GNTP_SEP) - self.write(gntp.shim.b(value)) - self.write(GNTP_EOL) - - -class _GNTPBase(object): - """Base initilization - - :param string messagetype: GNTP Message type - :param string version: GNTP Protocol version - :param string encription: Encryption protocol - """ - def __init__(self, messagetype=None, version='1.0', encryption=None): - self.info = { - 'version': version, - 'messagetype': messagetype, - 'encryptionAlgorithmID': encryption - } - self.hash_algo = { - 'MD5': hashlib.md5, - 'SHA1': hashlib.sha1, - 'SHA256': hashlib.sha256, - 'SHA512': hashlib.sha512, - } - self.headers = {} - self.resources = {} - - def __str__(self): - return self.encode() - - def _parse_info(self, data): - """Parse the first line of a GNTP message to get security and other info values - - :param string data: GNTP Message - :return dict: Parsed GNTP Info line - """ - - match = GNTP_INFO_LINE.match(data) - - if not match: - raise errors.ParseError('ERROR_PARSING_INFO_LINE') - - info = match.groupdict() - if info['encryptionAlgorithmID'] == 'NONE': - info['encryptionAlgorithmID'] = None - - return info - - def set_password(self, password, encryptAlgo='MD5'): - """Set a password for a GNTP Message - - :param string password: Null to clear password - :param string encryptAlgo: Supports MD5, SHA1, SHA256, SHA512 - """ - if not password: - self.info['encryptionAlgorithmID'] = None - self.info['keyHashAlgorithm'] = None - return - - self.password = gntp.shim.b(password) - self.encryptAlgo = encryptAlgo.upper() - - if not self.encryptAlgo in self.hash_algo: - raise errors.UnsupportedError('INVALID HASH "%s"' % self.encryptAlgo) - - hashfunction = self.hash_algo.get(self.encryptAlgo) - - password = password.encode('utf8') - seed = time.ctime().encode('utf8') - salt = hashfunction(seed).hexdigest() - saltHash = hashfunction(seed).digest() - keyBasis = password + saltHash - key = hashfunction(keyBasis).digest() - keyHash = hashfunction(key).hexdigest() - - self.info['keyHashAlgorithmID'] = self.encryptAlgo - self.info['keyHash'] = keyHash.upper() - self.info['salt'] = salt.upper() - - def _decode_hex(self, value): - """Helper function to decode hex string to `proper` hex string - - :param string value: Human readable hex string - :return string: Hex string - """ - result = '' - for i in range(0, len(value), 2): - tmp = int(value[i:i + 2], 16) - result += chr(tmp) - return result - - def _decode_binary(self, rawIdentifier, identifier): - rawIdentifier += '\r\n\r\n' - dataLength = int(identifier['Length']) - pointerStart = self.raw.find(rawIdentifier) + len(rawIdentifier) - pointerEnd = pointerStart + dataLength - data = self.raw[pointerStart:pointerEnd] - if not len(data) == dataLength: - raise errors.ParseError('INVALID_DATA_LENGTH Expected: %s Recieved %s' % (dataLength, len(data))) - return data - - def _validate_password(self, password): - """Validate GNTP Message against stored password""" - self.password = password - if password is None: - raise errors.AuthError('Missing password') - keyHash = self.info.get('keyHash', None) - if keyHash is None and self.password is None: - return True - if keyHash is None: - raise errors.AuthError('Invalid keyHash') - if self.password is None: - raise errors.AuthError('Missing password') - - keyHashAlgorithmID = self.info.get('keyHashAlgorithmID','MD5') - - password = self.password.encode('utf8') - saltHash = self._decode_hex(self.info['salt']) - - keyBasis = password + saltHash - self.key = self.hash_algo[keyHashAlgorithmID](keyBasis).digest() - keyHash = self.hash_algo[keyHashAlgorithmID](self.key).hexdigest() - - if not keyHash.upper() == self.info['keyHash'].upper(): - raise errors.AuthError('Invalid Hash') - return True - - def validate(self): - """Verify required headers""" - for header in self._requiredHeaders: - if not self.headers.get(header, False): - raise errors.ParseError('Missing Notification Header: ' + header) - - def _format_info(self): - """Generate info line for GNTP Message - - :return string: - """ - info = 'GNTP/%s %s' % ( - self.info.get('version'), - self.info.get('messagetype'), - ) - if self.info.get('encryptionAlgorithmID', None): - info += ' %s:%s' % ( - self.info.get('encryptionAlgorithmID'), - self.info.get('ivValue'), - ) - else: - info += ' NONE' - - if self.info.get('keyHashAlgorithmID', None): - info += ' %s:%s.%s' % ( - self.info.get('keyHashAlgorithmID'), - self.info.get('keyHash'), - self.info.get('salt') - ) - - return info - - def _parse_dict(self, data): - """Helper function to parse blocks of GNTP headers into a dictionary - - :param string data: - :return dict: Dictionary of parsed GNTP Headers - """ - d = {} - for line in data.split('\r\n'): - match = GNTP_HEADER.match(line) - if not match: - continue - - key = match.group(1).strip() - val = match.group(2).strip() - d[key] = val - return d - - def add_header(self, key, value): - self.headers[key] = value - - def add_resource(self, data): - """Add binary resource - - :param string data: Binary Data - """ - data = gntp.shim.b(data) - identifier = hashlib.md5(data).hexdigest() - self.resources[identifier] = data - return 'x-growl-resource://%s' % identifier - - def decode(self, data, password=None): - """Decode GNTP Message - - :param string data: - """ - self.password = password - self.raw = gntp.shim.u(data) - parts = self.raw.split('\r\n\r\n') - self.info = self._parse_info(self.raw) - self.headers = self._parse_dict(parts[0]) - - def encode(self): - """Encode a generic GNTP Message - - :return string: GNTP Message ready to be sent. Returned as a byte string - """ - - buff = _GNTPBuffer() - - buff.writeln(self._format_info()) - - #Headers - for k, v in self.headers.items(): - buff.writeheader(k, v) - buff.writeln() - - #Resources - for resource, data in self.resources.items(): - buff.writeheader('Identifier', resource) - buff.writeheader('Length', len(data)) - buff.writeln() - buff.write(data) - buff.writeln() - buff.writeln() - - return buff.getvalue() - - -class GNTPRegister(_GNTPBase): - """Represents a GNTP Registration Command - - :param string data: (Optional) See decode() - :param string password: (Optional) Password to use while encoding/decoding messages - """ - _requiredHeaders = [ - 'Application-Name', - 'Notifications-Count' - ] - _requiredNotificationHeaders = ['Notification-Name'] - - def __init__(self, data=None, password=None): - _GNTPBase.__init__(self, 'REGISTER') - self.notifications = [] - - if data: - self.decode(data, password) - else: - self.set_password(password) - self.add_header('Application-Name', 'pygntp') - self.add_header('Notifications-Count', 0) - - def validate(self): - '''Validate required headers and validate notification headers''' - for header in self._requiredHeaders: - if not self.headers.get(header, False): - raise errors.ParseError('Missing Registration Header: ' + header) - for notice in self.notifications: - for header in self._requiredNotificationHeaders: - if not notice.get(header, False): - raise errors.ParseError('Missing Notification Header: ' + header) - - def decode(self, data, password): - """Decode existing GNTP Registration message - - :param string data: Message to decode - """ - self.raw = gntp.shim.u(data) - parts = self.raw.split('\r\n\r\n') - self.info = self._parse_info(self.raw) - self._validate_password(password) - self.headers = self._parse_dict(parts[0]) - - for i, part in enumerate(parts): - if i == 0: - continue # Skip Header - if part.strip() == '': - continue - notice = self._parse_dict(part) - if notice.get('Notification-Name', False): - self.notifications.append(notice) - elif notice.get('Identifier', False): - notice['Data'] = self._decode_binary(part, notice) - #open('register.png','wblol').write(notice['Data']) - self.resources[notice.get('Identifier')] = notice - - def add_notification(self, name, enabled=True): - """Add new Notification to Registration message - - :param string name: Notification Name - :param boolean enabled: Enable this notification by default - """ - notice = {} - notice['Notification-Name'] = name - notice['Notification-Enabled'] = enabled - - self.notifications.append(notice) - self.add_header('Notifications-Count', len(self.notifications)) - - def encode(self): - """Encode a GNTP Registration Message - - :return string: Encoded GNTP Registration message. Returned as a byte string - """ - - buff = _GNTPBuffer() - - buff.writeln(self._format_info()) - - #Headers - for k, v in self.headers.items(): - buff.writeheader(k, v) - buff.writeln() - - #Notifications - if len(self.notifications) > 0: - for notice in self.notifications: - for k, v in notice.items(): - buff.writeheader(k, v) - buff.writeln() - - #Resources - for resource, data in self.resources.items(): - buff.writeheader('Identifier', resource) - buff.writeheader('Length', len(data)) - buff.writeln() - buff.write(data) - buff.writeln() - buff.writeln() - - return buff.getvalue() - - -class GNTPNotice(_GNTPBase): - """Represents a GNTP Notification Command - - :param string data: (Optional) See decode() - :param string app: (Optional) Set Application-Name - :param string name: (Optional) Set Notification-Name - :param string title: (Optional) Set Notification Title - :param string password: (Optional) Password to use while encoding/decoding messages - """ - _requiredHeaders = [ - 'Application-Name', - 'Notification-Name', - 'Notification-Title' - ] - - def __init__(self, data=None, app=None, name=None, title=None, password=None): - _GNTPBase.__init__(self, 'NOTIFY') - - if data: - self.decode(data, password) - else: - self.set_password(password) - if app: - self.add_header('Application-Name', app) - if name: - self.add_header('Notification-Name', name) - if title: - self.add_header('Notification-Title', title) - - def decode(self, data, password): - """Decode existing GNTP Notification message - - :param string data: Message to decode. - """ - self.raw = gntp.shim.u(data) - parts = self.raw.split('\r\n\r\n') - self.info = self._parse_info(self.raw) - self._validate_password(password) - self.headers = self._parse_dict(parts[0]) - - for i, part in enumerate(parts): - if i == 0: - continue # Skip Header - if part.strip() == '': - continue - notice = self._parse_dict(part) - if notice.get('Identifier', False): - notice['Data'] = self._decode_binary(part, notice) - #open('notice.png','wblol').write(notice['Data']) - self.resources[notice.get('Identifier')] = notice - - -class GNTPSubscribe(_GNTPBase): - """Represents a GNTP Subscribe Command - - :param string data: (Optional) See decode() - :param string password: (Optional) Password to use while encoding/decoding messages - """ - _requiredHeaders = [ - 'Subscriber-ID', - 'Subscriber-Name', - ] - - def __init__(self, data=None, password=None): - _GNTPBase.__init__(self, 'SUBSCRIBE') - if data: - self.decode(data, password) - else: - self.set_password(password) - - -class GNTPOK(_GNTPBase): - """Represents a GNTP OK Response - - :param string data: (Optional) See _GNTPResponse.decode() - :param string action: (Optional) Set type of action the OK Response is for - """ - _requiredHeaders = ['Response-Action'] - - def __init__(self, data=None, action=None): - _GNTPBase.__init__(self, '-OK') - if data: - self.decode(data) - if action: - self.add_header('Response-Action', action) - - -class GNTPError(_GNTPBase): - """Represents a GNTP Error response - - :param string data: (Optional) See _GNTPResponse.decode() - :param string errorcode: (Optional) Error code - :param string errordesc: (Optional) Error Description - """ - _requiredHeaders = ['Error-Code', 'Error-Description'] - - def __init__(self, data=None, errorcode=None, errordesc=None): - _GNTPBase.__init__(self, '-ERROR') - if data: - self.decode(data) - if errorcode: - self.add_header('Error-Code', errorcode) - self.add_header('Error-Description', errordesc) - - def error(self): - return (self.headers.get('Error-Code', None), - self.headers.get('Error-Description', None)) - - -def parse_gntp(data, password=None): - """Attempt to parse a message as a GNTP message - - :param string data: Message to be parsed - :param string password: Optional password to be used to verify the message - """ - data = gntp.shim.u(data) - match = GNTP_INFO_LINE_SHORT.match(data) - if not match: - raise errors.ParseError('INVALID_GNTP_INFO') - info = match.groupdict() - if info['messagetype'] == 'REGISTER': - return GNTPRegister(data, password=password) - elif info['messagetype'] == 'NOTIFY': - return GNTPNotice(data, password=password) - elif info['messagetype'] == 'SUBSCRIBE': - return GNTPSubscribe(data, password=password) - elif info['messagetype'] == '-OK': - return GNTPOK(data) - elif info['messagetype'] == '-ERROR': - return GNTPError(data) - raise errors.ParseError('INVALID_GNTP_MESSAGE') diff --git a/lib/gntp/errors.py b/lib/gntp/errors.py deleted file mode 100644 index c006fd68..00000000 --- a/lib/gntp/errors.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright: 2013 Paul Traylor -# These sources are released under the terms of the MIT license: see LICENSE - -class BaseError(Exception): - pass - - -class ParseError(BaseError): - errorcode = 500 - errordesc = 'Error parsing the message' - - -class AuthError(BaseError): - errorcode = 400 - errordesc = 'Error with authorization' - - -class UnsupportedError(BaseError): - errorcode = 500 - errordesc = 'Currently unsupported by gntp.py' - - -class NetworkError(BaseError): - errorcode = 500 - errordesc = "Error connecting to growl server" diff --git a/lib/gntp/notifier.py b/lib/gntp/notifier.py deleted file mode 100644 index 1719ecdf..00000000 --- a/lib/gntp/notifier.py +++ /dev/null @@ -1,265 +0,0 @@ -# Copyright: 2013 Paul Traylor -# These sources are released under the terms of the MIT license: see LICENSE - -""" -The gntp.notifier module is provided as a simple way to send notifications -using GNTP - -.. note:: - This class is intended to mostly mirror the older Python bindings such - that you should be able to replace instances of the old bindings with - this class. - `Original Python bindings `_ - -""" -import logging -import platform -import socket -import sys - -from gntp.version import __version__ -import gntp.core -import gntp.errors as errors -import gntp.shim - -__all__ = [ - 'mini', - 'GrowlNotifier', -] - -logger = logging.getLogger(__name__) - - -class GrowlNotifier(object): - """Helper class to simplfy sending Growl messages - - :param string applicationName: Sending application name - :param list notification: List of valid notifications - :param list defaultNotifications: List of notifications that should be enabled - by default - :param string applicationIcon: Icon URL - :param string hostname: Remote host - :param integer port: Remote port - """ - - passwordHash = 'MD5' - socketTimeout = 3 - - def __init__(self, applicationName='Python GNTP', notifications=[], - defaultNotifications=None, applicationIcon=None, hostname='localhost', - password=None, port=23053): - - self.applicationName = applicationName - self.notifications = list(notifications) - if defaultNotifications: - self.defaultNotifications = list(defaultNotifications) - else: - self.defaultNotifications = self.notifications - self.applicationIcon = applicationIcon - - self.password = password - self.hostname = hostname - self.port = int(port) - - def _checkIcon(self, data): - ''' - Check the icon to see if it's valid - - If it's a simple URL icon, then we return True. If it's a data icon - then we return False - ''' - logger.info('Checking icon') - return gntp.shim.u(data).startswith('http') - - def register(self): - """Send GNTP Registration - - .. warning:: - Before sending notifications to Growl, you need to have - sent a registration message at least once - """ - logger.info('Sending registration to %s:%s', self.hostname, self.port) - register = gntp.core.GNTPRegister() - register.add_header('Application-Name', self.applicationName) - for notification in self.notifications: - enabled = notification in self.defaultNotifications - register.add_notification(notification, enabled) - if self.applicationIcon: - if self._checkIcon(self.applicationIcon): - register.add_header('Application-Icon', self.applicationIcon) - else: - resource = register.add_resource(self.applicationIcon) - register.add_header('Application-Icon', resource) - if self.password: - register.set_password(self.password, self.passwordHash) - self.add_origin_info(register) - self.register_hook(register) - return self._send('register', register) - - def notify(self, noteType, title, description, icon=None, sticky=False, - priority=None, callback=None, identifier=None, custom={}): - """Send a GNTP notifications - - .. warning:: - Must have registered with growl beforehand or messages will be ignored - - :param string noteType: One of the notification names registered earlier - :param string title: Notification title (usually displayed on the notification) - :param string description: The main content of the notification - :param string icon: Icon URL path - :param boolean sticky: Sticky notification - :param integer priority: Message priority level from -2 to 2 - :param string callback: URL callback - :param dict custom: Custom attributes. Key names should be prefixed with X- - according to the spec but this is not enforced by this class - - .. warning:: - For now, only URL callbacks are supported. In the future, the - callback argument will also support a function - """ - logger.info('Sending notification [%s] to %s:%s', noteType, self.hostname, self.port) - assert noteType in self.notifications - notice = gntp.core.GNTPNotice() - notice.add_header('Application-Name', self.applicationName) - notice.add_header('Notification-Name', noteType) - notice.add_header('Notification-Title', title) - if self.password: - notice.set_password(self.password, self.passwordHash) - if sticky: - notice.add_header('Notification-Sticky', sticky) - if priority: - notice.add_header('Notification-Priority', priority) - if icon: - if self._checkIcon(icon): - notice.add_header('Notification-Icon', icon) - else: - resource = notice.add_resource(icon) - notice.add_header('Notification-Icon', resource) - - if description: - notice.add_header('Notification-Text', description) - if callback: - notice.add_header('Notification-Callback-Target', callback) - if identifier: - notice.add_header('Notification-Coalescing-ID', identifier) - - for key in custom: - notice.add_header(key, custom[key]) - - self.add_origin_info(notice) - self.notify_hook(notice) - - return self._send('notify', notice) - - def subscribe(self, id, name, port): - """Send a Subscribe request to a remote machine""" - sub = gntp.core.GNTPSubscribe() - sub.add_header('Subscriber-ID', id) - sub.add_header('Subscriber-Name', name) - sub.add_header('Subscriber-Port', port) - if self.password: - sub.set_password(self.password, self.passwordHash) - - self.add_origin_info(sub) - self.subscribe_hook(sub) - - return self._send('subscribe', sub) - - def add_origin_info(self, packet): - """Add optional Origin headers to message""" - packet.add_header('Origin-Machine-Name', platform.node()) - packet.add_header('Origin-Software-Name', 'gntp.py') - packet.add_header('Origin-Software-Version', __version__) - packet.add_header('Origin-Platform-Name', platform.system()) - packet.add_header('Origin-Platform-Version', platform.platform()) - - def register_hook(self, packet): - pass - - def notify_hook(self, packet): - pass - - def subscribe_hook(self, packet): - pass - - def _send(self, messagetype, packet): - """Send the GNTP Packet""" - - packet.validate() - data = packet.encode() - - logger.debug('To : %s:%s <%s>\n%s', self.hostname, self.port, packet.__class__, data) - - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - s.settimeout(self.socketTimeout) - try: - s.connect((self.hostname, self.port)) - s.send(data) - recv_data = s.recv(1024) - while not recv_data.endswith(gntp.shim.b("\r\n\r\n")): - recv_data += s.recv(1024) - except socket.error: - # Python2.5 and Python3 compatibile exception - exc = sys.exc_info()[1] - raise errors.NetworkError(exc) - - response = gntp.core.parse_gntp(recv_data) - s.close() - - logger.debug('From : %s:%s <%s>\n%s', self.hostname, self.port, response.__class__, response) - - if type(response) == gntp.core.GNTPOK: - return True - logger.error('Invalid response: %s', response.error()) - return response.error() - - -def mini(description, applicationName='PythonMini', noteType="Message", - title="Mini Message", applicationIcon=None, hostname='localhost', - password=None, port=23053, sticky=False, priority=None, - callback=None, notificationIcon=None, identifier=None, - notifierFactory=GrowlNotifier): - """Single notification function - - Simple notification function in one line. Has only one required parameter - and attempts to use reasonable defaults for everything else - :param string description: Notification message - - .. warning:: - For now, only URL callbacks are supported. In the future, the - callback argument will also support a function - """ - try: - growl = notifierFactory( - applicationName=applicationName, - notifications=[noteType], - defaultNotifications=[noteType], - applicationIcon=applicationIcon, - hostname=hostname, - password=password, - port=port, - ) - result = growl.register() - if result is not True: - return result - - return growl.notify( - noteType=noteType, - title=title, - description=description, - icon=notificationIcon, - sticky=sticky, - priority=priority, - callback=callback, - identifier=identifier, - ) - except Exception: - # We want the "mini" function to be simple and swallow Exceptions - # in order to be less invasive - logger.exception("Growl error") - -if __name__ == '__main__': - # If we're running this module directly we're likely running it as a test - # so extra debugging is useful - logging.basicConfig(level=logging.INFO) - mini('Testing mini notification') diff --git a/lib/gntp/shim.py b/lib/gntp/shim.py deleted file mode 100644 index 3a387828..00000000 --- a/lib/gntp/shim.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright: 2013 Paul Traylor -# These sources are released under the terms of the MIT license: see LICENSE - -""" -Python2.5 and Python3.3 compatibility shim - -Heavily inspirted by the "six" library. -https://pypi.python.org/pypi/six -""" - -import sys - -PY3 = sys.version_info[0] == 3 - -if PY3: - def b(s): - if isinstance(s, bytes): - return s - return s.encode('utf8', 'replace') - - def u(s): - if isinstance(s, bytes): - return s.decode('utf8', 'replace') - return s - - from io import BytesIO as StringIO - from configparser import RawConfigParser -else: - def b(s): - if isinstance(s, unicode): - return s.encode('utf8', 'replace') - return s - - def u(s): - if isinstance(s, unicode): - return s - if isinstance(s, int): - s = str(s) - return unicode(s, "utf8", "replace") - - from StringIO import StringIO - from ConfigParser import RawConfigParser - -b.__doc__ = "Ensure we have a byte string" -u.__doc__ = "Ensure we have a unicode string" diff --git a/lib/gntp/version.py b/lib/gntp/version.py deleted file mode 100644 index 2166aaca..00000000 --- a/lib/gntp/version.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright: 2013 Paul Traylor -# These sources are released under the terms of the MIT license: see LICENSE - -__version__ = '1.0.2' diff --git a/lib/html5lib/__init__.py b/lib/html5lib/__init__.py deleted file mode 100644 index 3ba1163c..00000000 --- a/lib/html5lib/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -HTML parsing library based on the WHATWG "HTML5" -specification. The parser is designed to be compatible with existing -HTML found in the wild and implements well-defined error recovery that -is largely compatible with modern desktop web browsers. - -Example usage: - -import html5lib -f = open("my_document.html") -tree = html5lib.parse(f) -""" - -from __future__ import absolute_import, division, unicode_literals - -from .html5parser import HTMLParser, parse, parseFragment -from .treebuilders import getTreeBuilder -from .treewalkers import getTreeWalker -from .serializer import serialize - -__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", - "getTreeWalker", "serialize"] - -# this has to be at the top level, see how setup.py parses this -__version__ = "0.999999" diff --git a/lib/html5lib/constants.py b/lib/html5lib/constants.py deleted file mode 100644 index d938e0ae..00000000 --- a/lib/html5lib/constants.py +++ /dev/null @@ -1,3102 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import string - -EOF = None - -E = { - "null-character": - "Null character in input stream, replaced with U+FFFD.", - "invalid-codepoint": - "Invalid codepoint in stream.", - "incorrectly-placed-solidus": - "Solidus (/) incorrectly placed in tag.", - "incorrect-cr-newline-entity": - "Incorrect CR newline entity, replaced with LF.", - "illegal-windows-1252-entity": - "Entity used with illegal number (windows-1252 reference).", - "cant-convert-numeric-entity": - "Numeric entity couldn't be converted to character " - "(codepoint U+%(charAsInt)08x).", - "illegal-codepoint-for-numeric-entity": - "Numeric entity represents an illegal codepoint: " - "U+%(charAsInt)08x.", - "numeric-entity-without-semicolon": - "Numeric entity didn't end with ';'.", - "expected-numeric-entity-but-got-eof": - "Numeric entity expected. Got end of file instead.", - "expected-numeric-entity": - "Numeric entity expected but none found.", - "named-entity-without-semicolon": - "Named entity didn't end with ';'.", - "expected-named-entity": - "Named entity expected. Got none.", - "attributes-in-end-tag": - "End tag contains unexpected attributes.", - 'self-closing-flag-on-end-tag': - "End tag contains unexpected self-closing flag.", - "expected-tag-name-but-got-right-bracket": - "Expected tag name. Got '>' instead.", - "expected-tag-name-but-got-question-mark": - "Expected tag name. Got '?' instead. (HTML doesn't " - "support processing instructions.)", - "expected-tag-name": - "Expected tag name. Got something else instead", - "expected-closing-tag-but-got-right-bracket": - "Expected closing tag. Got '>' instead. Ignoring ''.", - "expected-closing-tag-but-got-eof": - "Expected closing tag. Unexpected end of file.", - "expected-closing-tag-but-got-char": - "Expected closing tag. Unexpected character '%(data)s' found.", - "eof-in-tag-name": - "Unexpected end of file in the tag name.", - "expected-attribute-name-but-got-eof": - "Unexpected end of file. Expected attribute name instead.", - "eof-in-attribute-name": - "Unexpected end of file in attribute name.", - "invalid-character-in-attribute-name": - "Invalid character in attribute name", - "duplicate-attribute": - "Dropped duplicate attribute on tag.", - "expected-end-of-tag-name-but-got-eof": - "Unexpected end of file. Expected = or end of tag.", - "expected-attribute-value-but-got-eof": - "Unexpected end of file. Expected attribute value.", - "expected-attribute-value-but-got-right-bracket": - "Expected attribute value. Got '>' instead.", - 'equals-in-unquoted-attribute-value': - "Unexpected = in unquoted attribute", - 'unexpected-character-in-unquoted-attribute-value': - "Unexpected character in unquoted attribute", - "invalid-character-after-attribute-name": - "Unexpected character after attribute name.", - "unexpected-character-after-attribute-value": - "Unexpected character after attribute value.", - "eof-in-attribute-value-double-quote": - "Unexpected end of file in attribute value (\").", - "eof-in-attribute-value-single-quote": - "Unexpected end of file in attribute value (').", - "eof-in-attribute-value-no-quotes": - "Unexpected end of file in attribute value.", - "unexpected-EOF-after-solidus-in-tag": - "Unexpected end of file in tag. Expected >", - "unexpected-character-after-solidus-in-tag": - "Unexpected character after / in tag. Expected >", - "expected-dashes-or-doctype": - "Expected '--' or 'DOCTYPE'. Not found.", - "unexpected-bang-after-double-dash-in-comment": - "Unexpected ! after -- in comment", - "unexpected-space-after-double-dash-in-comment": - "Unexpected space after -- in comment", - "incorrect-comment": - "Incorrect comment.", - "eof-in-comment": - "Unexpected end of file in comment.", - "eof-in-comment-end-dash": - "Unexpected end of file in comment (-)", - "unexpected-dash-after-double-dash-in-comment": - "Unexpected '-' after '--' found in comment.", - "eof-in-comment-double-dash": - "Unexpected end of file in comment (--).", - "eof-in-comment-end-space-state": - "Unexpected end of file in comment.", - "eof-in-comment-end-bang-state": - "Unexpected end of file in comment.", - "unexpected-char-in-comment": - "Unexpected character in comment found.", - "need-space-after-doctype": - "No space after literal string 'DOCTYPE'.", - "expected-doctype-name-but-got-right-bracket": - "Unexpected > character. Expected DOCTYPE name.", - "expected-doctype-name-but-got-eof": - "Unexpected end of file. Expected DOCTYPE name.", - "eof-in-doctype-name": - "Unexpected end of file in DOCTYPE name.", - "eof-in-doctype": - "Unexpected end of file in DOCTYPE.", - "expected-space-or-right-bracket-in-doctype": - "Expected space or '>'. Got '%(data)s'", - "unexpected-end-of-doctype": - "Unexpected end of DOCTYPE.", - "unexpected-char-in-doctype": - "Unexpected character in DOCTYPE.", - "eof-in-innerhtml": - "XXX innerHTML EOF", - "unexpected-doctype": - "Unexpected DOCTYPE. Ignored.", - "non-html-root": - "html needs to be the first start tag.", - "expected-doctype-but-got-eof": - "Unexpected End of file. Expected DOCTYPE.", - "unknown-doctype": - "Erroneous DOCTYPE.", - "expected-doctype-but-got-chars": - "Unexpected non-space characters. Expected DOCTYPE.", - "expected-doctype-but-got-start-tag": - "Unexpected start tag (%(name)s). Expected DOCTYPE.", - "expected-doctype-but-got-end-tag": - "Unexpected end tag (%(name)s). Expected DOCTYPE.", - "end-tag-after-implied-root": - "Unexpected end tag (%(name)s) after the (implied) root element.", - "expected-named-closing-tag-but-got-eof": - "Unexpected end of file. Expected end tag (%(name)s).", - "two-heads-are-not-better-than-one": - "Unexpected start tag head in existing head. Ignored.", - "unexpected-end-tag": - "Unexpected end tag (%(name)s). Ignored.", - "unexpected-start-tag-out-of-my-head": - "Unexpected start tag (%(name)s) that can be in head. Moved.", - "unexpected-start-tag": - "Unexpected start tag (%(name)s).", - "missing-end-tag": - "Missing end tag (%(name)s).", - "missing-end-tags": - "Missing end tags (%(name)s).", - "unexpected-start-tag-implies-end-tag": - "Unexpected start tag (%(startName)s) " - "implies end tag (%(endName)s).", - "unexpected-start-tag-treated-as": - "Unexpected start tag (%(originalName)s). Treated as %(newName)s.", - "deprecated-tag": - "Unexpected start tag %(name)s. Don't use it!", - "unexpected-start-tag-ignored": - "Unexpected start tag %(name)s. Ignored.", - "expected-one-end-tag-but-got-another": - "Unexpected end tag (%(gotName)s). " - "Missing end tag (%(expectedName)s).", - "end-tag-too-early": - "End tag (%(name)s) seen too early. Expected other end tag.", - "end-tag-too-early-named": - "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).", - "end-tag-too-early-ignored": - "End tag (%(name)s) seen too early. Ignored.", - "adoption-agency-1.1": - "End tag (%(name)s) violates step 1, " - "paragraph 1 of the adoption agency algorithm.", - "adoption-agency-1.2": - "End tag (%(name)s) violates step 1, " - "paragraph 2 of the adoption agency algorithm.", - "adoption-agency-1.3": - "End tag (%(name)s) violates step 1, " - "paragraph 3 of the adoption agency algorithm.", - "adoption-agency-4.4": - "End tag (%(name)s) violates step 4, " - "paragraph 4 of the adoption agency algorithm.", - "unexpected-end-tag-treated-as": - "Unexpected end tag (%(originalName)s). Treated as %(newName)s.", - "no-end-tag": - "This element (%(name)s) has no end tag.", - "unexpected-implied-end-tag-in-table": - "Unexpected implied end tag (%(name)s) in the table phase.", - "unexpected-implied-end-tag-in-table-body": - "Unexpected implied end tag (%(name)s) in the table body phase.", - "unexpected-char-implies-table-voodoo": - "Unexpected non-space characters in " - "table context caused voodoo mode.", - "unexpected-hidden-input-in-table": - "Unexpected input with type hidden in table context.", - "unexpected-form-in-table": - "Unexpected form in table context.", - "unexpected-start-tag-implies-table-voodoo": - "Unexpected start tag (%(name)s) in " - "table context caused voodoo mode.", - "unexpected-end-tag-implies-table-voodoo": - "Unexpected end tag (%(name)s) in " - "table context caused voodoo mode.", - "unexpected-cell-in-table-body": - "Unexpected table cell start tag (%(name)s) " - "in the table body phase.", - "unexpected-cell-end-tag": - "Got table cell end tag (%(name)s) " - "while required end tags are missing.", - "unexpected-end-tag-in-table-body": - "Unexpected end tag (%(name)s) in the table body phase. Ignored.", - "unexpected-implied-end-tag-in-table-row": - "Unexpected implied end tag (%(name)s) in the table row phase.", - "unexpected-end-tag-in-table-row": - "Unexpected end tag (%(name)s) in the table row phase. Ignored.", - "unexpected-select-in-select": - "Unexpected select start tag in the select phase " - "treated as select end tag.", - "unexpected-input-in-select": - "Unexpected input start tag in the select phase.", - "unexpected-start-tag-in-select": - "Unexpected start tag token (%(name)s in the select phase. " - "Ignored.", - "unexpected-end-tag-in-select": - "Unexpected end tag (%(name)s) in the select phase. Ignored.", - "unexpected-table-element-start-tag-in-select-in-table": - "Unexpected table element start tag (%(name)s) in the select in table phase.", - "unexpected-table-element-end-tag-in-select-in-table": - "Unexpected table element end tag (%(name)s) in the select in table phase.", - "unexpected-char-after-body": - "Unexpected non-space characters in the after body phase.", - "unexpected-start-tag-after-body": - "Unexpected start tag token (%(name)s)" - " in the after body phase.", - "unexpected-end-tag-after-body": - "Unexpected end tag token (%(name)s)" - " in the after body phase.", - "unexpected-char-in-frameset": - "Unexpected characters in the frameset phase. Characters ignored.", - "unexpected-start-tag-in-frameset": - "Unexpected start tag token (%(name)s)" - " in the frameset phase. Ignored.", - "unexpected-frameset-in-frameset-innerhtml": - "Unexpected end tag token (frameset) " - "in the frameset phase (innerHTML).", - "unexpected-end-tag-in-frameset": - "Unexpected end tag token (%(name)s)" - " in the frameset phase. Ignored.", - "unexpected-char-after-frameset": - "Unexpected non-space characters in the " - "after frameset phase. Ignored.", - "unexpected-start-tag-after-frameset": - "Unexpected start tag (%(name)s)" - " in the after frameset phase. Ignored.", - "unexpected-end-tag-after-frameset": - "Unexpected end tag (%(name)s)" - " in the after frameset phase. Ignored.", - "unexpected-end-tag-after-body-innerhtml": - "Unexpected end tag after body(innerHtml)", - "expected-eof-but-got-char": - "Unexpected non-space characters. Expected end of file.", - "expected-eof-but-got-start-tag": - "Unexpected start tag (%(name)s)" - ". Expected end of file.", - "expected-eof-but-got-end-tag": - "Unexpected end tag (%(name)s)" - ". Expected end of file.", - "eof-in-table": - "Unexpected end of file. Expected table content.", - "eof-in-select": - "Unexpected end of file. Expected select content.", - "eof-in-frameset": - "Unexpected end of file. Expected frameset content.", - "eof-in-script-in-script": - "Unexpected end of file. Expected script content.", - "eof-in-foreign-lands": - "Unexpected end of file. Expected foreign content", - "non-void-element-with-trailing-solidus": - "Trailing solidus not allowed on element %(name)s", - "unexpected-html-element-in-foreign-content": - "Element %(name)s not allowed in a non-html context", - "unexpected-end-tag-before-html": - "Unexpected end tag (%(name)s) before html.", - "XXX-undefined-error": - "Undefined error (this sucks and should be fixed)", -} - -namespaces = { - "html": "http://www.w3.org/1999/xhtml", - "mathml": "http://www.w3.org/1998/Math/MathML", - "svg": "http://www.w3.org/2000/svg", - "xlink": "http://www.w3.org/1999/xlink", - "xml": "http://www.w3.org/XML/1998/namespace", - "xmlns": "http://www.w3.org/2000/xmlns/" -} - -scopingElements = frozenset([ - (namespaces["html"], "applet"), - (namespaces["html"], "caption"), - (namespaces["html"], "html"), - (namespaces["html"], "marquee"), - (namespaces["html"], "object"), - (namespaces["html"], "table"), - (namespaces["html"], "td"), - (namespaces["html"], "th"), - (namespaces["mathml"], "mi"), - (namespaces["mathml"], "mo"), - (namespaces["mathml"], "mn"), - (namespaces["mathml"], "ms"), - (namespaces["mathml"], "mtext"), - (namespaces["mathml"], "annotation-xml"), - (namespaces["svg"], "foreignObject"), - (namespaces["svg"], "desc"), - (namespaces["svg"], "title"), -]) - -formattingElements = frozenset([ - (namespaces["html"], "a"), - (namespaces["html"], "b"), - (namespaces["html"], "big"), - (namespaces["html"], "code"), - (namespaces["html"], "em"), - (namespaces["html"], "font"), - (namespaces["html"], "i"), - (namespaces["html"], "nobr"), - (namespaces["html"], "s"), - (namespaces["html"], "small"), - (namespaces["html"], "strike"), - (namespaces["html"], "strong"), - (namespaces["html"], "tt"), - (namespaces["html"], "u") -]) - -specialElements = frozenset([ - (namespaces["html"], "address"), - (namespaces["html"], "applet"), - (namespaces["html"], "area"), - (namespaces["html"], "article"), - (namespaces["html"], "aside"), - (namespaces["html"], "base"), - (namespaces["html"], "basefont"), - (namespaces["html"], "bgsound"), - (namespaces["html"], "blockquote"), - (namespaces["html"], "body"), - (namespaces["html"], "br"), - (namespaces["html"], "button"), - (namespaces["html"], "caption"), - (namespaces["html"], "center"), - (namespaces["html"], "col"), - (namespaces["html"], "colgroup"), - (namespaces["html"], "command"), - (namespaces["html"], "dd"), - (namespaces["html"], "details"), - (namespaces["html"], "dir"), - (namespaces["html"], "div"), - (namespaces["html"], "dl"), - (namespaces["html"], "dt"), - (namespaces["html"], "embed"), - (namespaces["html"], "fieldset"), - (namespaces["html"], "figure"), - (namespaces["html"], "footer"), - (namespaces["html"], "form"), - (namespaces["html"], "frame"), - (namespaces["html"], "frameset"), - (namespaces["html"], "h1"), - (namespaces["html"], "h2"), - (namespaces["html"], "h3"), - (namespaces["html"], "h4"), - (namespaces["html"], "h5"), - (namespaces["html"], "h6"), - (namespaces["html"], "head"), - (namespaces["html"], "header"), - (namespaces["html"], "hr"), - (namespaces["html"], "html"), - (namespaces["html"], "iframe"), - # Note that image is commented out in the spec as "this isn't an - # element that can end up on the stack, so it doesn't matter," - (namespaces["html"], "image"), - (namespaces["html"], "img"), - (namespaces["html"], "input"), - (namespaces["html"], "isindex"), - (namespaces["html"], "li"), - (namespaces["html"], "link"), - (namespaces["html"], "listing"), - (namespaces["html"], "marquee"), - (namespaces["html"], "menu"), - (namespaces["html"], "meta"), - (namespaces["html"], "nav"), - (namespaces["html"], "noembed"), - (namespaces["html"], "noframes"), - (namespaces["html"], "noscript"), - (namespaces["html"], "object"), - (namespaces["html"], "ol"), - (namespaces["html"], "p"), - (namespaces["html"], "param"), - (namespaces["html"], "plaintext"), - (namespaces["html"], "pre"), - (namespaces["html"], "script"), - (namespaces["html"], "section"), - (namespaces["html"], "select"), - (namespaces["html"], "style"), - (namespaces["html"], "table"), - (namespaces["html"], "tbody"), - (namespaces["html"], "td"), - (namespaces["html"], "textarea"), - (namespaces["html"], "tfoot"), - (namespaces["html"], "th"), - (namespaces["html"], "thead"), - (namespaces["html"], "title"), - (namespaces["html"], "tr"), - (namespaces["html"], "ul"), - (namespaces["html"], "wbr"), - (namespaces["html"], "xmp"), - (namespaces["svg"], "foreignObject") -]) - -htmlIntegrationPointElements = frozenset([ - (namespaces["mathml"], "annotaion-xml"), - (namespaces["svg"], "foreignObject"), - (namespaces["svg"], "desc"), - (namespaces["svg"], "title") -]) - -mathmlTextIntegrationPointElements = frozenset([ - (namespaces["mathml"], "mi"), - (namespaces["mathml"], "mo"), - (namespaces["mathml"], "mn"), - (namespaces["mathml"], "ms"), - (namespaces["mathml"], "mtext") -]) - -adjustForeignAttributes = { - "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), - "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]), - "xlink:href": ("xlink", "href", namespaces["xlink"]), - "xlink:role": ("xlink", "role", namespaces["xlink"]), - "xlink:show": ("xlink", "show", namespaces["xlink"]), - "xlink:title": ("xlink", "title", namespaces["xlink"]), - "xlink:type": ("xlink", "type", namespaces["xlink"]), - "xml:base": ("xml", "base", namespaces["xml"]), - "xml:lang": ("xml", "lang", namespaces["xml"]), - "xml:space": ("xml", "space", namespaces["xml"]), - "xmlns": (None, "xmlns", namespaces["xmlns"]), - "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"]) -} - -unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in - adjustForeignAttributes.items()]) - -spaceCharacters = frozenset([ - "\t", - "\n", - "\u000C", - " ", - "\r" -]) - -tableInsertModeElements = frozenset([ - "table", - "tbody", - "tfoot", - "thead", - "tr" -]) - -asciiLowercase = frozenset(string.ascii_lowercase) -asciiUppercase = frozenset(string.ascii_uppercase) -asciiLetters = frozenset(string.ascii_letters) -digits = frozenset(string.digits) -hexDigits = frozenset(string.hexdigits) - -asciiUpper2Lower = dict([(ord(c), ord(c.lower())) - for c in string.ascii_uppercase]) - -# Heading elements need to be ordered -headingElements = ( - "h1", - "h2", - "h3", - "h4", - "h5", - "h6" -) - -voidElements = frozenset([ - "base", - "command", - "event-source", - "link", - "meta", - "hr", - "br", - "img", - "embed", - "param", - "area", - "col", - "input", - "source", - "track" -]) - -cdataElements = frozenset(['title', 'textarea']) - -rcdataElements = frozenset([ - 'style', - 'script', - 'xmp', - 'iframe', - 'noembed', - 'noframes', - 'noscript' -]) - -booleanAttributes = { - "": frozenset(["irrelevant"]), - "style": frozenset(["scoped"]), - "img": frozenset(["ismap"]), - "audio": frozenset(["autoplay", "controls"]), - "video": frozenset(["autoplay", "controls"]), - "script": frozenset(["defer", "async"]), - "details": frozenset(["open"]), - "datagrid": frozenset(["multiple", "disabled"]), - "command": frozenset(["hidden", "disabled", "checked", "default"]), - "hr": frozenset(["noshade"]), - "menu": frozenset(["autosubmit"]), - "fieldset": frozenset(["disabled", "readonly"]), - "option": frozenset(["disabled", "readonly", "selected"]), - "optgroup": frozenset(["disabled", "readonly"]), - "button": frozenset(["disabled", "autofocus"]), - "input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]), - "select": frozenset(["disabled", "readonly", "autofocus", "multiple"]), - "output": frozenset(["disabled", "readonly"]), -} - -# entitiesWindows1252 has to be _ordered_ and needs to have an index. It -# therefore can't be a frozenset. -entitiesWindows1252 = ( - 8364, # 0x80 0x20AC EURO SIGN - 65533, # 0x81 UNDEFINED - 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK - 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK - 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK - 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS - 8224, # 0x86 0x2020 DAGGER - 8225, # 0x87 0x2021 DOUBLE DAGGER - 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT - 8240, # 0x89 0x2030 PER MILLE SIGN - 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON - 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE - 65533, # 0x8D UNDEFINED - 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON - 65533, # 0x8F UNDEFINED - 65533, # 0x90 UNDEFINED - 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK - 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK - 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK - 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK - 8226, # 0x95 0x2022 BULLET - 8211, # 0x96 0x2013 EN DASH - 8212, # 0x97 0x2014 EM DASH - 732, # 0x98 0x02DC SMALL TILDE - 8482, # 0x99 0x2122 TRADE MARK SIGN - 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON - 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE - 65533, # 0x9D UNDEFINED - 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON - 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS -) - -xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;']) - -entities = { - "AElig": "\xc6", - "AElig;": "\xc6", - "AMP": "&", - "AMP;": "&", - "Aacute": "\xc1", - "Aacute;": "\xc1", - "Abreve;": "\u0102", - "Acirc": "\xc2", - "Acirc;": "\xc2", - "Acy;": "\u0410", - "Afr;": "\U0001d504", - "Agrave": "\xc0", - "Agrave;": "\xc0", - "Alpha;": "\u0391", - "Amacr;": "\u0100", - "And;": "\u2a53", - "Aogon;": "\u0104", - "Aopf;": "\U0001d538", - "ApplyFunction;": "\u2061", - "Aring": "\xc5", - "Aring;": "\xc5", - "Ascr;": "\U0001d49c", - "Assign;": "\u2254", - "Atilde": "\xc3", - "Atilde;": "\xc3", - "Auml": "\xc4", - "Auml;": "\xc4", - "Backslash;": "\u2216", - "Barv;": "\u2ae7", - "Barwed;": "\u2306", - "Bcy;": "\u0411", - "Because;": "\u2235", - "Bernoullis;": "\u212c", - "Beta;": "\u0392", - "Bfr;": "\U0001d505", - "Bopf;": "\U0001d539", - "Breve;": "\u02d8", - "Bscr;": "\u212c", - "Bumpeq;": "\u224e", - "CHcy;": "\u0427", - "COPY": "\xa9", - "COPY;": "\xa9", - "Cacute;": "\u0106", - "Cap;": "\u22d2", - "CapitalDifferentialD;": "\u2145", - "Cayleys;": "\u212d", - "Ccaron;": "\u010c", - "Ccedil": "\xc7", - "Ccedil;": "\xc7", - "Ccirc;": "\u0108", - "Cconint;": "\u2230", - "Cdot;": "\u010a", - "Cedilla;": "\xb8", - "CenterDot;": "\xb7", - "Cfr;": "\u212d", - "Chi;": "\u03a7", - "CircleDot;": "\u2299", - "CircleMinus;": "\u2296", - "CirclePlus;": "\u2295", - "CircleTimes;": "\u2297", - "ClockwiseContourIntegral;": "\u2232", - "CloseCurlyDoubleQuote;": "\u201d", - "CloseCurlyQuote;": "\u2019", - "Colon;": "\u2237", - "Colone;": "\u2a74", - "Congruent;": "\u2261", - "Conint;": "\u222f", - "ContourIntegral;": "\u222e", - "Copf;": "\u2102", - "Coproduct;": "\u2210", - "CounterClockwiseContourIntegral;": "\u2233", - "Cross;": "\u2a2f", - "Cscr;": "\U0001d49e", - "Cup;": "\u22d3", - "CupCap;": "\u224d", - "DD;": "\u2145", - "DDotrahd;": "\u2911", - "DJcy;": "\u0402", - "DScy;": "\u0405", - "DZcy;": "\u040f", - "Dagger;": "\u2021", - "Darr;": "\u21a1", - "Dashv;": "\u2ae4", - "Dcaron;": "\u010e", - "Dcy;": "\u0414", - "Del;": "\u2207", - "Delta;": "\u0394", - "Dfr;": "\U0001d507", - "DiacriticalAcute;": "\xb4", - "DiacriticalDot;": "\u02d9", - "DiacriticalDoubleAcute;": "\u02dd", - "DiacriticalGrave;": "`", - "DiacriticalTilde;": "\u02dc", - "Diamond;": "\u22c4", - "DifferentialD;": "\u2146", - "Dopf;": "\U0001d53b", - "Dot;": "\xa8", - "DotDot;": "\u20dc", - "DotEqual;": "\u2250", - "DoubleContourIntegral;": "\u222f", - "DoubleDot;": "\xa8", - "DoubleDownArrow;": "\u21d3", - "DoubleLeftArrow;": "\u21d0", - "DoubleLeftRightArrow;": "\u21d4", - "DoubleLeftTee;": "\u2ae4", - "DoubleLongLeftArrow;": "\u27f8", - "DoubleLongLeftRightArrow;": "\u27fa", - "DoubleLongRightArrow;": "\u27f9", - "DoubleRightArrow;": "\u21d2", - "DoubleRightTee;": "\u22a8", - "DoubleUpArrow;": "\u21d1", - "DoubleUpDownArrow;": "\u21d5", - "DoubleVerticalBar;": "\u2225", - "DownArrow;": "\u2193", - "DownArrowBar;": "\u2913", - "DownArrowUpArrow;": "\u21f5", - "DownBreve;": "\u0311", - "DownLeftRightVector;": "\u2950", - "DownLeftTeeVector;": "\u295e", - "DownLeftVector;": "\u21bd", - "DownLeftVectorBar;": "\u2956", - "DownRightTeeVector;": "\u295f", - "DownRightVector;": "\u21c1", - "DownRightVectorBar;": "\u2957", - "DownTee;": "\u22a4", - "DownTeeArrow;": "\u21a7", - "Downarrow;": "\u21d3", - "Dscr;": "\U0001d49f", - "Dstrok;": "\u0110", - "ENG;": "\u014a", - "ETH": "\xd0", - "ETH;": "\xd0", - "Eacute": "\xc9", - "Eacute;": "\xc9", - "Ecaron;": "\u011a", - "Ecirc": "\xca", - "Ecirc;": "\xca", - "Ecy;": "\u042d", - "Edot;": "\u0116", - "Efr;": "\U0001d508", - "Egrave": "\xc8", - "Egrave;": "\xc8", - "Element;": "\u2208", - "Emacr;": "\u0112", - "EmptySmallSquare;": "\u25fb", - "EmptyVerySmallSquare;": "\u25ab", - "Eogon;": "\u0118", - "Eopf;": "\U0001d53c", - "Epsilon;": "\u0395", - "Equal;": "\u2a75", - "EqualTilde;": "\u2242", - "Equilibrium;": "\u21cc", - "Escr;": "\u2130", - "Esim;": "\u2a73", - "Eta;": "\u0397", - "Euml": "\xcb", - "Euml;": "\xcb", - "Exists;": "\u2203", - "ExponentialE;": "\u2147", - "Fcy;": "\u0424", - "Ffr;": "\U0001d509", - "FilledSmallSquare;": "\u25fc", - "FilledVerySmallSquare;": "\u25aa", - "Fopf;": "\U0001d53d", - "ForAll;": "\u2200", - "Fouriertrf;": "\u2131", - "Fscr;": "\u2131", - "GJcy;": "\u0403", - "GT": ">", - "GT;": ">", - "Gamma;": "\u0393", - "Gammad;": "\u03dc", - "Gbreve;": "\u011e", - "Gcedil;": "\u0122", - "Gcirc;": "\u011c", - "Gcy;": "\u0413", - "Gdot;": "\u0120", - "Gfr;": "\U0001d50a", - "Gg;": "\u22d9", - "Gopf;": "\U0001d53e", - "GreaterEqual;": "\u2265", - "GreaterEqualLess;": "\u22db", - "GreaterFullEqual;": "\u2267", - "GreaterGreater;": "\u2aa2", - "GreaterLess;": "\u2277", - "GreaterSlantEqual;": "\u2a7e", - "GreaterTilde;": "\u2273", - "Gscr;": "\U0001d4a2", - "Gt;": "\u226b", - "HARDcy;": "\u042a", - "Hacek;": "\u02c7", - "Hat;": "^", - "Hcirc;": "\u0124", - "Hfr;": "\u210c", - "HilbertSpace;": "\u210b", - "Hopf;": "\u210d", - "HorizontalLine;": "\u2500", - "Hscr;": "\u210b", - "Hstrok;": "\u0126", - "HumpDownHump;": "\u224e", - "HumpEqual;": "\u224f", - "IEcy;": "\u0415", - "IJlig;": "\u0132", - "IOcy;": "\u0401", - "Iacute": "\xcd", - "Iacute;": "\xcd", - "Icirc": "\xce", - "Icirc;": "\xce", - "Icy;": "\u0418", - "Idot;": "\u0130", - "Ifr;": "\u2111", - "Igrave": "\xcc", - "Igrave;": "\xcc", - "Im;": "\u2111", - "Imacr;": "\u012a", - "ImaginaryI;": "\u2148", - "Implies;": "\u21d2", - "Int;": "\u222c", - "Integral;": "\u222b", - "Intersection;": "\u22c2", - "InvisibleComma;": "\u2063", - "InvisibleTimes;": "\u2062", - "Iogon;": "\u012e", - "Iopf;": "\U0001d540", - "Iota;": "\u0399", - "Iscr;": "\u2110", - "Itilde;": "\u0128", - "Iukcy;": "\u0406", - "Iuml": "\xcf", - "Iuml;": "\xcf", - "Jcirc;": "\u0134", - "Jcy;": "\u0419", - "Jfr;": "\U0001d50d", - "Jopf;": "\U0001d541", - "Jscr;": "\U0001d4a5", - "Jsercy;": "\u0408", - "Jukcy;": "\u0404", - "KHcy;": "\u0425", - "KJcy;": "\u040c", - "Kappa;": "\u039a", - "Kcedil;": "\u0136", - "Kcy;": "\u041a", - "Kfr;": "\U0001d50e", - "Kopf;": "\U0001d542", - "Kscr;": "\U0001d4a6", - "LJcy;": "\u0409", - "LT": "<", - "LT;": "<", - "Lacute;": "\u0139", - "Lambda;": "\u039b", - "Lang;": "\u27ea", - "Laplacetrf;": "\u2112", - "Larr;": "\u219e", - "Lcaron;": "\u013d", - "Lcedil;": "\u013b", - "Lcy;": "\u041b", - "LeftAngleBracket;": "\u27e8", - "LeftArrow;": "\u2190", - "LeftArrowBar;": "\u21e4", - "LeftArrowRightArrow;": "\u21c6", - "LeftCeiling;": "\u2308", - "LeftDoubleBracket;": "\u27e6", - "LeftDownTeeVector;": "\u2961", - "LeftDownVector;": "\u21c3", - "LeftDownVectorBar;": "\u2959", - "LeftFloor;": "\u230a", - "LeftRightArrow;": "\u2194", - "LeftRightVector;": "\u294e", - "LeftTee;": "\u22a3", - "LeftTeeArrow;": "\u21a4", - "LeftTeeVector;": "\u295a", - "LeftTriangle;": "\u22b2", - "LeftTriangleBar;": "\u29cf", - "LeftTriangleEqual;": "\u22b4", - "LeftUpDownVector;": "\u2951", - "LeftUpTeeVector;": "\u2960", - "LeftUpVector;": "\u21bf", - "LeftUpVectorBar;": "\u2958", - "LeftVector;": "\u21bc", - "LeftVectorBar;": "\u2952", - "Leftarrow;": "\u21d0", - "Leftrightarrow;": "\u21d4", - "LessEqualGreater;": "\u22da", - "LessFullEqual;": "\u2266", - "LessGreater;": "\u2276", - "LessLess;": "\u2aa1", - "LessSlantEqual;": "\u2a7d", - "LessTilde;": "\u2272", - "Lfr;": "\U0001d50f", - "Ll;": "\u22d8", - "Lleftarrow;": "\u21da", - "Lmidot;": "\u013f", - "LongLeftArrow;": "\u27f5", - "LongLeftRightArrow;": "\u27f7", - "LongRightArrow;": "\u27f6", - "Longleftarrow;": "\u27f8", - "Longleftrightarrow;": "\u27fa", - "Longrightarrow;": "\u27f9", - "Lopf;": "\U0001d543", - "LowerLeftArrow;": "\u2199", - "LowerRightArrow;": "\u2198", - "Lscr;": "\u2112", - "Lsh;": "\u21b0", - "Lstrok;": "\u0141", - "Lt;": "\u226a", - "Map;": "\u2905", - "Mcy;": "\u041c", - "MediumSpace;": "\u205f", - "Mellintrf;": "\u2133", - "Mfr;": "\U0001d510", - "MinusPlus;": "\u2213", - "Mopf;": "\U0001d544", - "Mscr;": "\u2133", - "Mu;": "\u039c", - "NJcy;": "\u040a", - "Nacute;": "\u0143", - "Ncaron;": "\u0147", - "Ncedil;": "\u0145", - "Ncy;": "\u041d", - "NegativeMediumSpace;": "\u200b", - "NegativeThickSpace;": "\u200b", - "NegativeThinSpace;": "\u200b", - "NegativeVeryThinSpace;": "\u200b", - "NestedGreaterGreater;": "\u226b", - "NestedLessLess;": "\u226a", - "NewLine;": "\n", - "Nfr;": "\U0001d511", - "NoBreak;": "\u2060", - "NonBreakingSpace;": "\xa0", - "Nopf;": "\u2115", - "Not;": "\u2aec", - "NotCongruent;": "\u2262", - "NotCupCap;": "\u226d", - "NotDoubleVerticalBar;": "\u2226", - "NotElement;": "\u2209", - "NotEqual;": "\u2260", - "NotEqualTilde;": "\u2242\u0338", - "NotExists;": "\u2204", - "NotGreater;": "\u226f", - "NotGreaterEqual;": "\u2271", - "NotGreaterFullEqual;": "\u2267\u0338", - "NotGreaterGreater;": "\u226b\u0338", - "NotGreaterLess;": "\u2279", - "NotGreaterSlantEqual;": "\u2a7e\u0338", - "NotGreaterTilde;": "\u2275", - "NotHumpDownHump;": "\u224e\u0338", - "NotHumpEqual;": "\u224f\u0338", - "NotLeftTriangle;": "\u22ea", - "NotLeftTriangleBar;": "\u29cf\u0338", - "NotLeftTriangleEqual;": "\u22ec", - "NotLess;": "\u226e", - "NotLessEqual;": "\u2270", - "NotLessGreater;": "\u2278", - "NotLessLess;": "\u226a\u0338", - "NotLessSlantEqual;": "\u2a7d\u0338", - "NotLessTilde;": "\u2274", - "NotNestedGreaterGreater;": "\u2aa2\u0338", - "NotNestedLessLess;": "\u2aa1\u0338", - "NotPrecedes;": "\u2280", - "NotPrecedesEqual;": "\u2aaf\u0338", - "NotPrecedesSlantEqual;": "\u22e0", - "NotReverseElement;": "\u220c", - "NotRightTriangle;": "\u22eb", - "NotRightTriangleBar;": "\u29d0\u0338", - "NotRightTriangleEqual;": "\u22ed", - "NotSquareSubset;": "\u228f\u0338", - "NotSquareSubsetEqual;": "\u22e2", - "NotSquareSuperset;": "\u2290\u0338", - "NotSquareSupersetEqual;": "\u22e3", - "NotSubset;": "\u2282\u20d2", - "NotSubsetEqual;": "\u2288", - "NotSucceeds;": "\u2281", - "NotSucceedsEqual;": "\u2ab0\u0338", - "NotSucceedsSlantEqual;": "\u22e1", - "NotSucceedsTilde;": "\u227f\u0338", - "NotSuperset;": "\u2283\u20d2", - "NotSupersetEqual;": "\u2289", - "NotTilde;": "\u2241", - "NotTildeEqual;": "\u2244", - "NotTildeFullEqual;": "\u2247", - "NotTildeTilde;": "\u2249", - "NotVerticalBar;": "\u2224", - "Nscr;": "\U0001d4a9", - "Ntilde": "\xd1", - "Ntilde;": "\xd1", - "Nu;": "\u039d", - "OElig;": "\u0152", - "Oacute": "\xd3", - "Oacute;": "\xd3", - "Ocirc": "\xd4", - "Ocirc;": "\xd4", - "Ocy;": "\u041e", - "Odblac;": "\u0150", - "Ofr;": "\U0001d512", - "Ograve": "\xd2", - "Ograve;": "\xd2", - "Omacr;": "\u014c", - "Omega;": "\u03a9", - "Omicron;": "\u039f", - "Oopf;": "\U0001d546", - "OpenCurlyDoubleQuote;": "\u201c", - "OpenCurlyQuote;": "\u2018", - "Or;": "\u2a54", - "Oscr;": "\U0001d4aa", - "Oslash": "\xd8", - "Oslash;": "\xd8", - "Otilde": "\xd5", - "Otilde;": "\xd5", - "Otimes;": "\u2a37", - "Ouml": "\xd6", - "Ouml;": "\xd6", - "OverBar;": "\u203e", - "OverBrace;": "\u23de", - "OverBracket;": "\u23b4", - "OverParenthesis;": "\u23dc", - "PartialD;": "\u2202", - "Pcy;": "\u041f", - "Pfr;": "\U0001d513", - "Phi;": "\u03a6", - "Pi;": "\u03a0", - "PlusMinus;": "\xb1", - "Poincareplane;": "\u210c", - "Popf;": "\u2119", - "Pr;": "\u2abb", - "Precedes;": "\u227a", - "PrecedesEqual;": "\u2aaf", - "PrecedesSlantEqual;": "\u227c", - "PrecedesTilde;": "\u227e", - "Prime;": "\u2033", - "Product;": "\u220f", - "Proportion;": "\u2237", - "Proportional;": "\u221d", - "Pscr;": "\U0001d4ab", - "Psi;": "\u03a8", - "QUOT": "\"", - "QUOT;": "\"", - "Qfr;": "\U0001d514", - "Qopf;": "\u211a", - "Qscr;": "\U0001d4ac", - "RBarr;": "\u2910", - "REG": "\xae", - "REG;": "\xae", - "Racute;": "\u0154", - "Rang;": "\u27eb", - "Rarr;": "\u21a0", - "Rarrtl;": "\u2916", - "Rcaron;": "\u0158", - "Rcedil;": "\u0156", - "Rcy;": "\u0420", - "Re;": "\u211c", - "ReverseElement;": "\u220b", - "ReverseEquilibrium;": "\u21cb", - "ReverseUpEquilibrium;": "\u296f", - "Rfr;": "\u211c", - "Rho;": "\u03a1", - "RightAngleBracket;": "\u27e9", - "RightArrow;": "\u2192", - "RightArrowBar;": "\u21e5", - "RightArrowLeftArrow;": "\u21c4", - "RightCeiling;": "\u2309", - "RightDoubleBracket;": "\u27e7", - "RightDownTeeVector;": "\u295d", - "RightDownVector;": "\u21c2", - "RightDownVectorBar;": "\u2955", - "RightFloor;": "\u230b", - "RightTee;": "\u22a2", - "RightTeeArrow;": "\u21a6", - "RightTeeVector;": "\u295b", - "RightTriangle;": "\u22b3", - "RightTriangleBar;": "\u29d0", - "RightTriangleEqual;": "\u22b5", - "RightUpDownVector;": "\u294f", - "RightUpTeeVector;": "\u295c", - "RightUpVector;": "\u21be", - "RightUpVectorBar;": "\u2954", - "RightVector;": "\u21c0", - "RightVectorBar;": "\u2953", - "Rightarrow;": "\u21d2", - "Ropf;": "\u211d", - "RoundImplies;": "\u2970", - "Rrightarrow;": "\u21db", - "Rscr;": "\u211b", - "Rsh;": "\u21b1", - "RuleDelayed;": "\u29f4", - "SHCHcy;": "\u0429", - "SHcy;": "\u0428", - "SOFTcy;": "\u042c", - "Sacute;": "\u015a", - "Sc;": "\u2abc", - "Scaron;": "\u0160", - "Scedil;": "\u015e", - "Scirc;": "\u015c", - "Scy;": "\u0421", - "Sfr;": "\U0001d516", - "ShortDownArrow;": "\u2193", - "ShortLeftArrow;": "\u2190", - "ShortRightArrow;": "\u2192", - "ShortUpArrow;": "\u2191", - "Sigma;": "\u03a3", - "SmallCircle;": "\u2218", - "Sopf;": "\U0001d54a", - "Sqrt;": "\u221a", - "Square;": "\u25a1", - "SquareIntersection;": "\u2293", - "SquareSubset;": "\u228f", - "SquareSubsetEqual;": "\u2291", - "SquareSuperset;": "\u2290", - "SquareSupersetEqual;": "\u2292", - "SquareUnion;": "\u2294", - "Sscr;": "\U0001d4ae", - "Star;": "\u22c6", - "Sub;": "\u22d0", - "Subset;": "\u22d0", - "SubsetEqual;": "\u2286", - "Succeeds;": "\u227b", - "SucceedsEqual;": "\u2ab0", - "SucceedsSlantEqual;": "\u227d", - "SucceedsTilde;": "\u227f", - "SuchThat;": "\u220b", - "Sum;": "\u2211", - "Sup;": "\u22d1", - "Superset;": "\u2283", - "SupersetEqual;": "\u2287", - "Supset;": "\u22d1", - "THORN": "\xde", - "THORN;": "\xde", - "TRADE;": "\u2122", - "TSHcy;": "\u040b", - "TScy;": "\u0426", - "Tab;": "\t", - "Tau;": "\u03a4", - "Tcaron;": "\u0164", - "Tcedil;": "\u0162", - "Tcy;": "\u0422", - "Tfr;": "\U0001d517", - "Therefore;": "\u2234", - "Theta;": "\u0398", - "ThickSpace;": "\u205f\u200a", - "ThinSpace;": "\u2009", - "Tilde;": "\u223c", - "TildeEqual;": "\u2243", - "TildeFullEqual;": "\u2245", - "TildeTilde;": "\u2248", - "Topf;": "\U0001d54b", - "TripleDot;": "\u20db", - "Tscr;": "\U0001d4af", - "Tstrok;": "\u0166", - "Uacute": "\xda", - "Uacute;": "\xda", - "Uarr;": "\u219f", - "Uarrocir;": "\u2949", - "Ubrcy;": "\u040e", - "Ubreve;": "\u016c", - "Ucirc": "\xdb", - "Ucirc;": "\xdb", - "Ucy;": "\u0423", - "Udblac;": "\u0170", - "Ufr;": "\U0001d518", - "Ugrave": "\xd9", - "Ugrave;": "\xd9", - "Umacr;": "\u016a", - "UnderBar;": "_", - "UnderBrace;": "\u23df", - "UnderBracket;": "\u23b5", - "UnderParenthesis;": "\u23dd", - "Union;": "\u22c3", - "UnionPlus;": "\u228e", - "Uogon;": "\u0172", - "Uopf;": "\U0001d54c", - "UpArrow;": "\u2191", - "UpArrowBar;": "\u2912", - "UpArrowDownArrow;": "\u21c5", - "UpDownArrow;": "\u2195", - "UpEquilibrium;": "\u296e", - "UpTee;": "\u22a5", - "UpTeeArrow;": "\u21a5", - "Uparrow;": "\u21d1", - "Updownarrow;": "\u21d5", - "UpperLeftArrow;": "\u2196", - "UpperRightArrow;": "\u2197", - "Upsi;": "\u03d2", - "Upsilon;": "\u03a5", - "Uring;": "\u016e", - "Uscr;": "\U0001d4b0", - "Utilde;": "\u0168", - "Uuml": "\xdc", - "Uuml;": "\xdc", - "VDash;": "\u22ab", - "Vbar;": "\u2aeb", - "Vcy;": "\u0412", - "Vdash;": "\u22a9", - "Vdashl;": "\u2ae6", - "Vee;": "\u22c1", - "Verbar;": "\u2016", - "Vert;": "\u2016", - "VerticalBar;": "\u2223", - "VerticalLine;": "|", - "VerticalSeparator;": "\u2758", - "VerticalTilde;": "\u2240", - "VeryThinSpace;": "\u200a", - "Vfr;": "\U0001d519", - "Vopf;": "\U0001d54d", - "Vscr;": "\U0001d4b1", - "Vvdash;": "\u22aa", - "Wcirc;": "\u0174", - "Wedge;": "\u22c0", - "Wfr;": "\U0001d51a", - "Wopf;": "\U0001d54e", - "Wscr;": "\U0001d4b2", - "Xfr;": "\U0001d51b", - "Xi;": "\u039e", - "Xopf;": "\U0001d54f", - "Xscr;": "\U0001d4b3", - "YAcy;": "\u042f", - "YIcy;": "\u0407", - "YUcy;": "\u042e", - "Yacute": "\xdd", - "Yacute;": "\xdd", - "Ycirc;": "\u0176", - "Ycy;": "\u042b", - "Yfr;": "\U0001d51c", - "Yopf;": "\U0001d550", - "Yscr;": "\U0001d4b4", - "Yuml;": "\u0178", - "ZHcy;": "\u0416", - "Zacute;": "\u0179", - "Zcaron;": "\u017d", - "Zcy;": "\u0417", - "Zdot;": "\u017b", - "ZeroWidthSpace;": "\u200b", - "Zeta;": "\u0396", - "Zfr;": "\u2128", - "Zopf;": "\u2124", - "Zscr;": "\U0001d4b5", - "aacute": "\xe1", - "aacute;": "\xe1", - "abreve;": "\u0103", - "ac;": "\u223e", - "acE;": "\u223e\u0333", - "acd;": "\u223f", - "acirc": "\xe2", - "acirc;": "\xe2", - "acute": "\xb4", - "acute;": "\xb4", - "acy;": "\u0430", - "aelig": "\xe6", - "aelig;": "\xe6", - "af;": "\u2061", - "afr;": "\U0001d51e", - "agrave": "\xe0", - "agrave;": "\xe0", - "alefsym;": "\u2135", - "aleph;": "\u2135", - "alpha;": "\u03b1", - "amacr;": "\u0101", - "amalg;": "\u2a3f", - "amp": "&", - "amp;": "&", - "and;": "\u2227", - "andand;": "\u2a55", - "andd;": "\u2a5c", - "andslope;": "\u2a58", - "andv;": "\u2a5a", - "ang;": "\u2220", - "ange;": "\u29a4", - "angle;": "\u2220", - "angmsd;": "\u2221", - "angmsdaa;": "\u29a8", - "angmsdab;": "\u29a9", - "angmsdac;": "\u29aa", - "angmsdad;": "\u29ab", - "angmsdae;": "\u29ac", - "angmsdaf;": "\u29ad", - "angmsdag;": "\u29ae", - "angmsdah;": "\u29af", - "angrt;": "\u221f", - "angrtvb;": "\u22be", - "angrtvbd;": "\u299d", - "angsph;": "\u2222", - "angst;": "\xc5", - "angzarr;": "\u237c", - "aogon;": "\u0105", - "aopf;": "\U0001d552", - "ap;": "\u2248", - "apE;": "\u2a70", - "apacir;": "\u2a6f", - "ape;": "\u224a", - "apid;": "\u224b", - "apos;": "'", - "approx;": "\u2248", - "approxeq;": "\u224a", - "aring": "\xe5", - "aring;": "\xe5", - "ascr;": "\U0001d4b6", - "ast;": "*", - "asymp;": "\u2248", - "asympeq;": "\u224d", - "atilde": "\xe3", - "atilde;": "\xe3", - "auml": "\xe4", - "auml;": "\xe4", - "awconint;": "\u2233", - "awint;": "\u2a11", - "bNot;": "\u2aed", - "backcong;": "\u224c", - "backepsilon;": "\u03f6", - "backprime;": "\u2035", - "backsim;": "\u223d", - "backsimeq;": "\u22cd", - "barvee;": "\u22bd", - "barwed;": "\u2305", - "barwedge;": "\u2305", - "bbrk;": "\u23b5", - "bbrktbrk;": "\u23b6", - "bcong;": "\u224c", - "bcy;": "\u0431", - "bdquo;": "\u201e", - "becaus;": "\u2235", - "because;": "\u2235", - "bemptyv;": "\u29b0", - "bepsi;": "\u03f6", - "bernou;": "\u212c", - "beta;": "\u03b2", - "beth;": "\u2136", - "between;": "\u226c", - "bfr;": "\U0001d51f", - "bigcap;": "\u22c2", - "bigcirc;": "\u25ef", - "bigcup;": "\u22c3", - "bigodot;": "\u2a00", - "bigoplus;": "\u2a01", - "bigotimes;": "\u2a02", - "bigsqcup;": "\u2a06", - "bigstar;": "\u2605", - "bigtriangledown;": "\u25bd", - "bigtriangleup;": "\u25b3", - "biguplus;": "\u2a04", - "bigvee;": "\u22c1", - "bigwedge;": "\u22c0", - "bkarow;": "\u290d", - "blacklozenge;": "\u29eb", - "blacksquare;": "\u25aa", - "blacktriangle;": "\u25b4", - "blacktriangledown;": "\u25be", - "blacktriangleleft;": "\u25c2", - "blacktriangleright;": "\u25b8", - "blank;": "\u2423", - "blk12;": "\u2592", - "blk14;": "\u2591", - "blk34;": "\u2593", - "block;": "\u2588", - "bne;": "=\u20e5", - "bnequiv;": "\u2261\u20e5", - "bnot;": "\u2310", - "bopf;": "\U0001d553", - "bot;": "\u22a5", - "bottom;": "\u22a5", - "bowtie;": "\u22c8", - "boxDL;": "\u2557", - "boxDR;": "\u2554", - "boxDl;": "\u2556", - "boxDr;": "\u2553", - "boxH;": "\u2550", - "boxHD;": "\u2566", - "boxHU;": "\u2569", - "boxHd;": "\u2564", - "boxHu;": "\u2567", - "boxUL;": "\u255d", - "boxUR;": "\u255a", - "boxUl;": "\u255c", - "boxUr;": "\u2559", - "boxV;": "\u2551", - "boxVH;": "\u256c", - "boxVL;": "\u2563", - "boxVR;": "\u2560", - "boxVh;": "\u256b", - "boxVl;": "\u2562", - "boxVr;": "\u255f", - "boxbox;": "\u29c9", - "boxdL;": "\u2555", - "boxdR;": "\u2552", - "boxdl;": "\u2510", - "boxdr;": "\u250c", - "boxh;": "\u2500", - "boxhD;": "\u2565", - "boxhU;": "\u2568", - "boxhd;": "\u252c", - "boxhu;": "\u2534", - "boxminus;": "\u229f", - "boxplus;": "\u229e", - "boxtimes;": "\u22a0", - "boxuL;": "\u255b", - "boxuR;": "\u2558", - "boxul;": "\u2518", - "boxur;": "\u2514", - "boxv;": "\u2502", - "boxvH;": "\u256a", - "boxvL;": "\u2561", - "boxvR;": "\u255e", - "boxvh;": "\u253c", - "boxvl;": "\u2524", - "boxvr;": "\u251c", - "bprime;": "\u2035", - "breve;": "\u02d8", - "brvbar": "\xa6", - "brvbar;": "\xa6", - "bscr;": "\U0001d4b7", - "bsemi;": "\u204f", - "bsim;": "\u223d", - "bsime;": "\u22cd", - "bsol;": "\\", - "bsolb;": "\u29c5", - "bsolhsub;": "\u27c8", - "bull;": "\u2022", - "bullet;": "\u2022", - "bump;": "\u224e", - "bumpE;": "\u2aae", - "bumpe;": "\u224f", - "bumpeq;": "\u224f", - "cacute;": "\u0107", - "cap;": "\u2229", - "capand;": "\u2a44", - "capbrcup;": "\u2a49", - "capcap;": "\u2a4b", - "capcup;": "\u2a47", - "capdot;": "\u2a40", - "caps;": "\u2229\ufe00", - "caret;": "\u2041", - "caron;": "\u02c7", - "ccaps;": "\u2a4d", - "ccaron;": "\u010d", - "ccedil": "\xe7", - "ccedil;": "\xe7", - "ccirc;": "\u0109", - "ccups;": "\u2a4c", - "ccupssm;": "\u2a50", - "cdot;": "\u010b", - "cedil": "\xb8", - "cedil;": "\xb8", - "cemptyv;": "\u29b2", - "cent": "\xa2", - "cent;": "\xa2", - "centerdot;": "\xb7", - "cfr;": "\U0001d520", - "chcy;": "\u0447", - "check;": "\u2713", - "checkmark;": "\u2713", - "chi;": "\u03c7", - "cir;": "\u25cb", - "cirE;": "\u29c3", - "circ;": "\u02c6", - "circeq;": "\u2257", - "circlearrowleft;": "\u21ba", - "circlearrowright;": "\u21bb", - "circledR;": "\xae", - "circledS;": "\u24c8", - "circledast;": "\u229b", - "circledcirc;": "\u229a", - "circleddash;": "\u229d", - "cire;": "\u2257", - "cirfnint;": "\u2a10", - "cirmid;": "\u2aef", - "cirscir;": "\u29c2", - "clubs;": "\u2663", - "clubsuit;": "\u2663", - "colon;": ":", - "colone;": "\u2254", - "coloneq;": "\u2254", - "comma;": ",", - "commat;": "@", - "comp;": "\u2201", - "compfn;": "\u2218", - "complement;": "\u2201", - "complexes;": "\u2102", - "cong;": "\u2245", - "congdot;": "\u2a6d", - "conint;": "\u222e", - "copf;": "\U0001d554", - "coprod;": "\u2210", - "copy": "\xa9", - "copy;": "\xa9", - "copysr;": "\u2117", - "crarr;": "\u21b5", - "cross;": "\u2717", - "cscr;": "\U0001d4b8", - "csub;": "\u2acf", - "csube;": "\u2ad1", - "csup;": "\u2ad0", - "csupe;": "\u2ad2", - "ctdot;": "\u22ef", - "cudarrl;": "\u2938", - "cudarrr;": "\u2935", - "cuepr;": "\u22de", - "cuesc;": "\u22df", - "cularr;": "\u21b6", - "cularrp;": "\u293d", - "cup;": "\u222a", - "cupbrcap;": "\u2a48", - "cupcap;": "\u2a46", - "cupcup;": "\u2a4a", - "cupdot;": "\u228d", - "cupor;": "\u2a45", - "cups;": "\u222a\ufe00", - "curarr;": "\u21b7", - "curarrm;": "\u293c", - "curlyeqprec;": "\u22de", - "curlyeqsucc;": "\u22df", - "curlyvee;": "\u22ce", - "curlywedge;": "\u22cf", - "curren": "\xa4", - "curren;": "\xa4", - "curvearrowleft;": "\u21b6", - "curvearrowright;": "\u21b7", - "cuvee;": "\u22ce", - "cuwed;": "\u22cf", - "cwconint;": "\u2232", - "cwint;": "\u2231", - "cylcty;": "\u232d", - "dArr;": "\u21d3", - "dHar;": "\u2965", - "dagger;": "\u2020", - "daleth;": "\u2138", - "darr;": "\u2193", - "dash;": "\u2010", - "dashv;": "\u22a3", - "dbkarow;": "\u290f", - "dblac;": "\u02dd", - "dcaron;": "\u010f", - "dcy;": "\u0434", - "dd;": "\u2146", - "ddagger;": "\u2021", - "ddarr;": "\u21ca", - "ddotseq;": "\u2a77", - "deg": "\xb0", - "deg;": "\xb0", - "delta;": "\u03b4", - "demptyv;": "\u29b1", - "dfisht;": "\u297f", - "dfr;": "\U0001d521", - "dharl;": "\u21c3", - "dharr;": "\u21c2", - "diam;": "\u22c4", - "diamond;": "\u22c4", - "diamondsuit;": "\u2666", - "diams;": "\u2666", - "die;": "\xa8", - "digamma;": "\u03dd", - "disin;": "\u22f2", - "div;": "\xf7", - "divide": "\xf7", - "divide;": "\xf7", - "divideontimes;": "\u22c7", - "divonx;": "\u22c7", - "djcy;": "\u0452", - "dlcorn;": "\u231e", - "dlcrop;": "\u230d", - "dollar;": "$", - "dopf;": "\U0001d555", - "dot;": "\u02d9", - "doteq;": "\u2250", - "doteqdot;": "\u2251", - "dotminus;": "\u2238", - "dotplus;": "\u2214", - "dotsquare;": "\u22a1", - "doublebarwedge;": "\u2306", - "downarrow;": "\u2193", - "downdownarrows;": "\u21ca", - "downharpoonleft;": "\u21c3", - "downharpoonright;": "\u21c2", - "drbkarow;": "\u2910", - "drcorn;": "\u231f", - "drcrop;": "\u230c", - "dscr;": "\U0001d4b9", - "dscy;": "\u0455", - "dsol;": "\u29f6", - "dstrok;": "\u0111", - "dtdot;": "\u22f1", - "dtri;": "\u25bf", - "dtrif;": "\u25be", - "duarr;": "\u21f5", - "duhar;": "\u296f", - "dwangle;": "\u29a6", - "dzcy;": "\u045f", - "dzigrarr;": "\u27ff", - "eDDot;": "\u2a77", - "eDot;": "\u2251", - "eacute": "\xe9", - "eacute;": "\xe9", - "easter;": "\u2a6e", - "ecaron;": "\u011b", - "ecir;": "\u2256", - "ecirc": "\xea", - "ecirc;": "\xea", - "ecolon;": "\u2255", - "ecy;": "\u044d", - "edot;": "\u0117", - "ee;": "\u2147", - "efDot;": "\u2252", - "efr;": "\U0001d522", - "eg;": "\u2a9a", - "egrave": "\xe8", - "egrave;": "\xe8", - "egs;": "\u2a96", - "egsdot;": "\u2a98", - "el;": "\u2a99", - "elinters;": "\u23e7", - "ell;": "\u2113", - "els;": "\u2a95", - "elsdot;": "\u2a97", - "emacr;": "\u0113", - "empty;": "\u2205", - "emptyset;": "\u2205", - "emptyv;": "\u2205", - "emsp13;": "\u2004", - "emsp14;": "\u2005", - "emsp;": "\u2003", - "eng;": "\u014b", - "ensp;": "\u2002", - "eogon;": "\u0119", - "eopf;": "\U0001d556", - "epar;": "\u22d5", - "eparsl;": "\u29e3", - "eplus;": "\u2a71", - "epsi;": "\u03b5", - "epsilon;": "\u03b5", - "epsiv;": "\u03f5", - "eqcirc;": "\u2256", - "eqcolon;": "\u2255", - "eqsim;": "\u2242", - "eqslantgtr;": "\u2a96", - "eqslantless;": "\u2a95", - "equals;": "=", - "equest;": "\u225f", - "equiv;": "\u2261", - "equivDD;": "\u2a78", - "eqvparsl;": "\u29e5", - "erDot;": "\u2253", - "erarr;": "\u2971", - "escr;": "\u212f", - "esdot;": "\u2250", - "esim;": "\u2242", - "eta;": "\u03b7", - "eth": "\xf0", - "eth;": "\xf0", - "euml": "\xeb", - "euml;": "\xeb", - "euro;": "\u20ac", - "excl;": "!", - "exist;": "\u2203", - "expectation;": "\u2130", - "exponentiale;": "\u2147", - "fallingdotseq;": "\u2252", - "fcy;": "\u0444", - "female;": "\u2640", - "ffilig;": "\ufb03", - "fflig;": "\ufb00", - "ffllig;": "\ufb04", - "ffr;": "\U0001d523", - "filig;": "\ufb01", - "fjlig;": "fj", - "flat;": "\u266d", - "fllig;": "\ufb02", - "fltns;": "\u25b1", - "fnof;": "\u0192", - "fopf;": "\U0001d557", - "forall;": "\u2200", - "fork;": "\u22d4", - "forkv;": "\u2ad9", - "fpartint;": "\u2a0d", - "frac12": "\xbd", - "frac12;": "\xbd", - "frac13;": "\u2153", - "frac14": "\xbc", - "frac14;": "\xbc", - "frac15;": "\u2155", - "frac16;": "\u2159", - "frac18;": "\u215b", - "frac23;": "\u2154", - "frac25;": "\u2156", - "frac34": "\xbe", - "frac34;": "\xbe", - "frac35;": "\u2157", - "frac38;": "\u215c", - "frac45;": "\u2158", - "frac56;": "\u215a", - "frac58;": "\u215d", - "frac78;": "\u215e", - "frasl;": "\u2044", - "frown;": "\u2322", - "fscr;": "\U0001d4bb", - "gE;": "\u2267", - "gEl;": "\u2a8c", - "gacute;": "\u01f5", - "gamma;": "\u03b3", - "gammad;": "\u03dd", - "gap;": "\u2a86", - "gbreve;": "\u011f", - "gcirc;": "\u011d", - "gcy;": "\u0433", - "gdot;": "\u0121", - "ge;": "\u2265", - "gel;": "\u22db", - "geq;": "\u2265", - "geqq;": "\u2267", - "geqslant;": "\u2a7e", - "ges;": "\u2a7e", - "gescc;": "\u2aa9", - "gesdot;": "\u2a80", - "gesdoto;": "\u2a82", - "gesdotol;": "\u2a84", - "gesl;": "\u22db\ufe00", - "gesles;": "\u2a94", - "gfr;": "\U0001d524", - "gg;": "\u226b", - "ggg;": "\u22d9", - "gimel;": "\u2137", - "gjcy;": "\u0453", - "gl;": "\u2277", - "glE;": "\u2a92", - "gla;": "\u2aa5", - "glj;": "\u2aa4", - "gnE;": "\u2269", - "gnap;": "\u2a8a", - "gnapprox;": "\u2a8a", - "gne;": "\u2a88", - "gneq;": "\u2a88", - "gneqq;": "\u2269", - "gnsim;": "\u22e7", - "gopf;": "\U0001d558", - "grave;": "`", - "gscr;": "\u210a", - "gsim;": "\u2273", - "gsime;": "\u2a8e", - "gsiml;": "\u2a90", - "gt": ">", - "gt;": ">", - "gtcc;": "\u2aa7", - "gtcir;": "\u2a7a", - "gtdot;": "\u22d7", - "gtlPar;": "\u2995", - "gtquest;": "\u2a7c", - "gtrapprox;": "\u2a86", - "gtrarr;": "\u2978", - "gtrdot;": "\u22d7", - "gtreqless;": "\u22db", - "gtreqqless;": "\u2a8c", - "gtrless;": "\u2277", - "gtrsim;": "\u2273", - "gvertneqq;": "\u2269\ufe00", - "gvnE;": "\u2269\ufe00", - "hArr;": "\u21d4", - "hairsp;": "\u200a", - "half;": "\xbd", - "hamilt;": "\u210b", - "hardcy;": "\u044a", - "harr;": "\u2194", - "harrcir;": "\u2948", - "harrw;": "\u21ad", - "hbar;": "\u210f", - "hcirc;": "\u0125", - "hearts;": "\u2665", - "heartsuit;": "\u2665", - "hellip;": "\u2026", - "hercon;": "\u22b9", - "hfr;": "\U0001d525", - "hksearow;": "\u2925", - "hkswarow;": "\u2926", - "hoarr;": "\u21ff", - "homtht;": "\u223b", - "hookleftarrow;": "\u21a9", - "hookrightarrow;": "\u21aa", - "hopf;": "\U0001d559", - "horbar;": "\u2015", - "hscr;": "\U0001d4bd", - "hslash;": "\u210f", - "hstrok;": "\u0127", - "hybull;": "\u2043", - "hyphen;": "\u2010", - "iacute": "\xed", - "iacute;": "\xed", - "ic;": "\u2063", - "icirc": "\xee", - "icirc;": "\xee", - "icy;": "\u0438", - "iecy;": "\u0435", - "iexcl": "\xa1", - "iexcl;": "\xa1", - "iff;": "\u21d4", - "ifr;": "\U0001d526", - "igrave": "\xec", - "igrave;": "\xec", - "ii;": "\u2148", - "iiiint;": "\u2a0c", - "iiint;": "\u222d", - "iinfin;": "\u29dc", - "iiota;": "\u2129", - "ijlig;": "\u0133", - "imacr;": "\u012b", - "image;": "\u2111", - "imagline;": "\u2110", - "imagpart;": "\u2111", - "imath;": "\u0131", - "imof;": "\u22b7", - "imped;": "\u01b5", - "in;": "\u2208", - "incare;": "\u2105", - "infin;": "\u221e", - "infintie;": "\u29dd", - "inodot;": "\u0131", - "int;": "\u222b", - "intcal;": "\u22ba", - "integers;": "\u2124", - "intercal;": "\u22ba", - "intlarhk;": "\u2a17", - "intprod;": "\u2a3c", - "iocy;": "\u0451", - "iogon;": "\u012f", - "iopf;": "\U0001d55a", - "iota;": "\u03b9", - "iprod;": "\u2a3c", - "iquest": "\xbf", - "iquest;": "\xbf", - "iscr;": "\U0001d4be", - "isin;": "\u2208", - "isinE;": "\u22f9", - "isindot;": "\u22f5", - "isins;": "\u22f4", - "isinsv;": "\u22f3", - "isinv;": "\u2208", - "it;": "\u2062", - "itilde;": "\u0129", - "iukcy;": "\u0456", - "iuml": "\xef", - "iuml;": "\xef", - "jcirc;": "\u0135", - "jcy;": "\u0439", - "jfr;": "\U0001d527", - "jmath;": "\u0237", - "jopf;": "\U0001d55b", - "jscr;": "\U0001d4bf", - "jsercy;": "\u0458", - "jukcy;": "\u0454", - "kappa;": "\u03ba", - "kappav;": "\u03f0", - "kcedil;": "\u0137", - "kcy;": "\u043a", - "kfr;": "\U0001d528", - "kgreen;": "\u0138", - "khcy;": "\u0445", - "kjcy;": "\u045c", - "kopf;": "\U0001d55c", - "kscr;": "\U0001d4c0", - "lAarr;": "\u21da", - "lArr;": "\u21d0", - "lAtail;": "\u291b", - "lBarr;": "\u290e", - "lE;": "\u2266", - "lEg;": "\u2a8b", - "lHar;": "\u2962", - "lacute;": "\u013a", - "laemptyv;": "\u29b4", - "lagran;": "\u2112", - "lambda;": "\u03bb", - "lang;": "\u27e8", - "langd;": "\u2991", - "langle;": "\u27e8", - "lap;": "\u2a85", - "laquo": "\xab", - "laquo;": "\xab", - "larr;": "\u2190", - "larrb;": "\u21e4", - "larrbfs;": "\u291f", - "larrfs;": "\u291d", - "larrhk;": "\u21a9", - "larrlp;": "\u21ab", - "larrpl;": "\u2939", - "larrsim;": "\u2973", - "larrtl;": "\u21a2", - "lat;": "\u2aab", - "latail;": "\u2919", - "late;": "\u2aad", - "lates;": "\u2aad\ufe00", - "lbarr;": "\u290c", - "lbbrk;": "\u2772", - "lbrace;": "{", - "lbrack;": "[", - "lbrke;": "\u298b", - "lbrksld;": "\u298f", - "lbrkslu;": "\u298d", - "lcaron;": "\u013e", - "lcedil;": "\u013c", - "lceil;": "\u2308", - "lcub;": "{", - "lcy;": "\u043b", - "ldca;": "\u2936", - "ldquo;": "\u201c", - "ldquor;": "\u201e", - "ldrdhar;": "\u2967", - "ldrushar;": "\u294b", - "ldsh;": "\u21b2", - "le;": "\u2264", - "leftarrow;": "\u2190", - "leftarrowtail;": "\u21a2", - "leftharpoondown;": "\u21bd", - "leftharpoonup;": "\u21bc", - "leftleftarrows;": "\u21c7", - "leftrightarrow;": "\u2194", - "leftrightarrows;": "\u21c6", - "leftrightharpoons;": "\u21cb", - "leftrightsquigarrow;": "\u21ad", - "leftthreetimes;": "\u22cb", - "leg;": "\u22da", - "leq;": "\u2264", - "leqq;": "\u2266", - "leqslant;": "\u2a7d", - "les;": "\u2a7d", - "lescc;": "\u2aa8", - "lesdot;": "\u2a7f", - "lesdoto;": "\u2a81", - "lesdotor;": "\u2a83", - "lesg;": "\u22da\ufe00", - "lesges;": "\u2a93", - "lessapprox;": "\u2a85", - "lessdot;": "\u22d6", - "lesseqgtr;": "\u22da", - "lesseqqgtr;": "\u2a8b", - "lessgtr;": "\u2276", - "lesssim;": "\u2272", - "lfisht;": "\u297c", - "lfloor;": "\u230a", - "lfr;": "\U0001d529", - "lg;": "\u2276", - "lgE;": "\u2a91", - "lhard;": "\u21bd", - "lharu;": "\u21bc", - "lharul;": "\u296a", - "lhblk;": "\u2584", - "ljcy;": "\u0459", - "ll;": "\u226a", - "llarr;": "\u21c7", - "llcorner;": "\u231e", - "llhard;": "\u296b", - "lltri;": "\u25fa", - "lmidot;": "\u0140", - "lmoust;": "\u23b0", - "lmoustache;": "\u23b0", - "lnE;": "\u2268", - "lnap;": "\u2a89", - "lnapprox;": "\u2a89", - "lne;": "\u2a87", - "lneq;": "\u2a87", - "lneqq;": "\u2268", - "lnsim;": "\u22e6", - "loang;": "\u27ec", - "loarr;": "\u21fd", - "lobrk;": "\u27e6", - "longleftarrow;": "\u27f5", - "longleftrightarrow;": "\u27f7", - "longmapsto;": "\u27fc", - "longrightarrow;": "\u27f6", - "looparrowleft;": "\u21ab", - "looparrowright;": "\u21ac", - "lopar;": "\u2985", - "lopf;": "\U0001d55d", - "loplus;": "\u2a2d", - "lotimes;": "\u2a34", - "lowast;": "\u2217", - "lowbar;": "_", - "loz;": "\u25ca", - "lozenge;": "\u25ca", - "lozf;": "\u29eb", - "lpar;": "(", - "lparlt;": "\u2993", - "lrarr;": "\u21c6", - "lrcorner;": "\u231f", - "lrhar;": "\u21cb", - "lrhard;": "\u296d", - "lrm;": "\u200e", - "lrtri;": "\u22bf", - "lsaquo;": "\u2039", - "lscr;": "\U0001d4c1", - "lsh;": "\u21b0", - "lsim;": "\u2272", - "lsime;": "\u2a8d", - "lsimg;": "\u2a8f", - "lsqb;": "[", - "lsquo;": "\u2018", - "lsquor;": "\u201a", - "lstrok;": "\u0142", - "lt": "<", - "lt;": "<", - "ltcc;": "\u2aa6", - "ltcir;": "\u2a79", - "ltdot;": "\u22d6", - "lthree;": "\u22cb", - "ltimes;": "\u22c9", - "ltlarr;": "\u2976", - "ltquest;": "\u2a7b", - "ltrPar;": "\u2996", - "ltri;": "\u25c3", - "ltrie;": "\u22b4", - "ltrif;": "\u25c2", - "lurdshar;": "\u294a", - "luruhar;": "\u2966", - "lvertneqq;": "\u2268\ufe00", - "lvnE;": "\u2268\ufe00", - "mDDot;": "\u223a", - "macr": "\xaf", - "macr;": "\xaf", - "male;": "\u2642", - "malt;": "\u2720", - "maltese;": "\u2720", - "map;": "\u21a6", - "mapsto;": "\u21a6", - "mapstodown;": "\u21a7", - "mapstoleft;": "\u21a4", - "mapstoup;": "\u21a5", - "marker;": "\u25ae", - "mcomma;": "\u2a29", - "mcy;": "\u043c", - "mdash;": "\u2014", - "measuredangle;": "\u2221", - "mfr;": "\U0001d52a", - "mho;": "\u2127", - "micro": "\xb5", - "micro;": "\xb5", - "mid;": "\u2223", - "midast;": "*", - "midcir;": "\u2af0", - "middot": "\xb7", - "middot;": "\xb7", - "minus;": "\u2212", - "minusb;": "\u229f", - "minusd;": "\u2238", - "minusdu;": "\u2a2a", - "mlcp;": "\u2adb", - "mldr;": "\u2026", - "mnplus;": "\u2213", - "models;": "\u22a7", - "mopf;": "\U0001d55e", - "mp;": "\u2213", - "mscr;": "\U0001d4c2", - "mstpos;": "\u223e", - "mu;": "\u03bc", - "multimap;": "\u22b8", - "mumap;": "\u22b8", - "nGg;": "\u22d9\u0338", - "nGt;": "\u226b\u20d2", - "nGtv;": "\u226b\u0338", - "nLeftarrow;": "\u21cd", - "nLeftrightarrow;": "\u21ce", - "nLl;": "\u22d8\u0338", - "nLt;": "\u226a\u20d2", - "nLtv;": "\u226a\u0338", - "nRightarrow;": "\u21cf", - "nVDash;": "\u22af", - "nVdash;": "\u22ae", - "nabla;": "\u2207", - "nacute;": "\u0144", - "nang;": "\u2220\u20d2", - "nap;": "\u2249", - "napE;": "\u2a70\u0338", - "napid;": "\u224b\u0338", - "napos;": "\u0149", - "napprox;": "\u2249", - "natur;": "\u266e", - "natural;": "\u266e", - "naturals;": "\u2115", - "nbsp": "\xa0", - "nbsp;": "\xa0", - "nbump;": "\u224e\u0338", - "nbumpe;": "\u224f\u0338", - "ncap;": "\u2a43", - "ncaron;": "\u0148", - "ncedil;": "\u0146", - "ncong;": "\u2247", - "ncongdot;": "\u2a6d\u0338", - "ncup;": "\u2a42", - "ncy;": "\u043d", - "ndash;": "\u2013", - "ne;": "\u2260", - "neArr;": "\u21d7", - "nearhk;": "\u2924", - "nearr;": "\u2197", - "nearrow;": "\u2197", - "nedot;": "\u2250\u0338", - "nequiv;": "\u2262", - "nesear;": "\u2928", - "nesim;": "\u2242\u0338", - "nexist;": "\u2204", - "nexists;": "\u2204", - "nfr;": "\U0001d52b", - "ngE;": "\u2267\u0338", - "nge;": "\u2271", - "ngeq;": "\u2271", - "ngeqq;": "\u2267\u0338", - "ngeqslant;": "\u2a7e\u0338", - "nges;": "\u2a7e\u0338", - "ngsim;": "\u2275", - "ngt;": "\u226f", - "ngtr;": "\u226f", - "nhArr;": "\u21ce", - "nharr;": "\u21ae", - "nhpar;": "\u2af2", - "ni;": "\u220b", - "nis;": "\u22fc", - "nisd;": "\u22fa", - "niv;": "\u220b", - "njcy;": "\u045a", - "nlArr;": "\u21cd", - "nlE;": "\u2266\u0338", - "nlarr;": "\u219a", - "nldr;": "\u2025", - "nle;": "\u2270", - "nleftarrow;": "\u219a", - "nleftrightarrow;": "\u21ae", - "nleq;": "\u2270", - "nleqq;": "\u2266\u0338", - "nleqslant;": "\u2a7d\u0338", - "nles;": "\u2a7d\u0338", - "nless;": "\u226e", - "nlsim;": "\u2274", - "nlt;": "\u226e", - "nltri;": "\u22ea", - "nltrie;": "\u22ec", - "nmid;": "\u2224", - "nopf;": "\U0001d55f", - "not": "\xac", - "not;": "\xac", - "notin;": "\u2209", - "notinE;": "\u22f9\u0338", - "notindot;": "\u22f5\u0338", - "notinva;": "\u2209", - "notinvb;": "\u22f7", - "notinvc;": "\u22f6", - "notni;": "\u220c", - "notniva;": "\u220c", - "notnivb;": "\u22fe", - "notnivc;": "\u22fd", - "npar;": "\u2226", - "nparallel;": "\u2226", - "nparsl;": "\u2afd\u20e5", - "npart;": "\u2202\u0338", - "npolint;": "\u2a14", - "npr;": "\u2280", - "nprcue;": "\u22e0", - "npre;": "\u2aaf\u0338", - "nprec;": "\u2280", - "npreceq;": "\u2aaf\u0338", - "nrArr;": "\u21cf", - "nrarr;": "\u219b", - "nrarrc;": "\u2933\u0338", - "nrarrw;": "\u219d\u0338", - "nrightarrow;": "\u219b", - "nrtri;": "\u22eb", - "nrtrie;": "\u22ed", - "nsc;": "\u2281", - "nsccue;": "\u22e1", - "nsce;": "\u2ab0\u0338", - "nscr;": "\U0001d4c3", - "nshortmid;": "\u2224", - "nshortparallel;": "\u2226", - "nsim;": "\u2241", - "nsime;": "\u2244", - "nsimeq;": "\u2244", - "nsmid;": "\u2224", - "nspar;": "\u2226", - "nsqsube;": "\u22e2", - "nsqsupe;": "\u22e3", - "nsub;": "\u2284", - "nsubE;": "\u2ac5\u0338", - "nsube;": "\u2288", - "nsubset;": "\u2282\u20d2", - "nsubseteq;": "\u2288", - "nsubseteqq;": "\u2ac5\u0338", - "nsucc;": "\u2281", - "nsucceq;": "\u2ab0\u0338", - "nsup;": "\u2285", - "nsupE;": "\u2ac6\u0338", - "nsupe;": "\u2289", - "nsupset;": "\u2283\u20d2", - "nsupseteq;": "\u2289", - "nsupseteqq;": "\u2ac6\u0338", - "ntgl;": "\u2279", - "ntilde": "\xf1", - "ntilde;": "\xf1", - "ntlg;": "\u2278", - "ntriangleleft;": "\u22ea", - "ntrianglelefteq;": "\u22ec", - "ntriangleright;": "\u22eb", - "ntrianglerighteq;": "\u22ed", - "nu;": "\u03bd", - "num;": "#", - "numero;": "\u2116", - "numsp;": "\u2007", - "nvDash;": "\u22ad", - "nvHarr;": "\u2904", - "nvap;": "\u224d\u20d2", - "nvdash;": "\u22ac", - "nvge;": "\u2265\u20d2", - "nvgt;": ">\u20d2", - "nvinfin;": "\u29de", - "nvlArr;": "\u2902", - "nvle;": "\u2264\u20d2", - "nvlt;": "<\u20d2", - "nvltrie;": "\u22b4\u20d2", - "nvrArr;": "\u2903", - "nvrtrie;": "\u22b5\u20d2", - "nvsim;": "\u223c\u20d2", - "nwArr;": "\u21d6", - "nwarhk;": "\u2923", - "nwarr;": "\u2196", - "nwarrow;": "\u2196", - "nwnear;": "\u2927", - "oS;": "\u24c8", - "oacute": "\xf3", - "oacute;": "\xf3", - "oast;": "\u229b", - "ocir;": "\u229a", - "ocirc": "\xf4", - "ocirc;": "\xf4", - "ocy;": "\u043e", - "odash;": "\u229d", - "odblac;": "\u0151", - "odiv;": "\u2a38", - "odot;": "\u2299", - "odsold;": "\u29bc", - "oelig;": "\u0153", - "ofcir;": "\u29bf", - "ofr;": "\U0001d52c", - "ogon;": "\u02db", - "ograve": "\xf2", - "ograve;": "\xf2", - "ogt;": "\u29c1", - "ohbar;": "\u29b5", - "ohm;": "\u03a9", - "oint;": "\u222e", - "olarr;": "\u21ba", - "olcir;": "\u29be", - "olcross;": "\u29bb", - "oline;": "\u203e", - "olt;": "\u29c0", - "omacr;": "\u014d", - "omega;": "\u03c9", - "omicron;": "\u03bf", - "omid;": "\u29b6", - "ominus;": "\u2296", - "oopf;": "\U0001d560", - "opar;": "\u29b7", - "operp;": "\u29b9", - "oplus;": "\u2295", - "or;": "\u2228", - "orarr;": "\u21bb", - "ord;": "\u2a5d", - "order;": "\u2134", - "orderof;": "\u2134", - "ordf": "\xaa", - "ordf;": "\xaa", - "ordm": "\xba", - "ordm;": "\xba", - "origof;": "\u22b6", - "oror;": "\u2a56", - "orslope;": "\u2a57", - "orv;": "\u2a5b", - "oscr;": "\u2134", - "oslash": "\xf8", - "oslash;": "\xf8", - "osol;": "\u2298", - "otilde": "\xf5", - "otilde;": "\xf5", - "otimes;": "\u2297", - "otimesas;": "\u2a36", - "ouml": "\xf6", - "ouml;": "\xf6", - "ovbar;": "\u233d", - "par;": "\u2225", - "para": "\xb6", - "para;": "\xb6", - "parallel;": "\u2225", - "parsim;": "\u2af3", - "parsl;": "\u2afd", - "part;": "\u2202", - "pcy;": "\u043f", - "percnt;": "%", - "period;": ".", - "permil;": "\u2030", - "perp;": "\u22a5", - "pertenk;": "\u2031", - "pfr;": "\U0001d52d", - "phi;": "\u03c6", - "phiv;": "\u03d5", - "phmmat;": "\u2133", - "phone;": "\u260e", - "pi;": "\u03c0", - "pitchfork;": "\u22d4", - "piv;": "\u03d6", - "planck;": "\u210f", - "planckh;": "\u210e", - "plankv;": "\u210f", - "plus;": "+", - "plusacir;": "\u2a23", - "plusb;": "\u229e", - "pluscir;": "\u2a22", - "plusdo;": "\u2214", - "plusdu;": "\u2a25", - "pluse;": "\u2a72", - "plusmn": "\xb1", - "plusmn;": "\xb1", - "plussim;": "\u2a26", - "plustwo;": "\u2a27", - "pm;": "\xb1", - "pointint;": "\u2a15", - "popf;": "\U0001d561", - "pound": "\xa3", - "pound;": "\xa3", - "pr;": "\u227a", - "prE;": "\u2ab3", - "prap;": "\u2ab7", - "prcue;": "\u227c", - "pre;": "\u2aaf", - "prec;": "\u227a", - "precapprox;": "\u2ab7", - "preccurlyeq;": "\u227c", - "preceq;": "\u2aaf", - "precnapprox;": "\u2ab9", - "precneqq;": "\u2ab5", - "precnsim;": "\u22e8", - "precsim;": "\u227e", - "prime;": "\u2032", - "primes;": "\u2119", - "prnE;": "\u2ab5", - "prnap;": "\u2ab9", - "prnsim;": "\u22e8", - "prod;": "\u220f", - "profalar;": "\u232e", - "profline;": "\u2312", - "profsurf;": "\u2313", - "prop;": "\u221d", - "propto;": "\u221d", - "prsim;": "\u227e", - "prurel;": "\u22b0", - "pscr;": "\U0001d4c5", - "psi;": "\u03c8", - "puncsp;": "\u2008", - "qfr;": "\U0001d52e", - "qint;": "\u2a0c", - "qopf;": "\U0001d562", - "qprime;": "\u2057", - "qscr;": "\U0001d4c6", - "quaternions;": "\u210d", - "quatint;": "\u2a16", - "quest;": "?", - "questeq;": "\u225f", - "quot": "\"", - "quot;": "\"", - "rAarr;": "\u21db", - "rArr;": "\u21d2", - "rAtail;": "\u291c", - "rBarr;": "\u290f", - "rHar;": "\u2964", - "race;": "\u223d\u0331", - "racute;": "\u0155", - "radic;": "\u221a", - "raemptyv;": "\u29b3", - "rang;": "\u27e9", - "rangd;": "\u2992", - "range;": "\u29a5", - "rangle;": "\u27e9", - "raquo": "\xbb", - "raquo;": "\xbb", - "rarr;": "\u2192", - "rarrap;": "\u2975", - "rarrb;": "\u21e5", - "rarrbfs;": "\u2920", - "rarrc;": "\u2933", - "rarrfs;": "\u291e", - "rarrhk;": "\u21aa", - "rarrlp;": "\u21ac", - "rarrpl;": "\u2945", - "rarrsim;": "\u2974", - "rarrtl;": "\u21a3", - "rarrw;": "\u219d", - "ratail;": "\u291a", - "ratio;": "\u2236", - "rationals;": "\u211a", - "rbarr;": "\u290d", - "rbbrk;": "\u2773", - "rbrace;": "}", - "rbrack;": "]", - "rbrke;": "\u298c", - "rbrksld;": "\u298e", - "rbrkslu;": "\u2990", - "rcaron;": "\u0159", - "rcedil;": "\u0157", - "rceil;": "\u2309", - "rcub;": "}", - "rcy;": "\u0440", - "rdca;": "\u2937", - "rdldhar;": "\u2969", - "rdquo;": "\u201d", - "rdquor;": "\u201d", - "rdsh;": "\u21b3", - "real;": "\u211c", - "realine;": "\u211b", - "realpart;": "\u211c", - "reals;": "\u211d", - "rect;": "\u25ad", - "reg": "\xae", - "reg;": "\xae", - "rfisht;": "\u297d", - "rfloor;": "\u230b", - "rfr;": "\U0001d52f", - "rhard;": "\u21c1", - "rharu;": "\u21c0", - "rharul;": "\u296c", - "rho;": "\u03c1", - "rhov;": "\u03f1", - "rightarrow;": "\u2192", - "rightarrowtail;": "\u21a3", - "rightharpoondown;": "\u21c1", - "rightharpoonup;": "\u21c0", - "rightleftarrows;": "\u21c4", - "rightleftharpoons;": "\u21cc", - "rightrightarrows;": "\u21c9", - "rightsquigarrow;": "\u219d", - "rightthreetimes;": "\u22cc", - "ring;": "\u02da", - "risingdotseq;": "\u2253", - "rlarr;": "\u21c4", - "rlhar;": "\u21cc", - "rlm;": "\u200f", - "rmoust;": "\u23b1", - "rmoustache;": "\u23b1", - "rnmid;": "\u2aee", - "roang;": "\u27ed", - "roarr;": "\u21fe", - "robrk;": "\u27e7", - "ropar;": "\u2986", - "ropf;": "\U0001d563", - "roplus;": "\u2a2e", - "rotimes;": "\u2a35", - "rpar;": ")", - "rpargt;": "\u2994", - "rppolint;": "\u2a12", - "rrarr;": "\u21c9", - "rsaquo;": "\u203a", - "rscr;": "\U0001d4c7", - "rsh;": "\u21b1", - "rsqb;": "]", - "rsquo;": "\u2019", - "rsquor;": "\u2019", - "rthree;": "\u22cc", - "rtimes;": "\u22ca", - "rtri;": "\u25b9", - "rtrie;": "\u22b5", - "rtrif;": "\u25b8", - "rtriltri;": "\u29ce", - "ruluhar;": "\u2968", - "rx;": "\u211e", - "sacute;": "\u015b", - "sbquo;": "\u201a", - "sc;": "\u227b", - "scE;": "\u2ab4", - "scap;": "\u2ab8", - "scaron;": "\u0161", - "sccue;": "\u227d", - "sce;": "\u2ab0", - "scedil;": "\u015f", - "scirc;": "\u015d", - "scnE;": "\u2ab6", - "scnap;": "\u2aba", - "scnsim;": "\u22e9", - "scpolint;": "\u2a13", - "scsim;": "\u227f", - "scy;": "\u0441", - "sdot;": "\u22c5", - "sdotb;": "\u22a1", - "sdote;": "\u2a66", - "seArr;": "\u21d8", - "searhk;": "\u2925", - "searr;": "\u2198", - "searrow;": "\u2198", - "sect": "\xa7", - "sect;": "\xa7", - "semi;": ";", - "seswar;": "\u2929", - "setminus;": "\u2216", - "setmn;": "\u2216", - "sext;": "\u2736", - "sfr;": "\U0001d530", - "sfrown;": "\u2322", - "sharp;": "\u266f", - "shchcy;": "\u0449", - "shcy;": "\u0448", - "shortmid;": "\u2223", - "shortparallel;": "\u2225", - "shy": "\xad", - "shy;": "\xad", - "sigma;": "\u03c3", - "sigmaf;": "\u03c2", - "sigmav;": "\u03c2", - "sim;": "\u223c", - "simdot;": "\u2a6a", - "sime;": "\u2243", - "simeq;": "\u2243", - "simg;": "\u2a9e", - "simgE;": "\u2aa0", - "siml;": "\u2a9d", - "simlE;": "\u2a9f", - "simne;": "\u2246", - "simplus;": "\u2a24", - "simrarr;": "\u2972", - "slarr;": "\u2190", - "smallsetminus;": "\u2216", - "smashp;": "\u2a33", - "smeparsl;": "\u29e4", - "smid;": "\u2223", - "smile;": "\u2323", - "smt;": "\u2aaa", - "smte;": "\u2aac", - "smtes;": "\u2aac\ufe00", - "softcy;": "\u044c", - "sol;": "/", - "solb;": "\u29c4", - "solbar;": "\u233f", - "sopf;": "\U0001d564", - "spades;": "\u2660", - "spadesuit;": "\u2660", - "spar;": "\u2225", - "sqcap;": "\u2293", - "sqcaps;": "\u2293\ufe00", - "sqcup;": "\u2294", - "sqcups;": "\u2294\ufe00", - "sqsub;": "\u228f", - "sqsube;": "\u2291", - "sqsubset;": "\u228f", - "sqsubseteq;": "\u2291", - "sqsup;": "\u2290", - "sqsupe;": "\u2292", - "sqsupset;": "\u2290", - "sqsupseteq;": "\u2292", - "squ;": "\u25a1", - "square;": "\u25a1", - "squarf;": "\u25aa", - "squf;": "\u25aa", - "srarr;": "\u2192", - "sscr;": "\U0001d4c8", - "ssetmn;": "\u2216", - "ssmile;": "\u2323", - "sstarf;": "\u22c6", - "star;": "\u2606", - "starf;": "\u2605", - "straightepsilon;": "\u03f5", - "straightphi;": "\u03d5", - "strns;": "\xaf", - "sub;": "\u2282", - "subE;": "\u2ac5", - "subdot;": "\u2abd", - "sube;": "\u2286", - "subedot;": "\u2ac3", - "submult;": "\u2ac1", - "subnE;": "\u2acb", - "subne;": "\u228a", - "subplus;": "\u2abf", - "subrarr;": "\u2979", - "subset;": "\u2282", - "subseteq;": "\u2286", - "subseteqq;": "\u2ac5", - "subsetneq;": "\u228a", - "subsetneqq;": "\u2acb", - "subsim;": "\u2ac7", - "subsub;": "\u2ad5", - "subsup;": "\u2ad3", - "succ;": "\u227b", - "succapprox;": "\u2ab8", - "succcurlyeq;": "\u227d", - "succeq;": "\u2ab0", - "succnapprox;": "\u2aba", - "succneqq;": "\u2ab6", - "succnsim;": "\u22e9", - "succsim;": "\u227f", - "sum;": "\u2211", - "sung;": "\u266a", - "sup1": "\xb9", - "sup1;": "\xb9", - "sup2": "\xb2", - "sup2;": "\xb2", - "sup3": "\xb3", - "sup3;": "\xb3", - "sup;": "\u2283", - "supE;": "\u2ac6", - "supdot;": "\u2abe", - "supdsub;": "\u2ad8", - "supe;": "\u2287", - "supedot;": "\u2ac4", - "suphsol;": "\u27c9", - "suphsub;": "\u2ad7", - "suplarr;": "\u297b", - "supmult;": "\u2ac2", - "supnE;": "\u2acc", - "supne;": "\u228b", - "supplus;": "\u2ac0", - "supset;": "\u2283", - "supseteq;": "\u2287", - "supseteqq;": "\u2ac6", - "supsetneq;": "\u228b", - "supsetneqq;": "\u2acc", - "supsim;": "\u2ac8", - "supsub;": "\u2ad4", - "supsup;": "\u2ad6", - "swArr;": "\u21d9", - "swarhk;": "\u2926", - "swarr;": "\u2199", - "swarrow;": "\u2199", - "swnwar;": "\u292a", - "szlig": "\xdf", - "szlig;": "\xdf", - "target;": "\u2316", - "tau;": "\u03c4", - "tbrk;": "\u23b4", - "tcaron;": "\u0165", - "tcedil;": "\u0163", - "tcy;": "\u0442", - "tdot;": "\u20db", - "telrec;": "\u2315", - "tfr;": "\U0001d531", - "there4;": "\u2234", - "therefore;": "\u2234", - "theta;": "\u03b8", - "thetasym;": "\u03d1", - "thetav;": "\u03d1", - "thickapprox;": "\u2248", - "thicksim;": "\u223c", - "thinsp;": "\u2009", - "thkap;": "\u2248", - "thksim;": "\u223c", - "thorn": "\xfe", - "thorn;": "\xfe", - "tilde;": "\u02dc", - "times": "\xd7", - "times;": "\xd7", - "timesb;": "\u22a0", - "timesbar;": "\u2a31", - "timesd;": "\u2a30", - "tint;": "\u222d", - "toea;": "\u2928", - "top;": "\u22a4", - "topbot;": "\u2336", - "topcir;": "\u2af1", - "topf;": "\U0001d565", - "topfork;": "\u2ada", - "tosa;": "\u2929", - "tprime;": "\u2034", - "trade;": "\u2122", - "triangle;": "\u25b5", - "triangledown;": "\u25bf", - "triangleleft;": "\u25c3", - "trianglelefteq;": "\u22b4", - "triangleq;": "\u225c", - "triangleright;": "\u25b9", - "trianglerighteq;": "\u22b5", - "tridot;": "\u25ec", - "trie;": "\u225c", - "triminus;": "\u2a3a", - "triplus;": "\u2a39", - "trisb;": "\u29cd", - "tritime;": "\u2a3b", - "trpezium;": "\u23e2", - "tscr;": "\U0001d4c9", - "tscy;": "\u0446", - "tshcy;": "\u045b", - "tstrok;": "\u0167", - "twixt;": "\u226c", - "twoheadleftarrow;": "\u219e", - "twoheadrightarrow;": "\u21a0", - "uArr;": "\u21d1", - "uHar;": "\u2963", - "uacute": "\xfa", - "uacute;": "\xfa", - "uarr;": "\u2191", - "ubrcy;": "\u045e", - "ubreve;": "\u016d", - "ucirc": "\xfb", - "ucirc;": "\xfb", - "ucy;": "\u0443", - "udarr;": "\u21c5", - "udblac;": "\u0171", - "udhar;": "\u296e", - "ufisht;": "\u297e", - "ufr;": "\U0001d532", - "ugrave": "\xf9", - "ugrave;": "\xf9", - "uharl;": "\u21bf", - "uharr;": "\u21be", - "uhblk;": "\u2580", - "ulcorn;": "\u231c", - "ulcorner;": "\u231c", - "ulcrop;": "\u230f", - "ultri;": "\u25f8", - "umacr;": "\u016b", - "uml": "\xa8", - "uml;": "\xa8", - "uogon;": "\u0173", - "uopf;": "\U0001d566", - "uparrow;": "\u2191", - "updownarrow;": "\u2195", - "upharpoonleft;": "\u21bf", - "upharpoonright;": "\u21be", - "uplus;": "\u228e", - "upsi;": "\u03c5", - "upsih;": "\u03d2", - "upsilon;": "\u03c5", - "upuparrows;": "\u21c8", - "urcorn;": "\u231d", - "urcorner;": "\u231d", - "urcrop;": "\u230e", - "uring;": "\u016f", - "urtri;": "\u25f9", - "uscr;": "\U0001d4ca", - "utdot;": "\u22f0", - "utilde;": "\u0169", - "utri;": "\u25b5", - "utrif;": "\u25b4", - "uuarr;": "\u21c8", - "uuml": "\xfc", - "uuml;": "\xfc", - "uwangle;": "\u29a7", - "vArr;": "\u21d5", - "vBar;": "\u2ae8", - "vBarv;": "\u2ae9", - "vDash;": "\u22a8", - "vangrt;": "\u299c", - "varepsilon;": "\u03f5", - "varkappa;": "\u03f0", - "varnothing;": "\u2205", - "varphi;": "\u03d5", - "varpi;": "\u03d6", - "varpropto;": "\u221d", - "varr;": "\u2195", - "varrho;": "\u03f1", - "varsigma;": "\u03c2", - "varsubsetneq;": "\u228a\ufe00", - "varsubsetneqq;": "\u2acb\ufe00", - "varsupsetneq;": "\u228b\ufe00", - "varsupsetneqq;": "\u2acc\ufe00", - "vartheta;": "\u03d1", - "vartriangleleft;": "\u22b2", - "vartriangleright;": "\u22b3", - "vcy;": "\u0432", - "vdash;": "\u22a2", - "vee;": "\u2228", - "veebar;": "\u22bb", - "veeeq;": "\u225a", - "vellip;": "\u22ee", - "verbar;": "|", - "vert;": "|", - "vfr;": "\U0001d533", - "vltri;": "\u22b2", - "vnsub;": "\u2282\u20d2", - "vnsup;": "\u2283\u20d2", - "vopf;": "\U0001d567", - "vprop;": "\u221d", - "vrtri;": "\u22b3", - "vscr;": "\U0001d4cb", - "vsubnE;": "\u2acb\ufe00", - "vsubne;": "\u228a\ufe00", - "vsupnE;": "\u2acc\ufe00", - "vsupne;": "\u228b\ufe00", - "vzigzag;": "\u299a", - "wcirc;": "\u0175", - "wedbar;": "\u2a5f", - "wedge;": "\u2227", - "wedgeq;": "\u2259", - "weierp;": "\u2118", - "wfr;": "\U0001d534", - "wopf;": "\U0001d568", - "wp;": "\u2118", - "wr;": "\u2240", - "wreath;": "\u2240", - "wscr;": "\U0001d4cc", - "xcap;": "\u22c2", - "xcirc;": "\u25ef", - "xcup;": "\u22c3", - "xdtri;": "\u25bd", - "xfr;": "\U0001d535", - "xhArr;": "\u27fa", - "xharr;": "\u27f7", - "xi;": "\u03be", - "xlArr;": "\u27f8", - "xlarr;": "\u27f5", - "xmap;": "\u27fc", - "xnis;": "\u22fb", - "xodot;": "\u2a00", - "xopf;": "\U0001d569", - "xoplus;": "\u2a01", - "xotime;": "\u2a02", - "xrArr;": "\u27f9", - "xrarr;": "\u27f6", - "xscr;": "\U0001d4cd", - "xsqcup;": "\u2a06", - "xuplus;": "\u2a04", - "xutri;": "\u25b3", - "xvee;": "\u22c1", - "xwedge;": "\u22c0", - "yacute": "\xfd", - "yacute;": "\xfd", - "yacy;": "\u044f", - "ycirc;": "\u0177", - "ycy;": "\u044b", - "yen": "\xa5", - "yen;": "\xa5", - "yfr;": "\U0001d536", - "yicy;": "\u0457", - "yopf;": "\U0001d56a", - "yscr;": "\U0001d4ce", - "yucy;": "\u044e", - "yuml": "\xff", - "yuml;": "\xff", - "zacute;": "\u017a", - "zcaron;": "\u017e", - "zcy;": "\u0437", - "zdot;": "\u017c", - "zeetrf;": "\u2128", - "zeta;": "\u03b6", - "zfr;": "\U0001d537", - "zhcy;": "\u0436", - "zigrarr;": "\u21dd", - "zopf;": "\U0001d56b", - "zscr;": "\U0001d4cf", - "zwj;": "\u200d", - "zwnj;": "\u200c", -} - -replacementCharacters = { - 0x0: "\uFFFD", - 0x0d: "\u000D", - 0x80: "\u20AC", - 0x81: "\u0081", - 0x81: "\u0081", - 0x82: "\u201A", - 0x83: "\u0192", - 0x84: "\u201E", - 0x85: "\u2026", - 0x86: "\u2020", - 0x87: "\u2021", - 0x88: "\u02C6", - 0x89: "\u2030", - 0x8A: "\u0160", - 0x8B: "\u2039", - 0x8C: "\u0152", - 0x8D: "\u008D", - 0x8E: "\u017D", - 0x8F: "\u008F", - 0x90: "\u0090", - 0x91: "\u2018", - 0x92: "\u2019", - 0x93: "\u201C", - 0x94: "\u201D", - 0x95: "\u2022", - 0x96: "\u2013", - 0x97: "\u2014", - 0x98: "\u02DC", - 0x99: "\u2122", - 0x9A: "\u0161", - 0x9B: "\u203A", - 0x9C: "\u0153", - 0x9D: "\u009D", - 0x9E: "\u017E", - 0x9F: "\u0178", -} - -encodings = { - '437': 'cp437', - '850': 'cp850', - '852': 'cp852', - '855': 'cp855', - '857': 'cp857', - '860': 'cp860', - '861': 'cp861', - '862': 'cp862', - '863': 'cp863', - '865': 'cp865', - '866': 'cp866', - '869': 'cp869', - 'ansix341968': 'ascii', - 'ansix341986': 'ascii', - 'arabic': 'iso8859-6', - 'ascii': 'ascii', - 'asmo708': 'iso8859-6', - 'big5': 'big5', - 'big5hkscs': 'big5hkscs', - 'chinese': 'gbk', - 'cp037': 'cp037', - 'cp1026': 'cp1026', - 'cp154': 'ptcp154', - 'cp367': 'ascii', - 'cp424': 'cp424', - 'cp437': 'cp437', - 'cp500': 'cp500', - 'cp775': 'cp775', - 'cp819': 'windows-1252', - 'cp850': 'cp850', - 'cp852': 'cp852', - 'cp855': 'cp855', - 'cp857': 'cp857', - 'cp860': 'cp860', - 'cp861': 'cp861', - 'cp862': 'cp862', - 'cp863': 'cp863', - 'cp864': 'cp864', - 'cp865': 'cp865', - 'cp866': 'cp866', - 'cp869': 'cp869', - 'cp936': 'gbk', - 'cpgr': 'cp869', - 'cpis': 'cp861', - 'csascii': 'ascii', - 'csbig5': 'big5', - 'cseuckr': 'cp949', - 'cseucpkdfmtjapanese': 'euc_jp', - 'csgb2312': 'gbk', - 'cshproman8': 'hp-roman8', - 'csibm037': 'cp037', - 'csibm1026': 'cp1026', - 'csibm424': 'cp424', - 'csibm500': 'cp500', - 'csibm855': 'cp855', - 'csibm857': 'cp857', - 'csibm860': 'cp860', - 'csibm861': 'cp861', - 'csibm863': 'cp863', - 'csibm864': 'cp864', - 'csibm865': 'cp865', - 'csibm866': 'cp866', - 'csibm869': 'cp869', - 'csiso2022jp': 'iso2022_jp', - 'csiso2022jp2': 'iso2022_jp_2', - 'csiso2022kr': 'iso2022_kr', - 'csiso58gb231280': 'gbk', - 'csisolatin1': 'windows-1252', - 'csisolatin2': 'iso8859-2', - 'csisolatin3': 'iso8859-3', - 'csisolatin4': 'iso8859-4', - 'csisolatin5': 'windows-1254', - 'csisolatin6': 'iso8859-10', - 'csisolatinarabic': 'iso8859-6', - 'csisolatincyrillic': 'iso8859-5', - 'csisolatingreek': 'iso8859-7', - 'csisolatinhebrew': 'iso8859-8', - 'cskoi8r': 'koi8-r', - 'csksc56011987': 'cp949', - 'cspc775baltic': 'cp775', - 'cspc850multilingual': 'cp850', - 'cspc862latinhebrew': 'cp862', - 'cspc8codepage437': 'cp437', - 'cspcp852': 'cp852', - 'csptcp154': 'ptcp154', - 'csshiftjis': 'shift_jis', - 'csunicode11utf7': 'utf-7', - 'cyrillic': 'iso8859-5', - 'cyrillicasian': 'ptcp154', - 'ebcdiccpbe': 'cp500', - 'ebcdiccpca': 'cp037', - 'ebcdiccpch': 'cp500', - 'ebcdiccphe': 'cp424', - 'ebcdiccpnl': 'cp037', - 'ebcdiccpus': 'cp037', - 'ebcdiccpwt': 'cp037', - 'ecma114': 'iso8859-6', - 'ecma118': 'iso8859-7', - 'elot928': 'iso8859-7', - 'eucjp': 'euc_jp', - 'euckr': 'cp949', - 'extendedunixcodepackedformatforjapanese': 'euc_jp', - 'gb18030': 'gb18030', - 'gb2312': 'gbk', - 'gb231280': 'gbk', - 'gbk': 'gbk', - 'greek': 'iso8859-7', - 'greek8': 'iso8859-7', - 'hebrew': 'iso8859-8', - 'hproman8': 'hp-roman8', - 'hzgb2312': 'hz', - 'ibm037': 'cp037', - 'ibm1026': 'cp1026', - 'ibm367': 'ascii', - 'ibm424': 'cp424', - 'ibm437': 'cp437', - 'ibm500': 'cp500', - 'ibm775': 'cp775', - 'ibm819': 'windows-1252', - 'ibm850': 'cp850', - 'ibm852': 'cp852', - 'ibm855': 'cp855', - 'ibm857': 'cp857', - 'ibm860': 'cp860', - 'ibm861': 'cp861', - 'ibm862': 'cp862', - 'ibm863': 'cp863', - 'ibm864': 'cp864', - 'ibm865': 'cp865', - 'ibm866': 'cp866', - 'ibm869': 'cp869', - 'iso2022jp': 'iso2022_jp', - 'iso2022jp2': 'iso2022_jp_2', - 'iso2022kr': 'iso2022_kr', - 'iso646irv1991': 'ascii', - 'iso646us': 'ascii', - 'iso88591': 'windows-1252', - 'iso885910': 'iso8859-10', - 'iso8859101992': 'iso8859-10', - 'iso885911987': 'windows-1252', - 'iso885913': 'iso8859-13', - 'iso885914': 'iso8859-14', - 'iso8859141998': 'iso8859-14', - 'iso885915': 'iso8859-15', - 'iso885916': 'iso8859-16', - 'iso8859162001': 'iso8859-16', - 'iso88592': 'iso8859-2', - 'iso885921987': 'iso8859-2', - 'iso88593': 'iso8859-3', - 'iso885931988': 'iso8859-3', - 'iso88594': 'iso8859-4', - 'iso885941988': 'iso8859-4', - 'iso88595': 'iso8859-5', - 'iso885951988': 'iso8859-5', - 'iso88596': 'iso8859-6', - 'iso885961987': 'iso8859-6', - 'iso88597': 'iso8859-7', - 'iso885971987': 'iso8859-7', - 'iso88598': 'iso8859-8', - 'iso885981988': 'iso8859-8', - 'iso88599': 'windows-1254', - 'iso885991989': 'windows-1254', - 'isoceltic': 'iso8859-14', - 'isoir100': 'windows-1252', - 'isoir101': 'iso8859-2', - 'isoir109': 'iso8859-3', - 'isoir110': 'iso8859-4', - 'isoir126': 'iso8859-7', - 'isoir127': 'iso8859-6', - 'isoir138': 'iso8859-8', - 'isoir144': 'iso8859-5', - 'isoir148': 'windows-1254', - 'isoir149': 'cp949', - 'isoir157': 'iso8859-10', - 'isoir199': 'iso8859-14', - 'isoir226': 'iso8859-16', - 'isoir58': 'gbk', - 'isoir6': 'ascii', - 'koi8r': 'koi8-r', - 'koi8u': 'koi8-u', - 'korean': 'cp949', - 'ksc5601': 'cp949', - 'ksc56011987': 'cp949', - 'ksc56011989': 'cp949', - 'l1': 'windows-1252', - 'l10': 'iso8859-16', - 'l2': 'iso8859-2', - 'l3': 'iso8859-3', - 'l4': 'iso8859-4', - 'l5': 'windows-1254', - 'l6': 'iso8859-10', - 'l8': 'iso8859-14', - 'latin1': 'windows-1252', - 'latin10': 'iso8859-16', - 'latin2': 'iso8859-2', - 'latin3': 'iso8859-3', - 'latin4': 'iso8859-4', - 'latin5': 'windows-1254', - 'latin6': 'iso8859-10', - 'latin8': 'iso8859-14', - 'latin9': 'iso8859-15', - 'ms936': 'gbk', - 'mskanji': 'shift_jis', - 'pt154': 'ptcp154', - 'ptcp154': 'ptcp154', - 'r8': 'hp-roman8', - 'roman8': 'hp-roman8', - 'shiftjis': 'shift_jis', - 'tis620': 'cp874', - 'unicode11utf7': 'utf-7', - 'us': 'ascii', - 'usascii': 'ascii', - 'utf16': 'utf-16', - 'utf16be': 'utf-16-be', - 'utf16le': 'utf-16-le', - 'utf8': 'utf-8', - 'windows1250': 'cp1250', - 'windows1251': 'cp1251', - 'windows1252': 'cp1252', - 'windows1253': 'cp1253', - 'windows1254': 'cp1254', - 'windows1255': 'cp1255', - 'windows1256': 'cp1256', - 'windows1257': 'cp1257', - 'windows1258': 'cp1258', - 'windows936': 'gbk', - 'x-x-big5': 'big5'} - -tokenTypes = { - "Doctype": 0, - "Characters": 1, - "SpaceCharacters": 2, - "StartTag": 3, - "EndTag": 4, - "EmptyTag": 5, - "Comment": 6, - "ParseError": 7 -} - -tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"], - tokenTypes["EmptyTag"]]) - - -prefixes = dict([(v, k) for k, v in namespaces.items()]) -prefixes["http://www.w3.org/1998/Math/MathML"] = "math" - - -class DataLossWarning(UserWarning): - pass - - -class ReparseException(Exception): - pass diff --git a/lib/html5lib/filters/__init__.py b/lib/html5lib/filters/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/html5lib/filters/_base.py b/lib/html5lib/filters/_base.py deleted file mode 100644 index c7dbaed0..00000000 --- a/lib/html5lib/filters/_base.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - - -class Filter(object): - def __init__(self, source): - self.source = source - - def __iter__(self): - return iter(self.source) - - def __getattr__(self, name): - return getattr(self.source, name) diff --git a/lib/html5lib/filters/alphabeticalattributes.py b/lib/html5lib/filters/alphabeticalattributes.py deleted file mode 100644 index fed6996c..00000000 --- a/lib/html5lib/filters/alphabeticalattributes.py +++ /dev/null @@ -1,20 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import _base - -try: - from collections import OrderedDict -except ImportError: - from ordereddict import OrderedDict - - -class Filter(_base.Filter): - def __iter__(self): - for token in _base.Filter.__iter__(self): - if token["type"] in ("StartTag", "EmptyTag"): - attrs = OrderedDict() - for name, value in sorted(token["data"].items(), - key=lambda x: x[0]): - attrs[name] = value - token["data"] = attrs - yield token diff --git a/lib/html5lib/filters/inject_meta_charset.py b/lib/html5lib/filters/inject_meta_charset.py deleted file mode 100644 index ca33b70b..00000000 --- a/lib/html5lib/filters/inject_meta_charset.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import _base - - -class Filter(_base.Filter): - def __init__(self, source, encoding): - _base.Filter.__init__(self, source) - self.encoding = encoding - - def __iter__(self): - state = "pre_head" - meta_found = (self.encoding is None) - pending = [] - - for token in _base.Filter.__iter__(self): - type = token["type"] - if type == "StartTag": - if token["name"].lower() == "head": - state = "in_head" - - elif type == "EmptyTag": - if token["name"].lower() == "meta": - # replace charset with actual encoding - has_http_equiv_content_type = False - for (namespace, name), value in token["data"].items(): - if namespace is not None: - continue - elif name.lower() == 'charset': - token["data"][(namespace, name)] = self.encoding - meta_found = True - break - elif name == 'http-equiv' and value.lower() == 'content-type': - has_http_equiv_content_type = True - else: - if has_http_equiv_content_type and (None, "content") in token["data"]: - token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding - meta_found = True - - elif token["name"].lower() == "head" and not meta_found: - # insert meta into empty head - yield {"type": "StartTag", "name": "head", - "data": token["data"]} - yield {"type": "EmptyTag", "name": "meta", - "data": {(None, "charset"): self.encoding}} - yield {"type": "EndTag", "name": "head"} - meta_found = True - continue - - elif type == "EndTag": - if token["name"].lower() == "head" and pending: - # insert meta into head (if necessary) and flush pending queue - yield pending.pop(0) - if not meta_found: - yield {"type": "EmptyTag", "name": "meta", - "data": {(None, "charset"): self.encoding}} - while pending: - yield pending.pop(0) - meta_found = True - state = "post_head" - - if state == "in_head": - pending.append(token) - else: - yield token diff --git a/lib/html5lib/filters/lint.py b/lib/html5lib/filters/lint.py deleted file mode 100644 index 8884696d..00000000 --- a/lib/html5lib/filters/lint.py +++ /dev/null @@ -1,90 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import _base -from ..constants import cdataElements, rcdataElements, voidElements - -from ..constants import spaceCharacters -spaceCharacters = "".join(spaceCharacters) - - -class LintError(Exception): - pass - - -class Filter(_base.Filter): - def __iter__(self): - open_elements = [] - contentModelFlag = "PCDATA" - for token in _base.Filter.__iter__(self): - type = token["type"] - if type in ("StartTag", "EmptyTag"): - name = token["name"] - if contentModelFlag != "PCDATA": - raise LintError("StartTag not in PCDATA content model flag: %(tag)s" % {"tag": name}) - if not isinstance(name, str): - raise LintError("Tag name is not a string: %(tag)r" % {"tag": name}) - if not name: - raise LintError("Empty tag name") - if type == "StartTag" and name in voidElements: - raise LintError("Void element reported as StartTag token: %(tag)s" % {"tag": name}) - elif type == "EmptyTag" and name not in voidElements: - raise LintError("Non-void element reported as EmptyTag token: %(tag)s" % {"tag": token["name"]}) - if type == "StartTag": - open_elements.append(name) - for name, value in token["data"]: - if not isinstance(name, str): - raise LintError("Attribute name is not a string: %(name)r" % {"name": name}) - if not name: - raise LintError("Empty attribute name") - if not isinstance(value, str): - raise LintError("Attribute value is not a string: %(value)r" % {"value": value}) - if name in cdataElements: - contentModelFlag = "CDATA" - elif name in rcdataElements: - contentModelFlag = "RCDATA" - elif name == "plaintext": - contentModelFlag = "PLAINTEXT" - - elif type == "EndTag": - name = token["name"] - if not isinstance(name, str): - raise LintError("Tag name is not a string: %(tag)r" % {"tag": name}) - if not name: - raise LintError("Empty tag name") - if name in voidElements: - raise LintError("Void element reported as EndTag token: %(tag)s" % {"tag": name}) - start_name = open_elements.pop() - if start_name != name: - raise LintError("EndTag (%(end)s) does not match StartTag (%(start)s)" % {"end": name, "start": start_name}) - contentModelFlag = "PCDATA" - - elif type == "Comment": - if contentModelFlag != "PCDATA": - raise LintError("Comment not in PCDATA content model flag") - - elif type in ("Characters", "SpaceCharacters"): - data = token["data"] - if not isinstance(data, str): - raise LintError("Attribute name is not a string: %(name)r" % {"name": data}) - if not data: - raise LintError("%(type)s token with empty data" % {"type": type}) - if type == "SpaceCharacters": - data = data.strip(spaceCharacters) - if data: - raise LintError("Non-space character(s) found in SpaceCharacters token: %(token)r" % {"token": data}) - - elif type == "Doctype": - name = token["name"] - if contentModelFlag != "PCDATA": - raise LintError("Doctype not in PCDATA content model flag: %(name)s" % {"name": name}) - if not isinstance(name, str): - raise LintError("Tag name is not a string: %(tag)r" % {"tag": name}) - # XXX: what to do with token["data"] ? - - elif type in ("ParseError", "SerializeError"): - pass - - else: - raise LintError("Unknown token type: %(type)s" % {"type": type}) - - yield token diff --git a/lib/html5lib/filters/optionaltags.py b/lib/html5lib/filters/optionaltags.py deleted file mode 100644 index fefe0b30..00000000 --- a/lib/html5lib/filters/optionaltags.py +++ /dev/null @@ -1,205 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import _base - - -class Filter(_base.Filter): - def slider(self): - previous1 = previous2 = None - for token in self.source: - if previous1 is not None: - yield previous2, previous1, token - previous2 = previous1 - previous1 = token - yield previous2, previous1, None - - def __iter__(self): - for previous, token, next in self.slider(): - type = token["type"] - if type == "StartTag": - if (token["data"] or - not self.is_optional_start(token["name"], previous, next)): - yield token - elif type == "EndTag": - if not self.is_optional_end(token["name"], next): - yield token - else: - yield token - - def is_optional_start(self, tagname, previous, next): - type = next and next["type"] or None - if tagname in 'html': - # An html element's start tag may be omitted if the first thing - # inside the html element is not a space character or a comment. - return type not in ("Comment", "SpaceCharacters") - elif tagname == 'head': - # A head element's start tag may be omitted if the first thing - # inside the head element is an element. - # XXX: we also omit the start tag if the head element is empty - if type in ("StartTag", "EmptyTag"): - return True - elif type == "EndTag": - return next["name"] == "head" - elif tagname == 'body': - # A body element's start tag may be omitted if the first thing - # inside the body element is not a space character or a comment, - # except if the first thing inside the body element is a script - # or style element and the node immediately preceding the body - # element is a head element whose end tag has been omitted. - if type in ("Comment", "SpaceCharacters"): - return False - elif type == "StartTag": - # XXX: we do not look at the preceding event, so we never omit - # the body element's start tag if it's followed by a script or - # a style element. - return next["name"] not in ('script', 'style') - else: - return True - elif tagname == 'colgroup': - # A colgroup element's start tag may be omitted if the first thing - # inside the colgroup element is a col element, and if the element - # is not immediately preceeded by another colgroup element whose - # end tag has been omitted. - if type in ("StartTag", "EmptyTag"): - # XXX: we do not look at the preceding event, so instead we never - # omit the colgroup element's end tag when it is immediately - # followed by another colgroup element. See is_optional_end. - return next["name"] == "col" - else: - return False - elif tagname == 'tbody': - # A tbody element's start tag may be omitted if the first thing - # inside the tbody element is a tr element, and if the element is - # not immediately preceeded by a tbody, thead, or tfoot element - # whose end tag has been omitted. - if type == "StartTag": - # omit the thead and tfoot elements' end tag when they are - # immediately followed by a tbody element. See is_optional_end. - if previous and previous['type'] == 'EndTag' and \ - previous['name'] in ('tbody', 'thead', 'tfoot'): - return False - return next["name"] == 'tr' - else: - return False - return False - - def is_optional_end(self, tagname, next): - type = next and next["type"] or None - if tagname in ('html', 'head', 'body'): - # An html element's end tag may be omitted if the html element - # is not immediately followed by a space character or a comment. - return type not in ("Comment", "SpaceCharacters") - elif tagname in ('li', 'optgroup', 'tr'): - # A li element's end tag may be omitted if the li element is - # immediately followed by another li element or if there is - # no more content in the parent element. - # An optgroup element's end tag may be omitted if the optgroup - # element is immediately followed by another optgroup element, - # or if there is no more content in the parent element. - # A tr element's end tag may be omitted if the tr element is - # immediately followed by another tr element, or if there is - # no more content in the parent element. - if type == "StartTag": - return next["name"] == tagname - else: - return type == "EndTag" or type is None - elif tagname in ('dt', 'dd'): - # A dt element's end tag may be omitted if the dt element is - # immediately followed by another dt element or a dd element. - # A dd element's end tag may be omitted if the dd element is - # immediately followed by another dd element or a dt element, - # or if there is no more content in the parent element. - if type == "StartTag": - return next["name"] in ('dt', 'dd') - elif tagname == 'dd': - return type == "EndTag" or type is None - else: - return False - elif tagname == 'p': - # A p element's end tag may be omitted if the p element is - # immediately followed by an address, article, aside, - # blockquote, datagrid, dialog, dir, div, dl, fieldset, - # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu, - # nav, ol, p, pre, section, table, or ul, element, or if - # there is no more content in the parent element. - if type in ("StartTag", "EmptyTag"): - return next["name"] in ('address', 'article', 'aside', - 'blockquote', 'datagrid', 'dialog', - 'dir', 'div', 'dl', 'fieldset', 'footer', - 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', - 'header', 'hr', 'menu', 'nav', 'ol', - 'p', 'pre', 'section', 'table', 'ul') - else: - return type == "EndTag" or type is None - elif tagname == 'option': - # An option element's end tag may be omitted if the option - # element is immediately followed by another option element, - # or if it is immediately followed by an optgroup - # element, or if there is no more content in the parent - # element. - if type == "StartTag": - return next["name"] in ('option', 'optgroup') - else: - return type == "EndTag" or type is None - elif tagname in ('rt', 'rp'): - # An rt element's end tag may be omitted if the rt element is - # immediately followed by an rt or rp element, or if there is - # no more content in the parent element. - # An rp element's end tag may be omitted if the rp element is - # immediately followed by an rt or rp element, or if there is - # no more content in the parent element. - if type == "StartTag": - return next["name"] in ('rt', 'rp') - else: - return type == "EndTag" or type is None - elif tagname == 'colgroup': - # A colgroup element's end tag may be omitted if the colgroup - # element is not immediately followed by a space character or - # a comment. - if type in ("Comment", "SpaceCharacters"): - return False - elif type == "StartTag": - # XXX: we also look for an immediately following colgroup - # element. See is_optional_start. - return next["name"] != 'colgroup' - else: - return True - elif tagname in ('thead', 'tbody'): - # A thead element's end tag may be omitted if the thead element - # is immediately followed by a tbody or tfoot element. - # A tbody element's end tag may be omitted if the tbody element - # is immediately followed by a tbody or tfoot element, or if - # there is no more content in the parent element. - # A tfoot element's end tag may be omitted if the tfoot element - # is immediately followed by a tbody element, or if there is no - # more content in the parent element. - # XXX: we never omit the end tag when the following element is - # a tbody. See is_optional_start. - if type == "StartTag": - return next["name"] in ['tbody', 'tfoot'] - elif tagname == 'tbody': - return type == "EndTag" or type is None - else: - return False - elif tagname == 'tfoot': - # A tfoot element's end tag may be omitted if the tfoot element - # is immediately followed by a tbody element, or if there is no - # more content in the parent element. - # XXX: we never omit the end tag when the following element is - # a tbody. See is_optional_start. - if type == "StartTag": - return next["name"] == 'tbody' - else: - return type == "EndTag" or type is None - elif tagname in ('td', 'th'): - # A td element's end tag may be omitted if the td element is - # immediately followed by a td or th element, or if there is - # no more content in the parent element. - # A th element's end tag may be omitted if the th element is - # immediately followed by a td or th element, or if there is - # no more content in the parent element. - if type == "StartTag": - return next["name"] in ('td', 'th') - else: - return type == "EndTag" or type is None - return False diff --git a/lib/html5lib/filters/sanitizer.py b/lib/html5lib/filters/sanitizer.py deleted file mode 100644 index b206b54e..00000000 --- a/lib/html5lib/filters/sanitizer.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import _base -from ..sanitizer import HTMLSanitizerMixin - - -class Filter(_base.Filter, HTMLSanitizerMixin): - def __iter__(self): - for token in _base.Filter.__iter__(self): - token = self.sanitize_token(token) - if token: - yield token diff --git a/lib/html5lib/filters/whitespace.py b/lib/html5lib/filters/whitespace.py deleted file mode 100644 index dfc60eeb..00000000 --- a/lib/html5lib/filters/whitespace.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import re - -from . import _base -from ..constants import rcdataElements, spaceCharacters -spaceCharacters = "".join(spaceCharacters) - -SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) - - -class Filter(_base.Filter): - - spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) - - def __iter__(self): - preserve = 0 - for token in _base.Filter.__iter__(self): - type = token["type"] - if type == "StartTag" \ - and (preserve or token["name"] in self.spacePreserveElements): - preserve += 1 - - elif type == "EndTag" and preserve: - preserve -= 1 - - elif not preserve and type == "SpaceCharacters" and token["data"]: - # Test on token["data"] above to not introduce spaces where there were not - token["data"] = " " - - elif not preserve and type == "Characters": - token["data"] = collapse_spaces(token["data"]) - - yield token - - -def collapse_spaces(text): - return SPACES_REGEX.sub(' ', text) diff --git a/lib/html5lib/html5parser.py b/lib/html5lib/html5parser.py deleted file mode 100644 index 12aa6a35..00000000 --- a/lib/html5lib/html5parser.py +++ /dev/null @@ -1,2724 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from six import with_metaclass - -import types - -from . import inputstream -from . import tokenizer - -from . import treebuilders -from .treebuilders._base import Marker - -from . import utils -from . import constants -from .constants import spaceCharacters, asciiUpper2Lower -from .constants import specialElements -from .constants import headingElements -from .constants import cdataElements, rcdataElements -from .constants import tokenTypes, ReparseException, namespaces -from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements -from .constants import adjustForeignAttributes as adjustForeignAttributesMap -from .constants import E - - -def parse(doc, treebuilder="etree", encoding=None, - namespaceHTMLElements=True): - """Parse a string or file-like object into a tree""" - tb = treebuilders.getTreeBuilder(treebuilder) - p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) - return p.parse(doc, encoding=encoding) - - -def parseFragment(doc, container="div", treebuilder="etree", encoding=None, - namespaceHTMLElements=True): - tb = treebuilders.getTreeBuilder(treebuilder) - p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) - return p.parseFragment(doc, container=container, encoding=encoding) - - -def method_decorator_metaclass(function): - class Decorated(type): - def __new__(meta, classname, bases, classDict): - for attributeName, attribute in classDict.items(): - if isinstance(attribute, types.FunctionType): - attribute = function(attribute) - - classDict[attributeName] = attribute - return type.__new__(meta, classname, bases, classDict) - return Decorated - - -class HTMLParser(object): - """HTML parser. Generates a tree structure from a stream of (possibly - malformed) HTML""" - - def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer, - strict=False, namespaceHTMLElements=True, debug=False): - """ - strict - raise an exception when a parse error is encountered - - tree - a treebuilder class controlling the type of tree that will be - returned. Built in treebuilders can be accessed through - html5lib.treebuilders.getTreeBuilder(treeType) - - tokenizer - a class that provides a stream of tokens to the treebuilder. - This may be replaced for e.g. a sanitizer which converts some tags to - text - """ - - # Raise an exception on the first error encountered - self.strict = strict - - if tree is None: - tree = treebuilders.getTreeBuilder("etree") - self.tree = tree(namespaceHTMLElements) - self.tokenizer_class = tokenizer - self.errors = [] - - self.phases = dict([(name, cls(self, self.tree)) for name, cls in - getPhases(debug).items()]) - - def _parse(self, stream, innerHTML=False, container="div", - encoding=None, parseMeta=True, useChardet=True, **kwargs): - - self.innerHTMLMode = innerHTML - self.container = container - self.tokenizer = self.tokenizer_class(stream, encoding=encoding, - parseMeta=parseMeta, - useChardet=useChardet, - parser=self, **kwargs) - self.reset() - - while True: - try: - self.mainLoop() - break - except ReparseException: - self.reset() - - def reset(self): - self.tree.reset() - self.firstStartTag = False - self.errors = [] - self.log = [] # only used with debug mode - # "quirks" / "limited quirks" / "no quirks" - self.compatMode = "no quirks" - - if self.innerHTMLMode: - self.innerHTML = self.container.lower() - - if self.innerHTML in cdataElements: - self.tokenizer.state = self.tokenizer.rcdataState - elif self.innerHTML in rcdataElements: - self.tokenizer.state = self.tokenizer.rawtextState - elif self.innerHTML == 'plaintext': - self.tokenizer.state = self.tokenizer.plaintextState - else: - # state already is data state - # self.tokenizer.state = self.tokenizer.dataState - pass - self.phase = self.phases["beforeHtml"] - self.phase.insertHtmlElement() - self.resetInsertionMode() - else: - self.innerHTML = False - self.phase = self.phases["initial"] - - self.lastPhase = None - - self.beforeRCDataPhase = None - - self.framesetOK = True - - @property - def documentEncoding(self): - """The name of the character encoding - that was used to decode the input stream, - or :obj:`None` if that is not determined yet. - - """ - if not hasattr(self, 'tokenizer'): - return None - return self.tokenizer.stream.charEncoding[0] - - def isHTMLIntegrationPoint(self, element): - if (element.name == "annotation-xml" and - element.namespace == namespaces["mathml"]): - return ("encoding" in element.attributes and - element.attributes["encoding"].translate( - asciiUpper2Lower) in - ("text/html", "application/xhtml+xml")) - else: - return (element.namespace, element.name) in htmlIntegrationPointElements - - def isMathMLTextIntegrationPoint(self, element): - return (element.namespace, element.name) in mathmlTextIntegrationPointElements - - def mainLoop(self): - CharactersToken = tokenTypes["Characters"] - SpaceCharactersToken = tokenTypes["SpaceCharacters"] - StartTagToken = tokenTypes["StartTag"] - EndTagToken = tokenTypes["EndTag"] - CommentToken = tokenTypes["Comment"] - DoctypeToken = tokenTypes["Doctype"] - ParseErrorToken = tokenTypes["ParseError"] - - for token in self.normalizedTokens(): - new_token = token - while new_token is not None: - currentNode = self.tree.openElements[-1] if self.tree.openElements else None - currentNodeNamespace = currentNode.namespace if currentNode else None - currentNodeName = currentNode.name if currentNode else None - - type = new_token["type"] - - if type == ParseErrorToken: - self.parseError(new_token["data"], new_token.get("datavars", {})) - new_token = None - else: - if (len(self.tree.openElements) == 0 or - currentNodeNamespace == self.tree.defaultNamespace or - (self.isMathMLTextIntegrationPoint(currentNode) and - ((type == StartTagToken and - token["name"] not in frozenset(["mglyph", "malignmark"])) or - type in (CharactersToken, SpaceCharactersToken))) or - (currentNodeNamespace == namespaces["mathml"] and - currentNodeName == "annotation-xml" and - token["name"] == "svg") or - (self.isHTMLIntegrationPoint(currentNode) and - type in (StartTagToken, CharactersToken, SpaceCharactersToken))): - phase = self.phase - else: - phase = self.phases["inForeignContent"] - - if type == CharactersToken: - new_token = phase.processCharacters(new_token) - elif type == SpaceCharactersToken: - new_token = phase.processSpaceCharacters(new_token) - elif type == StartTagToken: - new_token = phase.processStartTag(new_token) - elif type == EndTagToken: - new_token = phase.processEndTag(new_token) - elif type == CommentToken: - new_token = phase.processComment(new_token) - elif type == DoctypeToken: - new_token = phase.processDoctype(new_token) - - if (type == StartTagToken and token["selfClosing"] - and not token["selfClosingAcknowledged"]): - self.parseError("non-void-element-with-trailing-solidus", - {"name": token["name"]}) - - # When the loop finishes it's EOF - reprocess = True - phases = [] - while reprocess: - phases.append(self.phase) - reprocess = self.phase.processEOF() - if reprocess: - assert self.phase not in phases - - def normalizedTokens(self): - for token in self.tokenizer: - yield self.normalizeToken(token) - - def parse(self, stream, encoding=None, parseMeta=True, useChardet=True): - """Parse a HTML document into a well-formed tree - - stream - a filelike object or string containing the HTML to be parsed - - The optional encoding parameter must be a string that indicates - the encoding. If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element) - """ - self._parse(stream, innerHTML=False, encoding=encoding, - parseMeta=parseMeta, useChardet=useChardet) - return self.tree.getDocument() - - def parseFragment(self, stream, container="div", encoding=None, - parseMeta=False, useChardet=True): - """Parse a HTML fragment into a well-formed tree fragment - - container - name of the element we're setting the innerHTML property - if set to None, default to 'div' - - stream - a filelike object or string containing the HTML to be parsed - - The optional encoding parameter must be a string that indicates - the encoding. If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element) - """ - self._parse(stream, True, container=container, encoding=encoding) - return self.tree.getFragment() - - def parseError(self, errorcode="XXX-undefined-error", datavars={}): - # XXX The idea is to make errorcode mandatory. - self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) - if self.strict: - raise ParseError(E[errorcode] % datavars) - - def normalizeToken(self, token): - """ HTML5 specific normalizations to the token stream """ - - if token["type"] == tokenTypes["StartTag"]: - token["data"] = dict(token["data"][::-1]) - - return token - - def adjustMathMLAttributes(self, token): - replacements = {"definitionurl": "definitionURL"} - for k, v in replacements.items(): - if k in token["data"]: - token["data"][v] = token["data"][k] - del token["data"][k] - - def adjustSVGAttributes(self, token): - replacements = { - "attributename": "attributeName", - "attributetype": "attributeType", - "basefrequency": "baseFrequency", - "baseprofile": "baseProfile", - "calcmode": "calcMode", - "clippathunits": "clipPathUnits", - "contentscripttype": "contentScriptType", - "contentstyletype": "contentStyleType", - "diffuseconstant": "diffuseConstant", - "edgemode": "edgeMode", - "externalresourcesrequired": "externalResourcesRequired", - "filterres": "filterRes", - "filterunits": "filterUnits", - "glyphref": "glyphRef", - "gradienttransform": "gradientTransform", - "gradientunits": "gradientUnits", - "kernelmatrix": "kernelMatrix", - "kernelunitlength": "kernelUnitLength", - "keypoints": "keyPoints", - "keysplines": "keySplines", - "keytimes": "keyTimes", - "lengthadjust": "lengthAdjust", - "limitingconeangle": "limitingConeAngle", - "markerheight": "markerHeight", - "markerunits": "markerUnits", - "markerwidth": "markerWidth", - "maskcontentunits": "maskContentUnits", - "maskunits": "maskUnits", - "numoctaves": "numOctaves", - "pathlength": "pathLength", - "patterncontentunits": "patternContentUnits", - "patterntransform": "patternTransform", - "patternunits": "patternUnits", - "pointsatx": "pointsAtX", - "pointsaty": "pointsAtY", - "pointsatz": "pointsAtZ", - "preservealpha": "preserveAlpha", - "preserveaspectratio": "preserveAspectRatio", - "primitiveunits": "primitiveUnits", - "refx": "refX", - "refy": "refY", - "repeatcount": "repeatCount", - "repeatdur": "repeatDur", - "requiredextensions": "requiredExtensions", - "requiredfeatures": "requiredFeatures", - "specularconstant": "specularConstant", - "specularexponent": "specularExponent", - "spreadmethod": "spreadMethod", - "startoffset": "startOffset", - "stddeviation": "stdDeviation", - "stitchtiles": "stitchTiles", - "surfacescale": "surfaceScale", - "systemlanguage": "systemLanguage", - "tablevalues": "tableValues", - "targetx": "targetX", - "targety": "targetY", - "textlength": "textLength", - "viewbox": "viewBox", - "viewtarget": "viewTarget", - "xchannelselector": "xChannelSelector", - "ychannelselector": "yChannelSelector", - "zoomandpan": "zoomAndPan" - } - for originalName in list(token["data"].keys()): - if originalName in replacements: - svgName = replacements[originalName] - token["data"][svgName] = token["data"][originalName] - del token["data"][originalName] - - def adjustForeignAttributes(self, token): - replacements = adjustForeignAttributesMap - - for originalName in token["data"].keys(): - if originalName in replacements: - foreignName = replacements[originalName] - token["data"][foreignName] = token["data"][originalName] - del token["data"][originalName] - - def reparseTokenNormal(self, token): - self.parser.phase() - - def resetInsertionMode(self): - # The name of this method is mostly historical. (It's also used in the - # specification.) - last = False - newModes = { - "select": "inSelect", - "td": "inCell", - "th": "inCell", - "tr": "inRow", - "tbody": "inTableBody", - "thead": "inTableBody", - "tfoot": "inTableBody", - "caption": "inCaption", - "colgroup": "inColumnGroup", - "table": "inTable", - "head": "inBody", - "body": "inBody", - "frameset": "inFrameset", - "html": "beforeHead" - } - for node in self.tree.openElements[::-1]: - nodeName = node.name - new_phase = None - if node == self.tree.openElements[0]: - assert self.innerHTML - last = True - nodeName = self.innerHTML - # Check for conditions that should only happen in the innerHTML - # case - if nodeName in ("select", "colgroup", "head", "html"): - assert self.innerHTML - - if not last and node.namespace != self.tree.defaultNamespace: - continue - - if nodeName in newModes: - new_phase = self.phases[newModes[nodeName]] - break - elif last: - new_phase = self.phases["inBody"] - break - - self.phase = new_phase - - def parseRCDataRawtext(self, token, contentType): - """Generic RCDATA/RAWTEXT Parsing algorithm - contentType - RCDATA or RAWTEXT - """ - assert contentType in ("RAWTEXT", "RCDATA") - - self.tree.insertElement(token) - - if contentType == "RAWTEXT": - self.tokenizer.state = self.tokenizer.rawtextState - else: - self.tokenizer.state = self.tokenizer.rcdataState - - self.originalPhase = self.phase - - self.phase = self.phases["text"] - - -def getPhases(debug): - def log(function): - """Logger that records which phase processes each token""" - type_names = dict((value, key) for key, value in - constants.tokenTypes.items()) - - def wrapped(self, *args, **kwargs): - if function.__name__.startswith("process") and len(args) > 0: - token = args[0] - try: - info = {"type": type_names[token['type']]} - except: - raise - if token['type'] in constants.tagTokenTypes: - info["name"] = token['name'] - - self.parser.log.append((self.parser.tokenizer.state.__name__, - self.parser.phase.__class__.__name__, - self.__class__.__name__, - function.__name__, - info)) - return function(self, *args, **kwargs) - else: - return function(self, *args, **kwargs) - return wrapped - - def getMetaclass(use_metaclass, metaclass_func): - if use_metaclass: - return method_decorator_metaclass(metaclass_func) - else: - return type - - class Phase(with_metaclass(getMetaclass(debug, log))): - """Base class for helper object that implements each phase of processing - """ - - def __init__(self, parser, tree): - self.parser = parser - self.tree = tree - - def processEOF(self): - raise NotImplementedError - - def processComment(self, token): - # For most phases the following is correct. Where it's not it will be - # overridden. - self.tree.insertComment(token, self.tree.openElements[-1]) - - def processDoctype(self, token): - self.parser.parseError("unexpected-doctype") - - def processCharacters(self, token): - self.tree.insertText(token["data"]) - - def processSpaceCharacters(self, token): - self.tree.insertText(token["data"]) - - def processStartTag(self, token): - return self.startTagHandler[token["name"]](token) - - def startTagHtml(self, token): - if not self.parser.firstStartTag and token["name"] == "html": - self.parser.parseError("non-html-root") - # XXX Need a check here to see if the first start tag token emitted is - # this token... If it's not, invoke self.parser.parseError(). - for attr, value in token["data"].items(): - if attr not in self.tree.openElements[0].attributes: - self.tree.openElements[0].attributes[attr] = value - self.parser.firstStartTag = False - - def processEndTag(self, token): - return self.endTagHandler[token["name"]](token) - - class InitialPhase(Phase): - def processSpaceCharacters(self, token): - pass - - def processComment(self, token): - self.tree.insertComment(token, self.tree.document) - - def processDoctype(self, token): - name = token["name"] - publicId = token["publicId"] - systemId = token["systemId"] - correct = token["correct"] - - if (name != "html" or publicId is not None or - systemId is not None and systemId != "about:legacy-compat"): - self.parser.parseError("unknown-doctype") - - if publicId is None: - publicId = "" - - self.tree.insertDoctype(token) - - if publicId != "": - publicId = publicId.translate(asciiUpper2Lower) - - if (not correct or token["name"] != "html" - or publicId.startswith( - ("+//silmaril//dtd html pro v0r11 19970101//", - "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", - "-//as//dtd html 3.0 aswedit + extensions//", - "-//ietf//dtd html 2.0 level 1//", - "-//ietf//dtd html 2.0 level 2//", - "-//ietf//dtd html 2.0 strict level 1//", - "-//ietf//dtd html 2.0 strict level 2//", - "-//ietf//dtd html 2.0 strict//", - "-//ietf//dtd html 2.0//", - "-//ietf//dtd html 2.1e//", - "-//ietf//dtd html 3.0//", - "-//ietf//dtd html 3.2 final//", - "-//ietf//dtd html 3.2//", - "-//ietf//dtd html 3//", - "-//ietf//dtd html level 0//", - "-//ietf//dtd html level 1//", - "-//ietf//dtd html level 2//", - "-//ietf//dtd html level 3//", - "-//ietf//dtd html strict level 0//", - "-//ietf//dtd html strict level 1//", - "-//ietf//dtd html strict level 2//", - "-//ietf//dtd html strict level 3//", - "-//ietf//dtd html strict//", - "-//ietf//dtd html//", - "-//metrius//dtd metrius presentational//", - "-//microsoft//dtd internet explorer 2.0 html strict//", - "-//microsoft//dtd internet explorer 2.0 html//", - "-//microsoft//dtd internet explorer 2.0 tables//", - "-//microsoft//dtd internet explorer 3.0 html strict//", - "-//microsoft//dtd internet explorer 3.0 html//", - "-//microsoft//dtd internet explorer 3.0 tables//", - "-//netscape comm. corp.//dtd html//", - "-//netscape comm. corp.//dtd strict html//", - "-//o'reilly and associates//dtd html 2.0//", - "-//o'reilly and associates//dtd html extended 1.0//", - "-//o'reilly and associates//dtd html extended relaxed 1.0//", - "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", - "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", - "-//spyglass//dtd html 2.0 extended//", - "-//sq//dtd html 2.0 hotmetal + extensions//", - "-//sun microsystems corp.//dtd hotjava html//", - "-//sun microsystems corp.//dtd hotjava strict html//", - "-//w3c//dtd html 3 1995-03-24//", - "-//w3c//dtd html 3.2 draft//", - "-//w3c//dtd html 3.2 final//", - "-//w3c//dtd html 3.2//", - "-//w3c//dtd html 3.2s draft//", - "-//w3c//dtd html 4.0 frameset//", - "-//w3c//dtd html 4.0 transitional//", - "-//w3c//dtd html experimental 19960712//", - "-//w3c//dtd html experimental 970421//", - "-//w3c//dtd w3 html//", - "-//w3o//dtd w3 html 3.0//", - "-//webtechs//dtd mozilla html 2.0//", - "-//webtechs//dtd mozilla html//")) - or publicId in - ("-//w3o//dtd w3 html strict 3.0//en//", - "-/w3c/dtd html 4.0 transitional/en", - "html") - or publicId.startswith( - ("-//w3c//dtd html 4.01 frameset//", - "-//w3c//dtd html 4.01 transitional//")) and - systemId is None - or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): - self.parser.compatMode = "quirks" - elif (publicId.startswith( - ("-//w3c//dtd xhtml 1.0 frameset//", - "-//w3c//dtd xhtml 1.0 transitional//")) - or publicId.startswith( - ("-//w3c//dtd html 4.01 frameset//", - "-//w3c//dtd html 4.01 transitional//")) and - systemId is not None): - self.parser.compatMode = "limited quirks" - - self.parser.phase = self.parser.phases["beforeHtml"] - - def anythingElse(self): - self.parser.compatMode = "quirks" - self.parser.phase = self.parser.phases["beforeHtml"] - - def processCharacters(self, token): - self.parser.parseError("expected-doctype-but-got-chars") - self.anythingElse() - return token - - def processStartTag(self, token): - self.parser.parseError("expected-doctype-but-got-start-tag", - {"name": token["name"]}) - self.anythingElse() - return token - - def processEndTag(self, token): - self.parser.parseError("expected-doctype-but-got-end-tag", - {"name": token["name"]}) - self.anythingElse() - return token - - def processEOF(self): - self.parser.parseError("expected-doctype-but-got-eof") - self.anythingElse() - return True - - class BeforeHtmlPhase(Phase): - # helper methods - def insertHtmlElement(self): - self.tree.insertRoot(impliedTagToken("html", "StartTag")) - self.parser.phase = self.parser.phases["beforeHead"] - - # other - def processEOF(self): - self.insertHtmlElement() - return True - - def processComment(self, token): - self.tree.insertComment(token, self.tree.document) - - def processSpaceCharacters(self, token): - pass - - def processCharacters(self, token): - self.insertHtmlElement() - return token - - def processStartTag(self, token): - if token["name"] == "html": - self.parser.firstStartTag = True - self.insertHtmlElement() - return token - - def processEndTag(self, token): - if token["name"] not in ("head", "body", "html", "br"): - self.parser.parseError("unexpected-end-tag-before-html", - {"name": token["name"]}) - else: - self.insertHtmlElement() - return token - - class BeforeHeadPhase(Phase): - def __init__(self, parser, tree): - Phase.__init__(self, parser, tree) - - self.startTagHandler = utils.MethodDispatcher([ - ("html", self.startTagHtml), - ("head", self.startTagHead) - ]) - self.startTagHandler.default = self.startTagOther - - self.endTagHandler = utils.MethodDispatcher([ - (("head", "body", "html", "br"), self.endTagImplyHead) - ]) - self.endTagHandler.default = self.endTagOther - - def processEOF(self): - self.startTagHead(impliedTagToken("head", "StartTag")) - return True - - def processSpaceCharacters(self, token): - pass - - def processCharacters(self, token): - self.startTagHead(impliedTagToken("head", "StartTag")) - return token - - def startTagHtml(self, token): - return self.parser.phases["inBody"].processStartTag(token) - - def startTagHead(self, token): - self.tree.insertElement(token) - self.tree.headPointer = self.tree.openElements[-1] - self.parser.phase = self.parser.phases["inHead"] - - def startTagOther(self, token): - self.startTagHead(impliedTagToken("head", "StartTag")) - return token - - def endTagImplyHead(self, token): - self.startTagHead(impliedTagToken("head", "StartTag")) - return token - - def endTagOther(self, token): - self.parser.parseError("end-tag-after-implied-root", - {"name": token["name"]}) - - class InHeadPhase(Phase): - def __init__(self, parser, tree): - Phase.__init__(self, parser, tree) - - self.startTagHandler = utils.MethodDispatcher([ - ("html", self.startTagHtml), - ("title", self.startTagTitle), - (("noscript", "noframes", "style"), self.startTagNoScriptNoFramesStyle), - ("script", self.startTagScript), - (("base", "basefont", "bgsound", "command", "link"), - self.startTagBaseLinkCommand), - ("meta", self.startTagMeta), - ("head", self.startTagHead) - ]) - self.startTagHandler.default = self.startTagOther - - self. endTagHandler = utils.MethodDispatcher([ - ("head", self.endTagHead), - (("br", "html", "body"), self.endTagHtmlBodyBr) - ]) - self.endTagHandler.default = self.endTagOther - - # the real thing - def processEOF(self): - self.anythingElse() - return True - - def processCharacters(self, token): - self.anythingElse() - return token - - def startTagHtml(self, token): - return self.parser.phases["inBody"].processStartTag(token) - - def startTagHead(self, token): - self.parser.parseError("two-heads-are-not-better-than-one") - - def startTagBaseLinkCommand(self, token): - self.tree.insertElement(token) - self.tree.openElements.pop() - token["selfClosingAcknowledged"] = True - - def startTagMeta(self, token): - self.tree.insertElement(token) - self.tree.openElements.pop() - token["selfClosingAcknowledged"] = True - - attributes = token["data"] - if self.parser.tokenizer.stream.charEncoding[1] == "tentative": - if "charset" in attributes: - self.parser.tokenizer.stream.changeEncoding(attributes["charset"]) - elif ("content" in attributes and - "http-equiv" in attributes and - attributes["http-equiv"].lower() == "content-type"): - # Encoding it as UTF-8 here is a hack, as really we should pass - # the abstract Unicode string, and just use the - # ContentAttrParser on that, but using UTF-8 allows all chars - # to be encoded and as a ASCII-superset works. - data = inputstream.EncodingBytes(attributes["content"].encode("utf-8")) - parser = inputstream.ContentAttrParser(data) - codec = parser.parse() - self.parser.tokenizer.stream.changeEncoding(codec) - - def startTagTitle(self, token): - self.parser.parseRCDataRawtext(token, "RCDATA") - - def startTagNoScriptNoFramesStyle(self, token): - # Need to decide whether to implement the scripting-disabled case - self.parser.parseRCDataRawtext(token, "RAWTEXT") - - def startTagScript(self, token): - self.tree.insertElement(token) - self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState - self.parser.originalPhase = self.parser.phase - self.parser.phase = self.parser.phases["text"] - - def startTagOther(self, token): - self.anythingElse() - return token - - def endTagHead(self, token): - node = self.parser.tree.openElements.pop() - assert node.name == "head", "Expected head got %s" % node.name - self.parser.phase = self.parser.phases["afterHead"] - - def endTagHtmlBodyBr(self, token): - self.anythingElse() - return token - - def endTagOther(self, token): - self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) - - def anythingElse(self): - self.endTagHead(impliedTagToken("head")) - - # XXX If we implement a parser for which scripting is disabled we need to - # implement this phase. - # - # class InHeadNoScriptPhase(Phase): - class AfterHeadPhase(Phase): - def __init__(self, parser, tree): - Phase.__init__(self, parser, tree) - - self.startTagHandler = utils.MethodDispatcher([ - ("html", self.startTagHtml), - ("body", self.startTagBody), - ("frameset", self.startTagFrameset), - (("base", "basefont", "bgsound", "link", "meta", "noframes", "script", - "style", "title"), - self.startTagFromHead), - ("head", self.startTagHead) - ]) - self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([(("body", "html", "br"), - self.endTagHtmlBodyBr)]) - self.endTagHandler.default = self.endTagOther - - def processEOF(self): - self.anythingElse() - return True - - def processCharacters(self, token): - self.anythingElse() - return token - - def startTagHtml(self, token): - return self.parser.phases["inBody"].processStartTag(token) - - def startTagBody(self, token): - self.parser.framesetOK = False - self.tree.insertElement(token) - self.parser.phase = self.parser.phases["inBody"] - - def startTagFrameset(self, token): - self.tree.insertElement(token) - self.parser.phase = self.parser.phases["inFrameset"] - - def startTagFromHead(self, token): - self.parser.parseError("unexpected-start-tag-out-of-my-head", - {"name": token["name"]}) - self.tree.openElements.append(self.tree.headPointer) - self.parser.phases["inHead"].processStartTag(token) - for node in self.tree.openElements[::-1]: - if node.name == "head": - self.tree.openElements.remove(node) - break - - def startTagHead(self, token): - self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) - - def startTagOther(self, token): - self.anythingElse() - return token - - def endTagHtmlBodyBr(self, token): - self.anythingElse() - return token - - def endTagOther(self, token): - self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) - - def anythingElse(self): - self.tree.insertElement(impliedTagToken("body", "StartTag")) - self.parser.phase = self.parser.phases["inBody"] - self.parser.framesetOK = True - - class InBodyPhase(Phase): - # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody - # the really-really-really-very crazy mode - def __init__(self, parser, tree): - Phase.__init__(self, parser, tree) - - # Keep a ref to this for special handling of whitespace in
-            self.processSpaceCharactersNonPre = self.processSpaceCharacters
-
-            self.startTagHandler = utils.MethodDispatcher([
-                ("html", self.startTagHtml),
-                (("base", "basefont", "bgsound", "command", "link", "meta",
-                  "script", "style", "title"),
-                 self.startTagProcessInHead),
-                ("body", self.startTagBody),
-                ("frameset", self.startTagFrameset),
-                (("address", "article", "aside", "blockquote", "center", "details",
-                  "details", "dir", "div", "dl", "fieldset", "figcaption", "figure",
-                  "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
-                  "section", "summary", "ul"),
-                 self.startTagCloseP),
-                (headingElements, self.startTagHeading),
-                (("pre", "listing"), self.startTagPreListing),
-                ("form", self.startTagForm),
-                (("li", "dd", "dt"), self.startTagListItem),
-                ("plaintext", self.startTagPlaintext),
-                ("a", self.startTagA),
-                (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
-                  "strong", "tt", "u"), self.startTagFormatting),
-                ("nobr", self.startTagNobr),
-                ("button", self.startTagButton),
-                (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
-                ("xmp", self.startTagXmp),
-                ("table", self.startTagTable),
-                (("area", "br", "embed", "img", "keygen", "wbr"),
-                 self.startTagVoidFormatting),
-                (("param", "source", "track"), self.startTagParamSource),
-                ("input", self.startTagInput),
-                ("hr", self.startTagHr),
-                ("image", self.startTagImage),
-                ("isindex", self.startTagIsIndex),
-                ("textarea", self.startTagTextarea),
-                ("iframe", self.startTagIFrame),
-                (("noembed", "noframes", "noscript"), self.startTagRawtext),
-                ("select", self.startTagSelect),
-                (("rp", "rt"), self.startTagRpRt),
-                (("option", "optgroup"), self.startTagOpt),
-                (("math"), self.startTagMath),
-                (("svg"), self.startTagSvg),
-                (("caption", "col", "colgroup", "frame", "head",
-                  "tbody", "td", "tfoot", "th", "thead",
-                  "tr"), self.startTagMisplaced)
-            ])
-            self.startTagHandler.default = self.startTagOther
-
-            self.endTagHandler = utils.MethodDispatcher([
-                ("body", self.endTagBody),
-                ("html", self.endTagHtml),
-                (("address", "article", "aside", "blockquote", "button", "center",
-                  "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
-                  "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
-                  "section", "summary", "ul"), self.endTagBlock),
-                ("form", self.endTagForm),
-                ("p", self.endTagP),
-                (("dd", "dt", "li"), self.endTagListItem),
-                (headingElements, self.endTagHeading),
-                (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
-                  "strike", "strong", "tt", "u"), self.endTagFormatting),
-                (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
-                ("br", self.endTagBr),
-            ])
-            self.endTagHandler.default = self.endTagOther
-
-        def isMatchingFormattingElement(self, node1, node2):
-            if node1.name != node2.name or node1.namespace != node2.namespace:
-                return False
-            elif len(node1.attributes) != len(node2.attributes):
-                return False
-            else:
-                attributes1 = sorted(node1.attributes.items())
-                attributes2 = sorted(node2.attributes.items())
-                for attr1, attr2 in zip(attributes1, attributes2):
-                    if attr1 != attr2:
-                        return False
-            return True
-
-        # helper
-        def addFormattingElement(self, token):
-            self.tree.insertElement(token)
-            element = self.tree.openElements[-1]
-
-            matchingElements = []
-            for node in self.tree.activeFormattingElements[::-1]:
-                if node is Marker:
-                    break
-                elif self.isMatchingFormattingElement(node, element):
-                    matchingElements.append(node)
-
-            assert len(matchingElements) <= 3
-            if len(matchingElements) == 3:
-                self.tree.activeFormattingElements.remove(matchingElements[-1])
-            self.tree.activeFormattingElements.append(element)
-
-        # the real deal
-        def processEOF(self):
-            allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td",
-                                          "tfoot", "th", "thead", "tr", "body",
-                                          "html"))
-            for node in self.tree.openElements[::-1]:
-                if node.name not in allowed_elements:
-                    self.parser.parseError("expected-closing-tag-but-got-eof")
-                    break
-            # Stop parsing
-
-        def processSpaceCharactersDropNewline(self, token):
-            # Sometimes (start of 
, , and ")
+
+    def test_nested_inline_elements(self):
+        """Inline elements can be nested indefinitely."""
+        b_tag = "Inside a B tag"
+        self.assertSoupEquals(b_tag)
+
+        nested_b_tag = "

A nested tag

" + self.assertSoupEquals(nested_b_tag) + + double_nested_b_tag = "

A doubly nested tag

" + self.assertSoupEquals(nested_b_tag) + + def test_nested_block_level_elements(self): + """Block elements can be nested.""" + soup = self.soup('

Foo

') + blockquote = soup.blockquote + self.assertEqual(blockquote.p.b.string, 'Foo') + self.assertEqual(blockquote.b.string, 'Foo') + + def test_correctly_nested_tables(self): + """One table can go inside another one.""" + markup = ('' + '' + "') + + self.assertSoupEquals( + markup, + '
Here's another table:" + '' + '' + '
foo
Here\'s another table:' + '
foo
' + '
') + + self.assertSoupEquals( + "" + "" + "
Foo
Bar
Baz
") + + def test_deeply_nested_multivalued_attribute(self): + # html5lib can set the attributes of the same tag many times + # as it rearranges the tree. This has caused problems with + # multivalued attributes. + markup = '
' + soup = self.soup(markup) + self.assertEqual(["css"], soup.div.div['class']) + + def test_multivalued_attribute_on_html(self): + # html5lib uses a different API to set the attributes ot the + # tag. This has caused problems with multivalued + # attributes. + markup = '' + soup = self.soup(markup) + self.assertEqual(["a", "b"], soup.html['class']) + + def test_angle_brackets_in_attribute_values_are_escaped(self): + self.assertSoupEquals('', '') + + def test_entities_in_attributes_converted_to_unicode(self): + expect = u'

' + self.assertSoupEquals('

', expect) + self.assertSoupEquals('

', expect) + self.assertSoupEquals('

', expect) + self.assertSoupEquals('

', expect) + + def test_entities_in_text_converted_to_unicode(self): + expect = u'

pi\N{LATIN SMALL LETTER N WITH TILDE}ata

' + self.assertSoupEquals("

piñata

", expect) + self.assertSoupEquals("

piñata

", expect) + self.assertSoupEquals("

piñata

", expect) + self.assertSoupEquals("

piñata

", expect) + + def test_quot_entity_converted_to_quotation_mark(self): + self.assertSoupEquals("

I said "good day!"

", + '

I said "good day!"

') + + def test_out_of_range_entity(self): + expect = u"\N{REPLACEMENT CHARACTER}" + self.assertSoupEquals("�", expect) + self.assertSoupEquals("�", expect) + self.assertSoupEquals("�", expect) + + def test_multipart_strings(self): + "Mostly to prevent a recurrence of a bug in the html5lib treebuilder." + soup = self.soup("

\nfoo

") + self.assertEqual("p", soup.h2.string.next_element.name) + self.assertEqual("p", soup.p.name) + self.assertConnectedness(soup) + + def test_head_tag_between_head_and_body(self): + "Prevent recurrence of a bug in the html5lib treebuilder." + content = """ + + foo + +""" + soup = self.soup(content) + self.assertNotEqual(None, soup.html.body) + self.assertConnectedness(soup) + + def test_multiple_copies_of_a_tag(self): + "Prevent recurrence of a bug in the html5lib treebuilder." + content = """ + + + + + +""" + soup = self.soup(content) + self.assertConnectedness(soup.article) + + def test_basic_namespaces(self): + """Parsers don't need to *understand* namespaces, but at the + very least they should not choke on namespaces or lose + data.""" + + markup = b'4' + soup = self.soup(markup) + self.assertEqual(markup, soup.encode()) + html = soup.html + self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns']) + self.assertEqual( + 'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml']) + self.assertEqual( + 'http://www.w3.org/2000/svg', soup.html['xmlns:svg']) + + def test_multivalued_attribute_value_becomes_list(self): + markup = b'' + soup = self.soup(markup) + self.assertEqual(['foo', 'bar'], soup.a['class']) + + # + # Generally speaking, tests below this point are more tests of + # Beautiful Soup than tests of the tree builders. But parsers are + # weird, so we run these tests separately for every tree builder + # to detect any differences between them. + # + + def test_can_parse_unicode_document(self): + # A seemingly innocuous document... but it's in Unicode! And + # it contains characters that can't be represented in the + # encoding found in the declaration! The horror! + markup = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' + soup = self.soup(markup) + self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string) + + def test_soupstrainer(self): + """Parsers should be able to work with SoupStrainers.""" + strainer = SoupStrainer("b") + soup = self.soup("A bold statement", + parse_only=strainer) + self.assertEqual(soup.decode(), "bold") + + def test_single_quote_attribute_values_become_double_quotes(self): + self.assertSoupEquals("", + '') + + def test_attribute_values_with_nested_quotes_are_left_alone(self): + text = """a""" + self.assertSoupEquals(text) + + def test_attribute_values_with_double_nested_quotes_get_quoted(self): + text = """a""" + soup = self.soup(text) + soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"' + self.assertSoupEquals( + soup.foo.decode(), + """a""") + + def test_ampersand_in_attribute_value_gets_escaped(self): + self.assertSoupEquals('', + '') + + self.assertSoupEquals( + 'foo', + 'foo') + + def test_escaped_ampersand_in_attribute_value_is_left_alone(self): + self.assertSoupEquals('') + + def test_entities_in_strings_converted_during_parsing(self): + # Both XML and HTML entities are converted to Unicode characters + # during parsing. + text = "

<<sacré bleu!>>

" + expected = u"

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

" + self.assertSoupEquals(text, expected) + + def test_smart_quotes_converted_on_the_way_in(self): + # Microsoft smart quotes are converted to Unicode characters during + # parsing. + quote = b"

\x91Foo\x92

" + soup = self.soup(quote) + self.assertEqual( + soup.p.string, + u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") + + def test_non_breaking_spaces_converted_on_the_way_in(self): + soup = self.soup("  ") + self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2) + + def test_entities_converted_on_the_way_out(self): + text = "

<<sacré bleu!>>

" + expected = u"

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

".encode("utf-8") + soup = self.soup(text) + self.assertEqual(soup.p.encode("utf-8"), expected) + + def test_real_iso_latin_document(self): + # Smoke test of interrelated functionality, using an + # easy-to-understand document. + + # Here it is in Unicode. Note that it claims to be in ISO-Latin-1. + unicode_html = u'

Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!

' + + # That's because we're going to encode it into ISO-Latin-1, and use + # that to test. + iso_latin_html = unicode_html.encode("iso-8859-1") + + # Parse the ISO-Latin-1 HTML. + soup = self.soup(iso_latin_html) + # Encode it to UTF-8. + result = soup.encode("utf-8") + + # What do we expect the result to look like? Well, it would + # look like unicode_html, except that the META tag would say + # UTF-8 instead of ISO-Latin-1. + expected = unicode_html.replace("ISO-Latin-1", "utf-8") + + # And, of course, it would be in UTF-8, not Unicode. + expected = expected.encode("utf-8") + + # Ta-da! + self.assertEqual(result, expected) + + def test_real_shift_jis_document(self): + # Smoke test to make sure the parser can handle a document in + # Shift-JIS encoding, without choking. + shift_jis_html = ( + b'
'
+            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
+            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
+            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
+            b'
') + unicode_html = shift_jis_html.decode("shift-jis") + soup = self.soup(unicode_html) + + # Make sure the parse tree is correctly encoded to various + # encodings. + self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8")) + self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp")) + + def test_real_hebrew_document(self): + # A real-world test to make sure we can convert ISO-8859-9 (a + # Hebrew encoding) to UTF-8. + hebrew_document = b'Hebrew (ISO 8859-8) in Visual Directionality

Hebrew (ISO 8859-8) in Visual Directionality

\xed\xe5\xec\xf9' + soup = self.soup( + hebrew_document, from_encoding="iso8859-8") + self.assertEqual(soup.original_encoding, 'iso8859-8') + self.assertEqual( + soup.encode('utf-8'), + hebrew_document.decode("iso8859-8").encode("utf-8")) + + def test_meta_tag_reflects_current_encoding(self): + # Here's the tag saying that a document is + # encoded in Shift-JIS. + meta_tag = ('') + + # Here's a document incorporating that meta tag. + shift_jis_html = ( + '\n%s\n' + '' + 'Shift-JIS markup goes here.') % meta_tag + soup = self.soup(shift_jis_html) + + # Parse the document, and the charset is seemingly unaffected. + parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'}) + content = parsed_meta['content'] + self.assertEqual('text/html; charset=x-sjis', content) + + # But that value is actually a ContentMetaAttributeValue object. + self.assertTrue(isinstance(content, ContentMetaAttributeValue)) + + # And it will take on a value that reflects its current + # encoding. + self.assertEqual('text/html; charset=utf8', content.encode("utf8")) + + # For the rest of the story, see TestSubstitutions in + # test_tree.py. + + def test_html5_style_meta_tag_reflects_current_encoding(self): + # Here's the tag saying that a document is + # encoded in Shift-JIS. + meta_tag = ('') + + # Here's a document incorporating that meta tag. + shift_jis_html = ( + '\n%s\n' + '' + 'Shift-JIS markup goes here.') % meta_tag + soup = self.soup(shift_jis_html) + + # Parse the document, and the charset is seemingly unaffected. + parsed_meta = soup.find('meta', id="encoding") + charset = parsed_meta['charset'] + self.assertEqual('x-sjis', charset) + + # But that value is actually a CharsetMetaAttributeValue object. + self.assertTrue(isinstance(charset, CharsetMetaAttributeValue)) + + # And it will take on a value that reflects its current + # encoding. + self.assertEqual('utf8', charset.encode("utf8")) + + def test_tag_with_no_attributes_can_have_attributes_added(self): + data = self.soup("text") + data.a['foo'] = 'bar' + self.assertEqual('text', data.a.decode()) + +class XMLTreeBuilderSmokeTest(object): + + def test_pickle_and_unpickle_identity(self): + # Pickling a tree, then unpickling it, yields a tree identical + # to the original. + tree = self.soup("foo") + dumped = pickle.dumps(tree, 2) + loaded = pickle.loads(dumped) + self.assertEqual(loaded.__class__, BeautifulSoup) + self.assertEqual(loaded.decode(), tree.decode()) + + def test_docstring_generated(self): + soup = self.soup("") + self.assertEqual( + soup.encode(), b'\n') + + def test_real_xhtml_document(self): + """A real XHTML document should come out *exactly* the same as it went in.""" + markup = b""" + + +Hello. +Goodbye. +""" + soup = self.soup(markup) + self.assertEqual( + soup.encode("utf-8"), markup) + + def test_formatter_processes_script_tag_for_xml_documents(self): + doc = """ + +""" + soup = BeautifulSoup(doc, "lxml-xml") + # lxml would have stripped this while parsing, but we can add + # it later. + soup.script.string = 'console.log("< < hey > > ");' + encoded = soup.encode() + self.assertTrue(b"< < hey > >" in encoded) + + def test_can_parse_unicode_document(self): + markup = u'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' + soup = self.soup(markup) + self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string) + + def test_popping_namespaced_tag(self): + markup = 'b2012-07-02T20:33:42Zcd' + soup = self.soup(markup) + self.assertEqual( + unicode(soup.rss), markup) + + def test_docstring_includes_correct_encoding(self): + soup = self.soup("") + self.assertEqual( + soup.encode("latin1"), + b'\n') + + def test_large_xml_document(self): + """A large XML document should come out the same as it went in.""" + markup = (b'\n' + + b'0' * (2**12) + + b'') + soup = self.soup(markup) + self.assertEqual(soup.encode("utf-8"), markup) + + + def test_tags_are_empty_element_if_and_only_if_they_are_empty(self): + self.assertSoupEquals("

", "

") + self.assertSoupEquals("

foo

") + + def test_namespaces_are_preserved(self): + markup = 'This tag is in the a namespaceThis tag is in the b namespace' + soup = self.soup(markup) + root = soup.root + self.assertEqual("http://example.com/", root['xmlns:a']) + self.assertEqual("http://example.net/", root['xmlns:b']) + + def test_closing_namespaced_tag(self): + markup = '

20010504

' + soup = self.soup(markup) + self.assertEqual(unicode(soup.p), markup) + + def test_namespaced_attributes(self): + markup = '' + soup = self.soup(markup) + self.assertEqual(unicode(soup.foo), markup) + + def test_namespaced_attributes_xml_namespace(self): + markup = 'bar' + soup = self.soup(markup) + self.assertEqual(unicode(soup.foo), markup) + +class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): + """Smoke test for a tree builder that supports HTML5.""" + + def test_real_xhtml_document(self): + # Since XHTML is not HTML5, HTML5 parsers are not tested to handle + # XHTML documents in any particular way. + pass + + def test_html_tags_have_namespace(self): + markup = "" + soup = self.soup(markup) + self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace) + + def test_svg_tags_have_namespace(self): + markup = '' + soup = self.soup(markup) + namespace = "http://www.w3.org/2000/svg" + self.assertEqual(namespace, soup.svg.namespace) + self.assertEqual(namespace, soup.circle.namespace) + + + def test_mathml_tags_have_namespace(self): + markup = '5' + soup = self.soup(markup) + namespace = 'http://www.w3.org/1998/Math/MathML' + self.assertEqual(namespace, soup.math.namespace) + self.assertEqual(namespace, soup.msqrt.namespace) + + def test_xml_declaration_becomes_comment(self): + markup = '' + soup = self.soup(markup) + self.assertTrue(isinstance(soup.contents[0], Comment)) + self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?') + self.assertEqual("html", soup.contents[0].next_element.name) + +def skipIf(condition, reason): + def nothing(test, *args, **kwargs): + return None + + def decorator(test_item): + if condition: + return nothing + else: + return test_item + + return decorator diff --git a/headphones/certgen.py b/lib/certgen.py similarity index 98% rename from headphones/certgen.py rename to lib/certgen.py index 079424b8..1b941161 100644 --- a/headphones/certgen.py +++ b/lib/certgen.py @@ -8,9 +8,8 @@ Certificate generation module. """ -import time - from OpenSSL import crypto +import time TYPE_RSA = crypto.TYPE_RSA TYPE_DSA = crypto.TYPE_DSA @@ -30,7 +29,6 @@ def createKeyPair(type, bits): pkey.generate_key(type, bits) return pkey - def createCertRequest(pkey, digest="md5", **name): """ Create a certificate request. @@ -51,14 +49,13 @@ def createCertRequest(pkey, digest="md5", **name): req = crypto.X509Req() subj = req.get_subject() - for (key, value) in name.items(): + for (key,value) in name.items(): setattr(subj, key, value) req.set_pubkey(pkey) req.sign(pkey, digest) return req - def createCertificate(req, (issuerCert, issuerKey), serial, (notBefore, notAfter), digest="md5"): """ Generate a certificate given a certificate request. diff --git a/lib/cherrypy/LICENSE.txt b/lib/cherrypy/LICENSE.txt new file mode 100644 index 00000000..3816f933 --- /dev/null +++ b/lib/cherrypy/LICENSE.txt @@ -0,0 +1,25 @@ +Copyright (c) 2004-2011, CherryPy Team (team@cherrypy.org) +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the CherryPy Team nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/lib/cherrypy/__init__.py b/lib/cherrypy/__init__.py new file mode 100644 index 00000000..7870ef0d --- /dev/null +++ b/lib/cherrypy/__init__.py @@ -0,0 +1,652 @@ +"""CherryPy is a pythonic, object-oriented HTTP framework. + + +CherryPy consists of not one, but four separate API layers. + +The APPLICATION LAYER is the simplest. CherryPy applications are written as +a tree of classes and methods, where each branch in the tree corresponds to +a branch in the URL path. Each method is a 'page handler', which receives +GET and POST params as keyword arguments, and returns or yields the (HTML) +body of the response. The special method name 'index' is used for paths +that end in a slash, and the special method name 'default' is used to +handle multiple paths via a single handler. This layer also includes: + + * the 'exposed' attribute (and cherrypy.expose) + * cherrypy.quickstart() + * _cp_config attributes + * cherrypy.tools (including cherrypy.session) + * cherrypy.url() + +The ENVIRONMENT LAYER is used by developers at all levels. It provides +information about the current request and response, plus the application +and server environment, via a (default) set of top-level objects: + + * cherrypy.request + * cherrypy.response + * cherrypy.engine + * cherrypy.server + * cherrypy.tree + * cherrypy.config + * cherrypy.thread_data + * cherrypy.log + * cherrypy.HTTPError, NotFound, and HTTPRedirect + * cherrypy.lib + +The EXTENSION LAYER allows advanced users to construct and share their own +plugins. It consists of: + + * Hook API + * Tool API + * Toolbox API + * Dispatch API + * Config Namespace API + +Finally, there is the CORE LAYER, which uses the core API's to construct +the default components which are available at higher layers. You can think +of the default components as the 'reference implementation' for CherryPy. +Megaframeworks (and advanced users) may replace the default components +with customized or extended components. The core API's are: + + * Application API + * Engine API + * Request API + * Server API + * WSGI API + +These API's are described in the `CherryPy specification `_. +""" + +__version__ = "3.6.0" + +from cherrypy._cpcompat import urljoin as _urljoin, urlencode as _urlencode +from cherrypy._cpcompat import basestring, unicodestr, set + +from cherrypy._cperror import HTTPError, HTTPRedirect, InternalRedirect +from cherrypy._cperror import NotFound, CherryPyException, TimeoutError + +from cherrypy import _cpdispatch as dispatch + +from cherrypy import _cptools +tools = _cptools.default_toolbox +Tool = _cptools.Tool + +from cherrypy import _cprequest +from cherrypy.lib import httputil as _httputil + +from cherrypy import _cptree +tree = _cptree.Tree() +from cherrypy._cptree import Application +from cherrypy import _cpwsgi as wsgi + +from cherrypy import process +try: + from cherrypy.process import win32 + engine = win32.Win32Bus() + engine.console_control_handler = win32.ConsoleCtrlHandler(engine) + del win32 +except ImportError: + engine = process.bus + + +# Timeout monitor. We add two channels to the engine +# to which cherrypy.Application will publish. +engine.listeners['before_request'] = set() +engine.listeners['after_request'] = set() + + +class _TimeoutMonitor(process.plugins.Monitor): + + def __init__(self, bus): + self.servings = [] + process.plugins.Monitor.__init__(self, bus, self.run) + + def before_request(self): + self.servings.append((serving.request, serving.response)) + + def after_request(self): + try: + self.servings.remove((serving.request, serving.response)) + except ValueError: + pass + + def run(self): + """Check timeout on all responses. (Internal)""" + for req, resp in self.servings: + resp.check_timeout() +engine.timeout_monitor = _TimeoutMonitor(engine) +engine.timeout_monitor.subscribe() + +engine.autoreload = process.plugins.Autoreloader(engine) +engine.autoreload.subscribe() + +engine.thread_manager = process.plugins.ThreadManager(engine) +engine.thread_manager.subscribe() + +engine.signal_handler = process.plugins.SignalHandler(engine) + + +class _HandleSignalsPlugin(object): + + """Handle signals from other processes based on the configured + platform handlers above.""" + + def __init__(self, bus): + self.bus = bus + + def subscribe(self): + """Add the handlers based on the platform""" + if hasattr(self.bus, "signal_handler"): + self.bus.signal_handler.subscribe() + if hasattr(self.bus, "console_control_handler"): + self.bus.console_control_handler.subscribe() + +engine.signals = _HandleSignalsPlugin(engine) + + +from cherrypy import _cpserver +server = _cpserver.Server() +server.subscribe() + + +def quickstart(root=None, script_name="", config=None): + """Mount the given root, start the builtin server (and engine), then block. + + root: an instance of a "controller class" (a collection of page handler + methods) which represents the root of the application. + script_name: a string containing the "mount point" of the application. + This should start with a slash, and be the path portion of the URL + at which to mount the given root. For example, if root.index() will + handle requests to "http://www.example.com:8080/dept/app1/", then + the script_name argument would be "/dept/app1". + + It MUST NOT end in a slash. If the script_name refers to the root + of the URI, it MUST be an empty string (not "/"). + config: a file or dict containing application config. If this contains + a [global] section, those entries will be used in the global + (site-wide) config. + """ + if config: + _global_conf_alias.update(config) + + tree.mount(root, script_name, config) + + engine.signals.subscribe() + engine.start() + engine.block() + + +from cherrypy._cpcompat import threadlocal as _local + + +class _Serving(_local): + + """An interface for registering request and response objects. + + Rather than have a separate "thread local" object for the request and + the response, this class works as a single threadlocal container for + both objects (and any others which developers wish to define). In this + way, we can easily dump those objects when we stop/start a new HTTP + conversation, yet still refer to them as module-level globals in a + thread-safe way. + """ + + request = _cprequest.Request(_httputil.Host("127.0.0.1", 80), + _httputil.Host("127.0.0.1", 1111)) + """ + The request object for the current thread. In the main thread, + and any threads which are not receiving HTTP requests, this is None.""" + + response = _cprequest.Response() + """ + The response object for the current thread. In the main thread, + and any threads which are not receiving HTTP requests, this is None.""" + + def load(self, request, response): + self.request = request + self.response = response + + def clear(self): + """Remove all attributes of self.""" + self.__dict__.clear() + +serving = _Serving() + + +class _ThreadLocalProxy(object): + + __slots__ = ['__attrname__', '__dict__'] + + def __init__(self, attrname): + self.__attrname__ = attrname + + def __getattr__(self, name): + child = getattr(serving, self.__attrname__) + return getattr(child, name) + + def __setattr__(self, name, value): + if name in ("__attrname__", ): + object.__setattr__(self, name, value) + else: + child = getattr(serving, self.__attrname__) + setattr(child, name, value) + + def __delattr__(self, name): + child = getattr(serving, self.__attrname__) + delattr(child, name) + + def _get_dict(self): + child = getattr(serving, self.__attrname__) + d = child.__class__.__dict__.copy() + d.update(child.__dict__) + return d + __dict__ = property(_get_dict) + + def __getitem__(self, key): + child = getattr(serving, self.__attrname__) + return child[key] + + def __setitem__(self, key, value): + child = getattr(serving, self.__attrname__) + child[key] = value + + def __delitem__(self, key): + child = getattr(serving, self.__attrname__) + del child[key] + + def __contains__(self, key): + child = getattr(serving, self.__attrname__) + return key in child + + def __len__(self): + child = getattr(serving, self.__attrname__) + return len(child) + + def __nonzero__(self): + child = getattr(serving, self.__attrname__) + return bool(child) + # Python 3 + __bool__ = __nonzero__ + +# Create request and response object (the same objects will be used +# throughout the entire life of the webserver, but will redirect +# to the "serving" object) +request = _ThreadLocalProxy('request') +response = _ThreadLocalProxy('response') + +# Create thread_data object as a thread-specific all-purpose storage + + +class _ThreadData(_local): + + """A container for thread-specific data.""" +thread_data = _ThreadData() + + +# Monkeypatch pydoc to allow help() to go through the threadlocal proxy. +# Jan 2007: no Googleable examples of anyone else replacing pydoc.resolve. +# The only other way would be to change what is returned from type(request) +# and that's not possible in pure Python (you'd have to fake ob_type). +def _cherrypy_pydoc_resolve(thing, forceload=0): + """Given an object or a path to an object, get the object and its name.""" + if isinstance(thing, _ThreadLocalProxy): + thing = getattr(serving, thing.__attrname__) + return _pydoc._builtin_resolve(thing, forceload) + +try: + import pydoc as _pydoc + _pydoc._builtin_resolve = _pydoc.resolve + _pydoc.resolve = _cherrypy_pydoc_resolve +except ImportError: + pass + + +from cherrypy import _cplogging + + +class _GlobalLogManager(_cplogging.LogManager): + + """A site-wide LogManager; routes to app.log or global log as appropriate. + + This :class:`LogManager` implements + cherrypy.log() and cherrypy.log.access(). If either + function is called during a request, the message will be sent to the + logger for the current Application. If they are called outside of a + request, the message will be sent to the site-wide logger. + """ + + def __call__(self, *args, **kwargs): + """Log the given message to the app.log or global log as appropriate. + """ + # Do NOT use try/except here. See + # https://bitbucket.org/cherrypy/cherrypy/issue/945 + if hasattr(request, 'app') and hasattr(request.app, 'log'): + log = request.app.log + else: + log = self + return log.error(*args, **kwargs) + + def access(self): + """Log an access message to the app.log or global log as appropriate. + """ + try: + return request.app.log.access() + except AttributeError: + return _cplogging.LogManager.access(self) + + +log = _GlobalLogManager() +# Set a default screen handler on the global log. +log.screen = True +log.error_file = '' +# Using an access file makes CP about 10% slower. Leave off by default. +log.access_file = '' + + +def _buslog(msg, level): + log.error(msg, 'ENGINE', severity=level) +engine.subscribe('log', _buslog) + +# Helper functions for CP apps # + + +def expose(func=None, alias=None): + """Expose the function, optionally providing an alias or set of aliases.""" + def expose_(func): + func.exposed = True + if alias is not None: + if isinstance(alias, basestring): + parents[alias.replace(".", "_")] = func + else: + for a in alias: + parents[a.replace(".", "_")] = func + return func + + import sys + import types + if isinstance(func, (types.FunctionType, types.MethodType)): + if alias is None: + # @expose + func.exposed = True + return func + else: + # func = expose(func, alias) + parents = sys._getframe(1).f_locals + return expose_(func) + elif func is None: + if alias is None: + # @expose() + parents = sys._getframe(1).f_locals + return expose_ + else: + # @expose(alias="alias") or + # @expose(alias=["alias1", "alias2"]) + parents = sys._getframe(1).f_locals + return expose_ + else: + # @expose("alias") or + # @expose(["alias1", "alias2"]) + parents = sys._getframe(1).f_locals + alias = func + return expose_ + + +def popargs(*args, **kwargs): + """A decorator for _cp_dispatch + (cherrypy.dispatch.Dispatcher.dispatch_method_name). + + Optional keyword argument: handler=(Object or Function) + + Provides a _cp_dispatch function that pops off path segments into + cherrypy.request.params under the names specified. The dispatch + is then forwarded on to the next vpath element. + + Note that any existing (and exposed) member function of the class that + popargs is applied to will override that value of the argument. For + instance, if you have a method named "list" on the class decorated with + popargs, then accessing "/list" will call that function instead of popping + it off as the requested parameter. This restriction applies to all + _cp_dispatch functions. The only way around this restriction is to create + a "blank class" whose only function is to provide _cp_dispatch. + + If there are path elements after the arguments, or more arguments + are requested than are available in the vpath, then the 'handler' + keyword argument specifies the next object to handle the parameterized + request. If handler is not specified or is None, then self is used. + If handler is a function rather than an instance, then that function + will be called with the args specified and the return value from that + function used as the next object INSTEAD of adding the parameters to + cherrypy.request.args. + + This decorator may be used in one of two ways: + + As a class decorator: + @cherrypy.popargs('year', 'month', 'day') + class Blog: + def index(self, year=None, month=None, day=None): + #Process the parameters here; any url like + #/, /2009, /2009/12, or /2009/12/31 + #will fill in the appropriate parameters. + + def create(self): + #This link will still be available at /create. Defined functions + #take precedence over arguments. + + Or as a member of a class: + class Blog: + _cp_dispatch = cherrypy.popargs('year', 'month', 'day') + #... + + The handler argument may be used to mix arguments with built in functions. + For instance, the following setup allows different activities at the + day, month, and year level: + + class DayHandler: + def index(self, year, month, day): + #Do something with this day; probably list entries + + def delete(self, year, month, day): + #Delete all entries for this day + + @cherrypy.popargs('day', handler=DayHandler()) + class MonthHandler: + def index(self, year, month): + #Do something with this month; probably list entries + + def delete(self, year, month): + #Delete all entries for this month + + @cherrypy.popargs('month', handler=MonthHandler()) + class YearHandler: + def index(self, year): + #Do something with this year + + #... + + @cherrypy.popargs('year', handler=YearHandler()) + class Root: + def index(self): + #... + + """ + + # Since keyword arg comes after *args, we have to process it ourselves + # for lower versions of python. + + handler = None + handler_call = False + for k, v in kwargs.items(): + if k == 'handler': + handler = v + else: + raise TypeError( + "cherrypy.popargs() got an unexpected keyword argument '{0}'" + .format(k) + ) + + import inspect + + if handler is not None \ + and (hasattr(handler, '__call__') or inspect.isclass(handler)): + handler_call = True + + def decorated(cls_or_self=None, vpath=None): + if inspect.isclass(cls_or_self): + # cherrypy.popargs is a class decorator + cls = cls_or_self + setattr(cls, dispatch.Dispatcher.dispatch_method_name, decorated) + return cls + + # We're in the actual function + self = cls_or_self + parms = {} + for arg in args: + if not vpath: + break + parms[arg] = vpath.pop(0) + + if handler is not None: + if handler_call: + return handler(**parms) + else: + request.params.update(parms) + return handler + + request.params.update(parms) + + # If we are the ultimate handler, then to prevent our _cp_dispatch + # from being called again, we will resolve remaining elements through + # getattr() directly. + if vpath: + return getattr(self, vpath.pop(0), None) + else: + return self + + return decorated + + +def url(path="", qs="", script_name=None, base=None, relative=None): + """Create an absolute URL for the given path. + + If 'path' starts with a slash ('/'), this will return + (base + script_name + path + qs). + If it does not start with a slash, this returns + (base + script_name [+ request.path_info] + path + qs). + + If script_name is None, cherrypy.request will be used + to find a script_name, if available. + + If base is None, cherrypy.request.base will be used (if available). + Note that you can use cherrypy.tools.proxy to change this. + + Finally, note that this function can be used to obtain an absolute URL + for the current request path (minus the querystring) by passing no args. + If you call url(qs=cherrypy.request.query_string), you should get the + original browser URL (assuming no internal redirections). + + If relative is None or not provided, request.app.relative_urls will + be used (if available, else False). If False, the output will be an + absolute URL (including the scheme, host, vhost, and script_name). + If True, the output will instead be a URL that is relative to the + current request path, perhaps including '..' atoms. If relative is + the string 'server', the output will instead be a URL that is + relative to the server root; i.e., it will start with a slash. + """ + if isinstance(qs, (tuple, list, dict)): + qs = _urlencode(qs) + if qs: + qs = '?' + qs + + if request.app: + if not path.startswith("/"): + # Append/remove trailing slash from path_info as needed + # (this is to support mistyped URL's without redirecting; + # if you want to redirect, use tools.trailing_slash). + pi = request.path_info + if request.is_index is True: + if not pi.endswith('/'): + pi = pi + '/' + elif request.is_index is False: + if pi.endswith('/') and pi != '/': + pi = pi[:-1] + + if path == "": + path = pi + else: + path = _urljoin(pi, path) + + if script_name is None: + script_name = request.script_name + if base is None: + base = request.base + + newurl = base + script_name + path + qs + else: + # No request.app (we're being called outside a request). + # We'll have to guess the base from server.* attributes. + # This will produce very different results from the above + # if you're using vhosts or tools.proxy. + if base is None: + base = server.base() + + path = (script_name or "") + path + newurl = base + path + qs + + if './' in newurl: + # Normalize the URL by removing ./ and ../ + atoms = [] + for atom in newurl.split('/'): + if atom == '.': + pass + elif atom == '..': + atoms.pop() + else: + atoms.append(atom) + newurl = '/'.join(atoms) + + # At this point, we should have a fully-qualified absolute URL. + + if relative is None: + relative = getattr(request.app, "relative_urls", False) + + # See http://www.ietf.org/rfc/rfc2396.txt + if relative == 'server': + # "A relative reference beginning with a single slash character is + # termed an absolute-path reference, as defined by ..." + # This is also sometimes called "server-relative". + newurl = '/' + '/'.join(newurl.split('/', 3)[3:]) + elif relative: + # "A relative reference that does not begin with a scheme name + # or a slash character is termed a relative-path reference." + old = url(relative=False).split('/')[:-1] + new = newurl.split('/') + while old and new: + a, b = old[0], new[0] + if a != b: + break + old.pop(0) + new.pop(0) + new = (['..'] * len(old)) + new + newurl = '/'.join(new) + + return newurl + + +# import _cpconfig last so it can reference other top-level objects +from cherrypy import _cpconfig +# Use _global_conf_alias so quickstart can use 'config' as an arg +# without shadowing cherrypy.config. +config = _global_conf_alias = _cpconfig.Config() +config.defaults = { + 'tools.log_tracebacks.on': True, + 'tools.log_headers.on': True, + 'tools.trailing_slash.on': True, + 'tools.encode.on': True +} +config.namespaces["log"] = lambda k, v: setattr(log, k, v) +config.namespaces["checker"] = lambda k, v: setattr(checker, k, v) +# Must reset to get our defaults applied. +config.reset() + +from cherrypy import _cpchecker +checker = _cpchecker.Checker() +engine.subscribe('start', checker) diff --git a/lib/cherrypy/_cpchecker.py b/lib/cherrypy/_cpchecker.py new file mode 100644 index 00000000..4ef82597 --- /dev/null +++ b/lib/cherrypy/_cpchecker.py @@ -0,0 +1,332 @@ +import os +import warnings + +import cherrypy +from cherrypy._cpcompat import iteritems, copykeys, builtins + + +class Checker(object): + + """A checker for CherryPy sites and their mounted applications. + + When this object is called at engine startup, it executes each + of its own methods whose names start with ``check_``. If you wish + to disable selected checks, simply add a line in your global + config which sets the appropriate method to False:: + + [global] + checker.check_skipped_app_config = False + + You may also dynamically add or replace ``check_*`` methods in this way. + """ + + on = True + """If True (the default), run all checks; if False, turn off all checks.""" + + def __init__(self): + self._populate_known_types() + + def __call__(self): + """Run all check_* methods.""" + if self.on: + oldformatwarning = warnings.formatwarning + warnings.formatwarning = self.formatwarning + try: + for name in dir(self): + if name.startswith("check_"): + method = getattr(self, name) + if method and hasattr(method, '__call__'): + method() + finally: + warnings.formatwarning = oldformatwarning + + def formatwarning(self, message, category, filename, lineno, line=None): + """Function to format a warning.""" + return "CherryPy Checker:\n%s\n\n" % message + + # This value should be set inside _cpconfig. + global_config_contained_paths = False + + def check_app_config_entries_dont_start_with_script_name(self): + """Check for Application config with sections that repeat script_name. + """ + for sn, app in cherrypy.tree.apps.items(): + if not isinstance(app, cherrypy.Application): + continue + if not app.config: + continue + if sn == '': + continue + sn_atoms = sn.strip("/").split("/") + for key in app.config.keys(): + key_atoms = key.strip("/").split("/") + if key_atoms[:len(sn_atoms)] == sn_atoms: + warnings.warn( + "The application mounted at %r has config " + "entries that start with its script name: %r" % (sn, + key)) + + def check_site_config_entries_in_app_config(self): + """Check for mounted Applications that have site-scoped config.""" + for sn, app in iteritems(cherrypy.tree.apps): + if not isinstance(app, cherrypy.Application): + continue + + msg = [] + for section, entries in iteritems(app.config): + if section.startswith('/'): + for key, value in iteritems(entries): + for n in ("engine.", "server.", "tree.", "checker."): + if key.startswith(n): + msg.append("[%s] %s = %s" % + (section, key, value)) + if msg: + msg.insert(0, + "The application mounted at %r contains the " + "following config entries, which are only allowed " + "in site-wide config. Move them to a [global] " + "section and pass them to cherrypy.config.update() " + "instead of tree.mount()." % sn) + warnings.warn(os.linesep.join(msg)) + + def check_skipped_app_config(self): + """Check for mounted Applications that have no config.""" + for sn, app in cherrypy.tree.apps.items(): + if not isinstance(app, cherrypy.Application): + continue + if not app.config: + msg = "The Application mounted at %r has an empty config." % sn + if self.global_config_contained_paths: + msg += (" It looks like the config you passed to " + "cherrypy.config.update() contains application-" + "specific sections. You must explicitly pass " + "application config via " + "cherrypy.tree.mount(..., config=app_config)") + warnings.warn(msg) + return + + def check_app_config_brackets(self): + """Check for Application config with extraneous brackets in section + names. + """ + for sn, app in cherrypy.tree.apps.items(): + if not isinstance(app, cherrypy.Application): + continue + if not app.config: + continue + for key in app.config.keys(): + if key.startswith("[") or key.endswith("]"): + warnings.warn( + "The application mounted at %r has config " + "section names with extraneous brackets: %r. " + "Config *files* need brackets; config *dicts* " + "(e.g. passed to tree.mount) do not." % (sn, key)) + + def check_static_paths(self): + """Check Application config for incorrect static paths.""" + # Use the dummy Request object in the main thread. + request = cherrypy.request + for sn, app in cherrypy.tree.apps.items(): + if not isinstance(app, cherrypy.Application): + continue + request.app = app + for section in app.config: + # get_resource will populate request.config + request.get_resource(section + "/dummy.html") + conf = request.config.get + + if conf("tools.staticdir.on", False): + msg = "" + root = conf("tools.staticdir.root") + dir = conf("tools.staticdir.dir") + if dir is None: + msg = "tools.staticdir.dir is not set." + else: + fulldir = "" + if os.path.isabs(dir): + fulldir = dir + if root: + msg = ("dir is an absolute path, even " + "though a root is provided.") + testdir = os.path.join(root, dir[1:]) + if os.path.exists(testdir): + msg += ( + "\nIf you meant to serve the " + "filesystem folder at %r, remove the " + "leading slash from dir." % (testdir,)) + else: + if not root: + msg = ( + "dir is a relative path and " + "no root provided.") + else: + fulldir = os.path.join(root, dir) + if not os.path.isabs(fulldir): + msg = ("%r is not an absolute path." % ( + fulldir,)) + + if fulldir and not os.path.exists(fulldir): + if msg: + msg += "\n" + msg += ("%r (root + dir) is not an existing " + "filesystem path." % fulldir) + + if msg: + warnings.warn("%s\nsection: [%s]\nroot: %r\ndir: %r" + % (msg, section, root, dir)) + + # -------------------------- Compatibility -------------------------- # + obsolete = { + 'server.default_content_type': 'tools.response_headers.headers', + 'log_access_file': 'log.access_file', + 'log_config_options': None, + 'log_file': 'log.error_file', + 'log_file_not_found': None, + 'log_request_headers': 'tools.log_headers.on', + 'log_to_screen': 'log.screen', + 'show_tracebacks': 'request.show_tracebacks', + 'throw_errors': 'request.throw_errors', + 'profiler.on': ('cherrypy.tree.mount(profiler.make_app(' + 'cherrypy.Application(Root())))'), + } + + deprecated = {} + + def _compat(self, config): + """Process config and warn on each obsolete or deprecated entry.""" + for section, conf in config.items(): + if isinstance(conf, dict): + for k, v in conf.items(): + if k in self.obsolete: + warnings.warn("%r is obsolete. Use %r instead.\n" + "section: [%s]" % + (k, self.obsolete[k], section)) + elif k in self.deprecated: + warnings.warn("%r is deprecated. Use %r instead.\n" + "section: [%s]" % + (k, self.deprecated[k], section)) + else: + if section in self.obsolete: + warnings.warn("%r is obsolete. Use %r instead." + % (section, self.obsolete[section])) + elif section in self.deprecated: + warnings.warn("%r is deprecated. Use %r instead." + % (section, self.deprecated[section])) + + def check_compatibility(self): + """Process config and warn on each obsolete or deprecated entry.""" + self._compat(cherrypy.config) + for sn, app in cherrypy.tree.apps.items(): + if not isinstance(app, cherrypy.Application): + continue + self._compat(app.config) + + # ------------------------ Known Namespaces ------------------------ # + extra_config_namespaces = [] + + def _known_ns(self, app): + ns = ["wsgi"] + ns.extend(copykeys(app.toolboxes)) + ns.extend(copykeys(app.namespaces)) + ns.extend(copykeys(app.request_class.namespaces)) + ns.extend(copykeys(cherrypy.config.namespaces)) + ns += self.extra_config_namespaces + + for section, conf in app.config.items(): + is_path_section = section.startswith("/") + if is_path_section and isinstance(conf, dict): + for k, v in conf.items(): + atoms = k.split(".") + if len(atoms) > 1: + if atoms[0] not in ns: + # Spit out a special warning if a known + # namespace is preceded by "cherrypy." + if atoms[0] == "cherrypy" and atoms[1] in ns: + msg = ( + "The config entry %r is invalid; " + "try %r instead.\nsection: [%s]" + % (k, ".".join(atoms[1:]), section)) + else: + msg = ( + "The config entry %r is invalid, " + "because the %r config namespace " + "is unknown.\n" + "section: [%s]" % (k, atoms[0], section)) + warnings.warn(msg) + elif atoms[0] == "tools": + if atoms[1] not in dir(cherrypy.tools): + msg = ( + "The config entry %r may be invalid, " + "because the %r tool was not found.\n" + "section: [%s]" % (k, atoms[1], section)) + warnings.warn(msg) + + def check_config_namespaces(self): + """Process config and warn on each unknown config namespace.""" + for sn, app in cherrypy.tree.apps.items(): + if not isinstance(app, cherrypy.Application): + continue + self._known_ns(app) + + # -------------------------- Config Types -------------------------- # + known_config_types = {} + + def _populate_known_types(self): + b = [x for x in vars(builtins).values() + if type(x) is type(str)] + + def traverse(obj, namespace): + for name in dir(obj): + # Hack for 3.2's warning about body_params + if name == 'body_params': + continue + vtype = type(getattr(obj, name, None)) + if vtype in b: + self.known_config_types[namespace + "." + name] = vtype + + traverse(cherrypy.request, "request") + traverse(cherrypy.response, "response") + traverse(cherrypy.server, "server") + traverse(cherrypy.engine, "engine") + traverse(cherrypy.log, "log") + + def _known_types(self, config): + msg = ("The config entry %r in section %r is of type %r, " + "which does not match the expected type %r.") + + for section, conf in config.items(): + if isinstance(conf, dict): + for k, v in conf.items(): + if v is not None: + expected_type = self.known_config_types.get(k, None) + vtype = type(v) + if expected_type and vtype != expected_type: + warnings.warn(msg % (k, section, vtype.__name__, + expected_type.__name__)) + else: + k, v = section, conf + if v is not None: + expected_type = self.known_config_types.get(k, None) + vtype = type(v) + if expected_type and vtype != expected_type: + warnings.warn(msg % (k, section, vtype.__name__, + expected_type.__name__)) + + def check_config_types(self): + """Assert that config values are of the same type as default values.""" + self._known_types(cherrypy.config) + for sn, app in cherrypy.tree.apps.items(): + if not isinstance(app, cherrypy.Application): + continue + self._known_types(app.config) + + # -------------------- Specific config warnings -------------------- # + def check_localhost(self): + """Warn if any socket_host is 'localhost'. See #711.""" + for k, v in cherrypy.config.items(): + if k == 'server.socket_host' and v == 'localhost': + warnings.warn("The use of 'localhost' as a socket host can " + "cause problems on newer systems, since " + "'localhost' can map to either an IPv4 or an " + "IPv6 address. You should use '127.0.0.1' " + "or '[::1]' instead.") diff --git a/lib/cherrypy/_cpcompat.py b/lib/cherrypy/_cpcompat.py new file mode 100644 index 00000000..8a98b38b --- /dev/null +++ b/lib/cherrypy/_cpcompat.py @@ -0,0 +1,383 @@ +"""Compatibility code for using CherryPy with various versions of Python. + +CherryPy 3.2 is compatible with Python versions 2.3+. This module provides a +useful abstraction over the differences between Python versions, sometimes by +preferring a newer idiom, sometimes an older one, and sometimes a custom one. + +In particular, Python 2 uses str and '' for byte strings, while Python 3 +uses str and '' for unicode strings. We will call each of these the 'native +string' type for each version. Because of this major difference, this module +provides new 'bytestr', 'unicodestr', and 'nativestr' attributes, as well as +two functions: 'ntob', which translates native strings (of type 'str') into +byte strings regardless of Python version, and 'ntou', which translates native +strings to unicode strings. This also provides a 'BytesIO' name for dealing +specifically with bytes, and a 'StringIO' name for dealing with native strings. +It also provides a 'base64_decode' function with native strings as input and +output. +""" +import os +import re +import sys +import threading + +if sys.version_info >= (3, 0): + py3k = True + bytestr = bytes + unicodestr = str + nativestr = unicodestr + basestring = (bytes, str) + + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given + encoding. + """ + assert_native(n) + # In Python 3, the native string type is unicode + return n.encode(encoding) + + def ntou(n, encoding='ISO-8859-1'): + """Return the given native string as a unicode string with the given + encoding. + """ + assert_native(n) + # In Python 3, the native string type is unicode + return n + + def tonative(n, encoding='ISO-8859-1'): + """Return the given string as a native string in the given encoding.""" + # In Python 3, the native string type is unicode + if isinstance(n, bytes): + return n.decode(encoding) + return n + # type("") + from io import StringIO + # bytes: + from io import BytesIO as BytesIO +else: + # Python 2 + py3k = False + bytestr = str + unicodestr = unicode + nativestr = bytestr + basestring = basestring + + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given + encoding. + """ + assert_native(n) + # In Python 2, the native string type is bytes. Assume it's already + # in the given encoding, which for ISO-8859-1 is almost always what + # was intended. + return n + + def ntou(n, encoding='ISO-8859-1'): + """Return the given native string as a unicode string with the given + encoding. + """ + assert_native(n) + # In Python 2, the native string type is bytes. + # First, check for the special encoding 'escape'. The test suite uses + # this to signal that it wants to pass a string with embedded \uXXXX + # escapes, but without having to prefix it with u'' for Python 2, + # but no prefix for Python 3. + if encoding == 'escape': + return unicode( + re.sub(r'\\u([0-9a-zA-Z]{4})', + lambda m: unichr(int(m.group(1), 16)), + n.decode('ISO-8859-1'))) + # Assume it's already in the given encoding, which for ISO-8859-1 + # is almost always what was intended. + return n.decode(encoding) + + def tonative(n, encoding='ISO-8859-1'): + """Return the given string as a native string in the given encoding.""" + # In Python 2, the native string type is bytes. + if isinstance(n, unicode): + return n.encode(encoding) + return n + try: + # type("") + from cStringIO import StringIO + except ImportError: + # type("") + from StringIO import StringIO + # bytes: + BytesIO = StringIO + + +def assert_native(n): + if not isinstance(n, nativestr): + raise TypeError("n must be a native str (got %s)" % type(n).__name__) + +try: + set = set +except NameError: + from sets import Set as set + +try: + # Python 3.1+ + from base64 import decodebytes as _base64_decodebytes +except ImportError: + # Python 3.0- + # since CherryPy claims compability with Python 2.3, we must use + # the legacy API of base64 + from base64 import decodestring as _base64_decodebytes + + +def base64_decode(n, encoding='ISO-8859-1'): + """Return the native string base64-decoded (as a native string).""" + if isinstance(n, unicodestr): + b = n.encode(encoding) + else: + b = n + b = _base64_decodebytes(b) + if nativestr is unicodestr: + return b.decode(encoding) + else: + return b + +try: + # Python 2.5+ + from hashlib import md5 +except ImportError: + from md5 import new as md5 + +try: + # Python 2.5+ + from hashlib import sha1 as sha +except ImportError: + from sha import new as sha + +try: + sorted = sorted +except NameError: + def sorted(i): + i = i[:] + i.sort() + return i + +try: + reversed = reversed +except NameError: + def reversed(x): + i = len(x) + while i > 0: + i -= 1 + yield x[i] + +try: + # Python 3 + from urllib.parse import urljoin, urlencode + from urllib.parse import quote, quote_plus + from urllib.request import unquote, urlopen + from urllib.request import parse_http_list, parse_keqv_list +except ImportError: + # Python 2 + from urlparse import urljoin + from urllib import urlencode, urlopen + from urllib import quote, quote_plus + from urllib import unquote + from urllib2 import parse_http_list, parse_keqv_list + +try: + from threading import local as threadlocal +except ImportError: + from cherrypy._cpthreadinglocal import local as threadlocal + +try: + dict.iteritems + # Python 2 + iteritems = lambda d: d.iteritems() + copyitems = lambda d: d.items() +except AttributeError: + # Python 3 + iteritems = lambda d: d.items() + copyitems = lambda d: list(d.items()) + +try: + dict.iterkeys + # Python 2 + iterkeys = lambda d: d.iterkeys() + copykeys = lambda d: d.keys() +except AttributeError: + # Python 3 + iterkeys = lambda d: d.keys() + copykeys = lambda d: list(d.keys()) + +try: + dict.itervalues + # Python 2 + itervalues = lambda d: d.itervalues() + copyvalues = lambda d: d.values() +except AttributeError: + # Python 3 + itervalues = lambda d: d.values() + copyvalues = lambda d: list(d.values()) + +try: + # Python 3 + import builtins +except ImportError: + # Python 2 + import __builtin__ as builtins + +try: + # Python 2. We try Python 2 first clients on Python 2 + # don't try to import the 'http' module from cherrypy.lib + from Cookie import SimpleCookie, CookieError + from httplib import BadStatusLine, HTTPConnection, IncompleteRead + from httplib import NotConnected + from BaseHTTPServer import BaseHTTPRequestHandler +except ImportError: + # Python 3 + from http.cookies import SimpleCookie, CookieError + from http.client import BadStatusLine, HTTPConnection, IncompleteRead + from http.client import NotConnected + from http.server import BaseHTTPRequestHandler + +# Some platforms don't expose HTTPSConnection, so handle it separately +if py3k: + try: + from http.client import HTTPSConnection + except ImportError: + # Some platforms which don't have SSL don't expose HTTPSConnection + HTTPSConnection = None +else: + try: + from httplib import HTTPSConnection + except ImportError: + HTTPSConnection = None + +try: + # Python 2 + xrange = xrange +except NameError: + # Python 3 + xrange = range + +import threading +if hasattr(threading.Thread, "daemon"): + # Python 2.6+ + def get_daemon(t): + return t.daemon + + def set_daemon(t, val): + t.daemon = val +else: + def get_daemon(t): + return t.isDaemon() + + def set_daemon(t, val): + t.setDaemon(val) + +try: + from email.utils import formatdate + + def HTTPDate(timeval=None): + return formatdate(timeval, usegmt=True) +except ImportError: + from rfc822 import formatdate as HTTPDate + +try: + # Python 3 + from urllib.parse import unquote as parse_unquote + + def unquote_qs(atom, encoding, errors='strict'): + return parse_unquote( + atom.replace('+', ' '), + encoding=encoding, + errors=errors) +except ImportError: + # Python 2 + from urllib import unquote as parse_unquote + + def unquote_qs(atom, encoding, errors='strict'): + return parse_unquote(atom.replace('+', ' ')).decode(encoding, errors) + +try: + # Prefer simplejson, which is usually more advanced than the builtin + # module. + import simplejson as json + json_decode = json.JSONDecoder().decode + _json_encode = json.JSONEncoder().iterencode +except ImportError: + if sys.version_info >= (2, 6): + # Python >=2.6 : json is part of the standard library + import json + json_decode = json.JSONDecoder().decode + _json_encode = json.JSONEncoder().iterencode + else: + json = None + + def json_decode(s): + raise ValueError('No JSON library is available') + + def _json_encode(s): + raise ValueError('No JSON library is available') +finally: + if json and py3k: + # The two Python 3 implementations (simplejson/json) + # outputs str. We need bytes. + def json_encode(value): + for chunk in _json_encode(value): + yield chunk.encode('utf8') + else: + json_encode = _json_encode + + +try: + import cPickle as pickle +except ImportError: + # In Python 2, pickle is a Python version. + # In Python 3, pickle is the sped-up C version. + import pickle + +try: + os.urandom(20) + import binascii + + def random20(): + return binascii.hexlify(os.urandom(20)).decode('ascii') +except (AttributeError, NotImplementedError): + import random + # os.urandom not available until Python 2.4. Fall back to random.random. + + def random20(): + return sha('%s' % random.random()).hexdigest() + +try: + from _thread import get_ident as get_thread_ident +except ImportError: + from thread import get_ident as get_thread_ident + +try: + # Python 3 + next = next +except NameError: + # Python 2 + def next(i): + return i.next() + +if sys.version_info >= (3, 3): + Timer = threading.Timer + Event = threading.Event +else: + # Python 3.2 and earlier + Timer = threading._Timer + Event = threading._Event + +# Prior to Python 2.6, the Thread class did not have a .daemon property. +# This mix-in adds that property. + + +class SetDaemonProperty: + + def __get_daemon(self): + return self.isDaemon() + + def __set_daemon(self, daemon): + self.setDaemon(daemon) + + if sys.version_info < (2, 6): + daemon = property(__get_daemon, __set_daemon) diff --git a/lib/cherrypy/_cpcompat_subprocess.py b/lib/cherrypy/_cpcompat_subprocess.py new file mode 100644 index 00000000..478f4a74 --- /dev/null +++ b/lib/cherrypy/_cpcompat_subprocess.py @@ -0,0 +1,1544 @@ +# subprocess - Subprocesses with accessible I/O streams +# +# For more information about this module, see PEP 324. +# +# This module should remain compatible with Python 2.2, see PEP 291. +# +# Copyright (c) 2003-2005 by Peter Astrand +# +# Licensed to PSF under a Contributor Agreement. +# See http://www.python.org/2.4/license for licensing details. + +r"""subprocess - Subprocesses with accessible I/O streams + +This module allows you to spawn processes, connect to their +input/output/error pipes, and obtain their return codes. This module +intends to replace several other, older modules and functions, like: + +os.system +os.spawn* +os.popen* +popen2.* +commands.* + +Information about how the subprocess module can be used to replace these +modules and functions can be found below. + + + +Using the subprocess module +=========================== +This module defines one class called Popen: + +class Popen(args, bufsize=0, executable=None, + stdin=None, stdout=None, stderr=None, + preexec_fn=None, close_fds=False, shell=False, + cwd=None, env=None, universal_newlines=False, + startupinfo=None, creationflags=0): + + +Arguments are: + +args should be a string, or a sequence of program arguments. The +program to execute is normally the first item in the args sequence or +string, but can be explicitly set by using the executable argument. + +On UNIX, with shell=False (default): In this case, the Popen class +uses os.execvp() to execute the child program. args should normally +be a sequence. A string will be treated as a sequence with the string +as the only item (the program to execute). + +On UNIX, with shell=True: If args is a string, it specifies the +command string to execute through the shell. If args is a sequence, +the first item specifies the command string, and any additional items +will be treated as additional shell arguments. + +On Windows: the Popen class uses CreateProcess() to execute the child +program, which operates on strings. If args is a sequence, it will be +converted to a string using the list2cmdline method. Please note that +not all MS Windows applications interpret the command line the same +way: The list2cmdline is designed for applications using the same +rules as the MS C runtime. + +bufsize, if given, has the same meaning as the corresponding argument +to the built-in open() function: 0 means unbuffered, 1 means line +buffered, any other positive value means use a buffer of +(approximately) that size. A negative bufsize means to use the system +default, which usually means fully buffered. The default value for +bufsize is 0 (unbuffered). + +stdin, stdout and stderr specify the executed programs' standard +input, standard output and standard error file handles, respectively. +Valid values are PIPE, an existing file descriptor (a positive +integer), an existing file object, and None. PIPE indicates that a +new pipe to the child should be created. With None, no redirection +will occur; the child's file handles will be inherited from the +parent. Additionally, stderr can be STDOUT, which indicates that the +stderr data from the applications should be captured into the same +file handle as for stdout. + +If preexec_fn is set to a callable object, this object will be called +in the child process just before the child is executed. + +If close_fds is true, all file descriptors except 0, 1 and 2 will be +closed before the child process is executed. + +if shell is true, the specified command will be executed through the +shell. + +If cwd is not None, the current directory will be changed to cwd +before the child is executed. + +If env is not None, it defines the environment variables for the new +process. + +If universal_newlines is true, the file objects stdout and stderr are +opened as a text files, but lines may be terminated by any of '\n', +the Unix end-of-line convention, '\r', the Macintosh convention or +'\r\n', the Windows convention. All of these external representations +are seen as '\n' by the Python program. Note: This feature is only +available if Python is built with universal newline support (the +default). Also, the newlines attribute of the file objects stdout, +stdin and stderr are not updated by the communicate() method. + +The startupinfo and creationflags, if given, will be passed to the +underlying CreateProcess() function. They can specify things such as +appearance of the main window and priority for the new process. +(Windows only) + + +This module also defines some shortcut functions: + +call(*popenargs, **kwargs): + Run command with arguments. Wait for command to complete, then + return the returncode attribute. + + The arguments are the same as for the Popen constructor. Example: + + retcode = call(["ls", "-l"]) + +check_call(*popenargs, **kwargs): + Run command with arguments. Wait for command to complete. If the + exit code was zero then return, otherwise raise + CalledProcessError. The CalledProcessError object will have the + return code in the returncode attribute. + + The arguments are the same as for the Popen constructor. Example: + + check_call(["ls", "-l"]) + +check_output(*popenargs, **kwargs): + Run command with arguments and return its output as a byte string. + + If the exit code was non-zero it raises a CalledProcessError. The + CalledProcessError object will have the return code in the returncode + attribute and output in the output attribute. + + The arguments are the same as for the Popen constructor. Example: + + output = check_output(["ls", "-l", "/dev/null"]) + + +Exceptions +---------- +Exceptions raised in the child process, before the new program has +started to execute, will be re-raised in the parent. Additionally, +the exception object will have one extra attribute called +'child_traceback', which is a string containing traceback information +from the childs point of view. + +The most common exception raised is OSError. This occurs, for +example, when trying to execute a non-existent file. Applications +should prepare for OSErrors. + +A ValueError will be raised if Popen is called with invalid arguments. + +check_call() and check_output() will raise CalledProcessError, if the +called process returns a non-zero return code. + + +Security +-------- +Unlike some other popen functions, this implementation will never call +/bin/sh implicitly. This means that all characters, including shell +metacharacters, can safely be passed to child processes. + + +Popen objects +============= +Instances of the Popen class have the following methods: + +poll() + Check if child process has terminated. Returns returncode + attribute. + +wait() + Wait for child process to terminate. Returns returncode attribute. + +communicate(input=None) + Interact with process: Send data to stdin. Read data from stdout + and stderr, until end-of-file is reached. Wait for process to + terminate. The optional input argument should be a string to be + sent to the child process, or None, if no data should be sent to + the child. + + communicate() returns a tuple (stdout, stderr). + + Note: The data read is buffered in memory, so do not use this + method if the data size is large or unlimited. + +The following attributes are also available: + +stdin + If the stdin argument is PIPE, this attribute is a file object + that provides input to the child process. Otherwise, it is None. + +stdout + If the stdout argument is PIPE, this attribute is a file object + that provides output from the child process. Otherwise, it is + None. + +stderr + If the stderr argument is PIPE, this attribute is file object that + provides error output from the child process. Otherwise, it is + None. + +pid + The process ID of the child process. + +returncode + The child return code. A None value indicates that the process + hasn't terminated yet. A negative value -N indicates that the + child was terminated by signal N (UNIX only). + + +Replacing older functions with the subprocess module +==================================================== +In this section, "a ==> b" means that b can be used as a replacement +for a. + +Note: All functions in this section fail (more or less) silently if +the executed program cannot be found; this module raises an OSError +exception. + +In the following examples, we assume that the subprocess module is +imported with "from subprocess import *". + + +Replacing /bin/sh shell backquote +--------------------------------- +output=`mycmd myarg` +==> +output = Popen(["mycmd", "myarg"], stdout=PIPE).communicate()[0] + + +Replacing shell pipe line +------------------------- +output=`dmesg | grep hda` +==> +p1 = Popen(["dmesg"], stdout=PIPE) +p2 = Popen(["grep", "hda"], stdin=p1.stdout, stdout=PIPE) +output = p2.communicate()[0] + + +Replacing os.system() +--------------------- +sts = os.system("mycmd" + " myarg") +==> +p = Popen("mycmd" + " myarg", shell=True) +pid, sts = os.waitpid(p.pid, 0) + +Note: + +* Calling the program through the shell is usually not required. + +* It's easier to look at the returncode attribute than the + exitstatus. + +A more real-world example would look like this: + +try: + retcode = call("mycmd" + " myarg", shell=True) + if retcode < 0: + print >>sys.stderr, "Child was terminated by signal", -retcode + else: + print >>sys.stderr, "Child returned", retcode +except OSError, e: + print >>sys.stderr, "Execution failed:", e + + +Replacing os.spawn* +------------------- +P_NOWAIT example: + +pid = os.spawnlp(os.P_NOWAIT, "/bin/mycmd", "mycmd", "myarg") +==> +pid = Popen(["/bin/mycmd", "myarg"]).pid + + +P_WAIT example: + +retcode = os.spawnlp(os.P_WAIT, "/bin/mycmd", "mycmd", "myarg") +==> +retcode = call(["/bin/mycmd", "myarg"]) + + +Vector example: + +os.spawnvp(os.P_NOWAIT, path, args) +==> +Popen([path] + args[1:]) + + +Environment example: + +os.spawnlpe(os.P_NOWAIT, "/bin/mycmd", "mycmd", "myarg", env) +==> +Popen(["/bin/mycmd", "myarg"], env={"PATH": "/usr/bin"}) + + +Replacing os.popen* +------------------- +pipe = os.popen("cmd", mode='r', bufsize) +==> +pipe = Popen("cmd", shell=True, bufsize=bufsize, stdout=PIPE).stdout + +pipe = os.popen("cmd", mode='w', bufsize) +==> +pipe = Popen("cmd", shell=True, bufsize=bufsize, stdin=PIPE).stdin + + +(child_stdin, child_stdout) = os.popen2("cmd", mode, bufsize) +==> +p = Popen("cmd", shell=True, bufsize=bufsize, + stdin=PIPE, stdout=PIPE, close_fds=True) +(child_stdin, child_stdout) = (p.stdin, p.stdout) + + +(child_stdin, + child_stdout, + child_stderr) = os.popen3("cmd", mode, bufsize) +==> +p = Popen("cmd", shell=True, bufsize=bufsize, + stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=True) +(child_stdin, + child_stdout, + child_stderr) = (p.stdin, p.stdout, p.stderr) + + +(child_stdin, child_stdout_and_stderr) = os.popen4("cmd", mode, + bufsize) +==> +p = Popen("cmd", shell=True, bufsize=bufsize, + stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True) +(child_stdin, child_stdout_and_stderr) = (p.stdin, p.stdout) + +On Unix, os.popen2, os.popen3 and os.popen4 also accept a sequence as +the command to execute, in which case arguments will be passed +directly to the program without shell intervention. This usage can be +replaced as follows: + +(child_stdin, child_stdout) = os.popen2(["/bin/ls", "-l"], mode, + bufsize) +==> +p = Popen(["/bin/ls", "-l"], bufsize=bufsize, stdin=PIPE, stdout=PIPE) +(child_stdin, child_stdout) = (p.stdin, p.stdout) + +Return code handling translates as follows: + +pipe = os.popen("cmd", 'w') +... +rc = pipe.close() +if rc is not None and rc % 256: + print "There were some errors" +==> +process = Popen("cmd", 'w', shell=True, stdin=PIPE) +... +process.stdin.close() +if process.wait() != 0: + print "There were some errors" + + +Replacing popen2.* +------------------ +(child_stdout, child_stdin) = popen2.popen2("somestring", bufsize, mode) +==> +p = Popen(["somestring"], shell=True, bufsize=bufsize + stdin=PIPE, stdout=PIPE, close_fds=True) +(child_stdout, child_stdin) = (p.stdout, p.stdin) + +On Unix, popen2 also accepts a sequence as the command to execute, in +which case arguments will be passed directly to the program without +shell intervention. This usage can be replaced as follows: + +(child_stdout, child_stdin) = popen2.popen2(["mycmd", "myarg"], bufsize, + mode) +==> +p = Popen(["mycmd", "myarg"], bufsize=bufsize, + stdin=PIPE, stdout=PIPE, close_fds=True) +(child_stdout, child_stdin) = (p.stdout, p.stdin) + +The popen2.Popen3 and popen2.Popen4 basically works as subprocess.Popen, +except that: + +* subprocess.Popen raises an exception if the execution fails +* the capturestderr argument is replaced with the stderr argument. +* stdin=PIPE and stdout=PIPE must be specified. +* popen2 closes all filedescriptors by default, but you have to specify + close_fds=True with subprocess.Popen. +""" + +import sys +mswindows = (sys.platform == "win32") + +import os +import types +import traceback +import gc +import signal +import errno + +try: + set +except NameError: + from sets import Set as set + +# Exception classes used by this module. + + +class CalledProcessError(Exception): + + """This exception is raised when a process run by check_call() or + check_output() returns a non-zero exit status. + The exit status will be stored in the returncode attribute; + check_output() will also store the output in the output attribute. + """ + + def __init__(self, returncode, cmd, output=None): + self.returncode = returncode + self.cmd = cmd + self.output = output + + def __str__(self): + return "Command '%s' returned non-zero exit status %d" % ( + self.cmd, self.returncode) + + +if mswindows: + import threading + import msvcrt + import _subprocess + + class STARTUPINFO: + dwFlags = 0 + hStdInput = None + hStdOutput = None + hStdError = None + wShowWindow = 0 + + class pywintypes: + error = IOError +else: + import select + _has_poll = hasattr(select, 'poll') + import fcntl + import pickle + + # When select or poll has indicated that the file is writable, + # we can write up to _PIPE_BUF bytes without risk of blocking. + # POSIX defines PIPE_BUF as >= 512. + _PIPE_BUF = getattr(select, 'PIPE_BUF', 512) + + +__all__ = ["Popen", "PIPE", "STDOUT", "call", "check_call", + "check_output", "CalledProcessError"] + +if mswindows: + from _subprocess import CREATE_NEW_CONSOLE, CREATE_NEW_PROCESS_GROUP, \ + STD_INPUT_HANDLE, STD_OUTPUT_HANDLE, \ + STD_ERROR_HANDLE, SW_HIDE, \ + STARTF_USESTDHANDLES, STARTF_USESHOWWINDOW + + __all__.extend(["CREATE_NEW_CONSOLE", "CREATE_NEW_PROCESS_GROUP", + "STD_INPUT_HANDLE", "STD_OUTPUT_HANDLE", + "STD_ERROR_HANDLE", "SW_HIDE", + "STARTF_USESTDHANDLES", "STARTF_USESHOWWINDOW"]) +try: + MAXFD = os.sysconf("SC_OPEN_MAX") +except: + MAXFD = 256 + +_active = [] + + +def _cleanup(): + for inst in _active[:]: + res = inst._internal_poll(_deadstate=sys.maxint) + if res is not None: + try: + _active.remove(inst) + except ValueError: + # This can happen if two threads create a new Popen instance. + # It's harmless that it was already removed, so ignore. + pass + +PIPE = -1 +STDOUT = -2 + + +def _eintr_retry_call(func, *args): + while True: + try: + return func(*args) + except (OSError, IOError), e: + if e.errno == errno.EINTR: + continue + raise + + +def call(*popenargs, **kwargs): + """Run command with arguments. Wait for command to complete, then + return the returncode attribute. + + The arguments are the same as for the Popen constructor. Example: + + retcode = call(["ls", "-l"]) + """ + return Popen(*popenargs, **kwargs).wait() + + +def check_call(*popenargs, **kwargs): + """Run command with arguments. Wait for command to complete. If + the exit code was zero then return, otherwise raise + CalledProcessError. The CalledProcessError object will have the + return code in the returncode attribute. + + The arguments are the same as for the Popen constructor. Example: + + check_call(["ls", "-l"]) + """ + retcode = call(*popenargs, **kwargs) + if retcode: + cmd = kwargs.get("args") + if cmd is None: + cmd = popenargs[0] + raise CalledProcessError(retcode, cmd) + return 0 + + +def check_output(*popenargs, **kwargs): + r"""Run command with arguments and return its output as a byte string. + + If the exit code was non-zero it raises a CalledProcessError. The + CalledProcessError object will have the return code in the returncode + attribute and output in the output attribute. + + The arguments are the same as for the Popen constructor. Example: + + >>> check_output(["ls", "-l", "/dev/null"]) + 'crw-rw-rw- 1 root root 1, 3 Oct 18 2007 /dev/null\n' + + The stdout argument is not allowed as it is used internally. + To capture standard error in the result, use stderr=STDOUT. + + >>> check_output(["/bin/sh", "-c", + ... "ls -l non_existent_file ; exit 0"], + ... stderr=STDOUT) + 'ls: non_existent_file: No such file or directory\n' + """ + if 'stdout' in kwargs: + raise ValueError('stdout argument not allowed, it will be overridden.') + process = Popen(stdout=PIPE, *popenargs, **kwargs) + output, unused_err = process.communicate() + retcode = process.poll() + if retcode: + cmd = kwargs.get("args") + if cmd is None: + cmd = popenargs[0] + raise CalledProcessError(retcode, cmd, output=output) + return output + + +def list2cmdline(seq): + """ + Translate a sequence of arguments into a command line + string, using the same rules as the MS C runtime: + + 1) Arguments are delimited by white space, which is either a + space or a tab. + + 2) A string surrounded by double quotation marks is + interpreted as a single argument, regardless of white space + contained within. A quoted string can be embedded in an + argument. + + 3) A double quotation mark preceded by a backslash is + interpreted as a literal double quotation mark. + + 4) Backslashes are interpreted literally, unless they + immediately precede a double quotation mark. + + 5) If backslashes immediately precede a double quotation mark, + every pair of backslashes is interpreted as a literal + backslash. If the number of backslashes is odd, the last + backslash escapes the next double quotation mark as + described in rule 3. + """ + + # See + # http://msdn.microsoft.com/en-us/library/17w5ykft.aspx + # or search http://msdn.microsoft.com for + # "Parsing C++ Command-Line Arguments" + result = [] + needquote = False + for arg in seq: + bs_buf = [] + + # Add a space to separate this argument from the others + if result: + result.append(' ') + + needquote = (" " in arg) or ("\t" in arg) or not arg + if needquote: + result.append('"') + + for c in arg: + if c == '\\': + # Don't know if we need to double yet. + bs_buf.append(c) + elif c == '"': + # Double backslashes. + result.append('\\' * len(bs_buf) * 2) + bs_buf = [] + result.append('\\"') + else: + # Normal char + if bs_buf: + result.extend(bs_buf) + bs_buf = [] + result.append(c) + + # Add remaining backslashes, if any. + if bs_buf: + result.extend(bs_buf) + + if needquote: + result.extend(bs_buf) + result.append('"') + + return ''.join(result) + + +class Popen(object): + + def __init__(self, args, bufsize=0, executable=None, + stdin=None, stdout=None, stderr=None, + preexec_fn=None, close_fds=False, shell=False, + cwd=None, env=None, universal_newlines=False, + startupinfo=None, creationflags=0): + """Create new Popen instance.""" + _cleanup() + + self._child_created = False + if not isinstance(bufsize, (int, long)): + raise TypeError("bufsize must be an integer") + + if mswindows: + if preexec_fn is not None: + raise ValueError("preexec_fn is not supported on Windows " + "platforms") + if close_fds and (stdin is not None or stdout is not None or + stderr is not None): + raise ValueError("close_fds is not supported on Windows " + "platforms if you redirect " + "stdin/stdout/stderr") + else: + # POSIX + if startupinfo is not None: + raise ValueError("startupinfo is only supported on Windows " + "platforms") + if creationflags != 0: + raise ValueError("creationflags is only supported on Windows " + "platforms") + + self.stdin = None + self.stdout = None + self.stderr = None + self.pid = None + self.returncode = None + self.universal_newlines = universal_newlines + + # Input and output objects. The general principle is like + # this: + # + # Parent Child + # ------ ----- + # p2cwrite ---stdin---> p2cread + # c2pread <--stdout--- c2pwrite + # errread <--stderr--- errwrite + # + # On POSIX, the child objects are file descriptors. On + # Windows, these are Windows file handles. The parent objects + # are file descriptors on both platforms. The parent objects + # are None when not using PIPEs. The child objects are None + # when not redirecting. + + (p2cread, p2cwrite, + c2pread, c2pwrite, + errread, errwrite) = self._get_handles(stdin, stdout, stderr) + + self._execute_child(args, executable, preexec_fn, close_fds, + cwd, env, universal_newlines, + startupinfo, creationflags, shell, + p2cread, p2cwrite, + c2pread, c2pwrite, + errread, errwrite) + + if mswindows: + if p2cwrite is not None: + p2cwrite = msvcrt.open_osfhandle(p2cwrite.Detach(), 0) + if c2pread is not None: + c2pread = msvcrt.open_osfhandle(c2pread.Detach(), 0) + if errread is not None: + errread = msvcrt.open_osfhandle(errread.Detach(), 0) + + if p2cwrite is not None: + self.stdin = os.fdopen(p2cwrite, 'wb', bufsize) + if c2pread is not None: + if universal_newlines: + self.stdout = os.fdopen(c2pread, 'rU', bufsize) + else: + self.stdout = os.fdopen(c2pread, 'rb', bufsize) + if errread is not None: + if universal_newlines: + self.stderr = os.fdopen(errread, 'rU', bufsize) + else: + self.stderr = os.fdopen(errread, 'rb', bufsize) + + def _translate_newlines(self, data): + data = data.replace("\r\n", "\n") + data = data.replace("\r", "\n") + return data + + def __del__(self, _maxint=sys.maxint, _active=_active): + # If __init__ hasn't had a chance to execute (e.g. if it + # was passed an undeclared keyword argument), we don't + # have a _child_created attribute at all. + if not getattr(self, '_child_created', False): + # We didn't get to successfully create a child process. + return + # In case the child hasn't been waited on, check if it's done. + self._internal_poll(_deadstate=_maxint) + if self.returncode is None and _active is not None: + # Child is still running, keep us alive until we can wait on it. + _active.append(self) + + def communicate(self, input=None): + """Interact with process: Send data to stdin. Read data from + stdout and stderr, until end-of-file is reached. Wait for + process to terminate. The optional input argument should be a + string to be sent to the child process, or None, if no data + should be sent to the child. + + communicate() returns a tuple (stdout, stderr).""" + + # Optimization: If we are only using one pipe, or no pipe at + # all, using select() or threads is unnecessary. + if [self.stdin, self.stdout, self.stderr].count(None) >= 2: + stdout = None + stderr = None + if self.stdin: + if input: + try: + self.stdin.write(input) + except IOError, e: + if e.errno != errno.EPIPE and e.errno != errno.EINVAL: + raise + self.stdin.close() + elif self.stdout: + stdout = _eintr_retry_call(self.stdout.read) + self.stdout.close() + elif self.stderr: + stderr = _eintr_retry_call(self.stderr.read) + self.stderr.close() + self.wait() + return (stdout, stderr) + + return self._communicate(input) + + def poll(self): + return self._internal_poll() + + if mswindows: + # + # Windows methods + # + def _get_handles(self, stdin, stdout, stderr): + """Construct and return tuple with IO objects: + p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite + """ + if stdin is None and stdout is None and stderr is None: + return (None, None, None, None, None, None) + + p2cread, p2cwrite = None, None + c2pread, c2pwrite = None, None + errread, errwrite = None, None + + if stdin is None: + p2cread = _subprocess.GetStdHandle( + _subprocess.STD_INPUT_HANDLE) + if p2cread is None: + p2cread, _ = _subprocess.CreatePipe(None, 0) + elif stdin == PIPE: + p2cread, p2cwrite = _subprocess.CreatePipe(None, 0) + elif isinstance(stdin, int): + p2cread = msvcrt.get_osfhandle(stdin) + else: + # Assuming file-like object + p2cread = msvcrt.get_osfhandle(stdin.fileno()) + p2cread = self._make_inheritable(p2cread) + + if stdout is None: + c2pwrite = _subprocess.GetStdHandle( + _subprocess.STD_OUTPUT_HANDLE) + if c2pwrite is None: + _, c2pwrite = _subprocess.CreatePipe(None, 0) + elif stdout == PIPE: + c2pread, c2pwrite = _subprocess.CreatePipe(None, 0) + elif isinstance(stdout, int): + c2pwrite = msvcrt.get_osfhandle(stdout) + else: + # Assuming file-like object + c2pwrite = msvcrt.get_osfhandle(stdout.fileno()) + c2pwrite = self._make_inheritable(c2pwrite) + + if stderr is None: + errwrite = _subprocess.GetStdHandle( + _subprocess.STD_ERROR_HANDLE) + if errwrite is None: + _, errwrite = _subprocess.CreatePipe(None, 0) + elif stderr == PIPE: + errread, errwrite = _subprocess.CreatePipe(None, 0) + elif stderr == STDOUT: + errwrite = c2pwrite + elif isinstance(stderr, int): + errwrite = msvcrt.get_osfhandle(stderr) + else: + # Assuming file-like object + errwrite = msvcrt.get_osfhandle(stderr.fileno()) + errwrite = self._make_inheritable(errwrite) + + return (p2cread, p2cwrite, + c2pread, c2pwrite, + errread, errwrite) + + def _make_inheritable(self, handle): + """Return a duplicate of handle, which is inheritable""" + return _subprocess.DuplicateHandle( + _subprocess.GetCurrentProcess(), + handle, + _subprocess.GetCurrentProcess(), + 0, + 1, + _subprocess.DUPLICATE_SAME_ACCESS + ) + + def _find_w9xpopen(self): + """Find and return absolut path to w9xpopen.exe""" + w9xpopen = os.path.join( + os.path.dirname(_subprocess.GetModuleFileName(0)), + "w9xpopen.exe") + if not os.path.exists(w9xpopen): + # Eeek - file-not-found - possibly an embedding + # situation - see if we can locate it in sys.exec_prefix + w9xpopen = os.path.join(os.path.dirname(sys.exec_prefix), + "w9xpopen.exe") + if not os.path.exists(w9xpopen): + raise RuntimeError("Cannot locate w9xpopen.exe, which is " + "needed for Popen to work with your " + "shell or platform.") + return w9xpopen + + def _execute_child(self, args, executable, preexec_fn, close_fds, + cwd, env, universal_newlines, + startupinfo, creationflags, shell, + p2cread, p2cwrite, + c2pread, c2pwrite, + errread, errwrite): + """Execute program (MS Windows version)""" + + if not isinstance(args, types.StringTypes): + args = list2cmdline(args) + + # Process startup details + if startupinfo is None: + startupinfo = STARTUPINFO() + if None not in (p2cread, c2pwrite, errwrite): + startupinfo.dwFlags |= _subprocess.STARTF_USESTDHANDLES + startupinfo.hStdInput = p2cread + startupinfo.hStdOutput = c2pwrite + startupinfo.hStdError = errwrite + + if shell: + startupinfo.dwFlags |= _subprocess.STARTF_USESHOWWINDOW + startupinfo.wShowWindow = _subprocess.SW_HIDE + comspec = os.environ.get("COMSPEC", "cmd.exe") + args = '{} /c "{}"'.format(comspec, args) + if (_subprocess.GetVersion() >= 0x80000000 or + os.path.basename(comspec).lower() == "command.com"): + # Win9x, or using command.com on NT. We need to + # use the w9xpopen intermediate program. For more + # information, see KB Q150956 + # (http://web.archive.org/web/20011105084002/http://support.microsoft.com/support/kb/articles/Q150/9/56.asp) + w9xpopen = self._find_w9xpopen() + args = '"%s" %s' % (w9xpopen, args) + # Not passing CREATE_NEW_CONSOLE has been known to + # cause random failures on win9x. Specifically a + # dialog: "Your program accessed mem currently in + # use at xxx" and a hopeful warning about the + # stability of your system. Cost is Ctrl+C wont + # kill children. + creationflags |= _subprocess.CREATE_NEW_CONSOLE + + # Start the process + try: + try: + hp, ht, pid, tid = _subprocess.CreateProcess( + executable, args, + # no special + # security + None, None, + int(not close_fds), + creationflags, + env, + cwd, + startupinfo) + except pywintypes.error, e: + # Translate pywintypes.error to WindowsError, which is + # a subclass of OSError. FIXME: We should really + # translate errno using _sys_errlist (or similar), but + # how can this be done from Python? + raise WindowsError(*e.args) + finally: + # Child is launched. Close the parent's copy of those pipe + # handles that only the child should have open. You need + # to make sure that no handles to the write end of the + # output pipe are maintained in this process or else the + # pipe will not close when the child process exits and the + # ReadFile will hang. + if p2cread is not None: + p2cread.Close() + if c2pwrite is not None: + c2pwrite.Close() + if errwrite is not None: + errwrite.Close() + + # Retain the process handle, but close the thread handle + self._child_created = True + self._handle = hp + self.pid = pid + ht.Close() + + def _internal_poll( + self, _deadstate=None, + _WaitForSingleObject=_subprocess.WaitForSingleObject, + _WAIT_OBJECT_0=_subprocess.WAIT_OBJECT_0, + _GetExitCodeProcess=_subprocess.GetExitCodeProcess + ): + """Check if child process has terminated. Returns returncode + attribute. + + This method is called by __del__, so it can only refer to objects + in its local scope. + + """ + if self.returncode is None: + if _WaitForSingleObject(self._handle, 0) == _WAIT_OBJECT_0: + self.returncode = _GetExitCodeProcess(self._handle) + return self.returncode + + def wait(self): + """Wait for child process to terminate. Returns returncode + attribute.""" + if self.returncode is None: + _subprocess.WaitForSingleObject(self._handle, + _subprocess.INFINITE) + self.returncode = _subprocess.GetExitCodeProcess(self._handle) + return self.returncode + + def _readerthread(self, fh, buffer): + buffer.append(fh.read()) + + def _communicate(self, input): + stdout = None # Return + stderr = None # Return + + if self.stdout: + stdout = [] + stdout_thread = threading.Thread(target=self._readerthread, + args=(self.stdout, stdout)) + stdout_thread.setDaemon(True) + stdout_thread.start() + if self.stderr: + stderr = [] + stderr_thread = threading.Thread(target=self._readerthread, + args=(self.stderr, stderr)) + stderr_thread.setDaemon(True) + stderr_thread.start() + + if self.stdin: + if input is not None: + try: + self.stdin.write(input) + except IOError, e: + if e.errno != errno.EPIPE: + raise + self.stdin.close() + + if self.stdout: + stdout_thread.join() + if self.stderr: + stderr_thread.join() + + # All data exchanged. Translate lists into strings. + if stdout is not None: + stdout = stdout[0] + if stderr is not None: + stderr = stderr[0] + + # Translate newlines, if requested. We cannot let the file + # object do the translation: It is based on stdio, which is + # impossible to combine with select (unless forcing no + # buffering). + if self.universal_newlines and hasattr(file, 'newlines'): + if stdout: + stdout = self._translate_newlines(stdout) + if stderr: + stderr = self._translate_newlines(stderr) + + self.wait() + return (stdout, stderr) + + def send_signal(self, sig): + """Send a signal to the process + """ + if sig == signal.SIGTERM: + self.terminate() + elif sig == signal.CTRL_C_EVENT: + os.kill(self.pid, signal.CTRL_C_EVENT) + elif sig == signal.CTRL_BREAK_EVENT: + os.kill(self.pid, signal.CTRL_BREAK_EVENT) + else: + raise ValueError("Unsupported signal: {}".format(sig)) + + def terminate(self): + """Terminates the process + """ + _subprocess.TerminateProcess(self._handle, 1) + + kill = terminate + + else: + # + # POSIX methods + # + def _get_handles(self, stdin, stdout, stderr): + """Construct and return tuple with IO objects: + p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite + """ + p2cread, p2cwrite = None, None + c2pread, c2pwrite = None, None + errread, errwrite = None, None + + if stdin is None: + pass + elif stdin == PIPE: + p2cread, p2cwrite = self.pipe_cloexec() + elif isinstance(stdin, int): + p2cread = stdin + else: + # Assuming file-like object + p2cread = stdin.fileno() + + if stdout is None: + pass + elif stdout == PIPE: + c2pread, c2pwrite = self.pipe_cloexec() + elif isinstance(stdout, int): + c2pwrite = stdout + else: + # Assuming file-like object + c2pwrite = stdout.fileno() + + if stderr is None: + pass + elif stderr == PIPE: + errread, errwrite = self.pipe_cloexec() + elif stderr == STDOUT: + errwrite = c2pwrite + elif isinstance(stderr, int): + errwrite = stderr + else: + # Assuming file-like object + errwrite = stderr.fileno() + + return (p2cread, p2cwrite, + c2pread, c2pwrite, + errread, errwrite) + + def _set_cloexec_flag(self, fd, cloexec=True): + try: + cloexec_flag = fcntl.FD_CLOEXEC + except AttributeError: + cloexec_flag = 1 + + old = fcntl.fcntl(fd, fcntl.F_GETFD) + if cloexec: + fcntl.fcntl(fd, fcntl.F_SETFD, old | cloexec_flag) + else: + fcntl.fcntl(fd, fcntl.F_SETFD, old & ~cloexec_flag) + + def pipe_cloexec(self): + """Create a pipe with FDs set CLOEXEC.""" + # Pipes' FDs are set CLOEXEC by default because we don't want them + # to be inherited by other subprocesses: the CLOEXEC flag is + # removed from the child's FDs by _dup2(), between fork() and + # exec(). + # This is not atomic: we would need the pipe2() syscall for that. + r, w = os.pipe() + self._set_cloexec_flag(r) + self._set_cloexec_flag(w) + return r, w + + def _close_fds(self, but): + if hasattr(os, 'closerange'): + os.closerange(3, but) + os.closerange(but + 1, MAXFD) + else: + for i in xrange(3, MAXFD): + if i == but: + continue + try: + os.close(i) + except: + pass + + def _execute_child(self, args, executable, preexec_fn, close_fds, + cwd, env, universal_newlines, + startupinfo, creationflags, shell, + p2cread, p2cwrite, + c2pread, c2pwrite, + errread, errwrite): + """Execute program (POSIX version)""" + + if isinstance(args, types.StringTypes): + args = [args] + else: + args = list(args) + + if shell: + args = ["/bin/sh", "-c"] + args + if executable: + args[0] = executable + + if executable is None: + executable = args[0] + + # For transferring possible exec failure from child to parent + # The first char specifies the exception type: 0 means + # OSError, 1 means some other error. + errpipe_read, errpipe_write = self.pipe_cloexec() + try: + try: + gc_was_enabled = gc.isenabled() + # Disable gc to avoid bug where gc -> file_dealloc -> + # write to stderr -> hang. + # http://bugs.python.org/issue1336 + gc.disable() + try: + self.pid = os.fork() + except: + if gc_was_enabled: + gc.enable() + raise + self._child_created = True + if self.pid == 0: + # Child + try: + # Close parent's pipe ends + if p2cwrite is not None: + os.close(p2cwrite) + if c2pread is not None: + os.close(c2pread) + if errread is not None: + os.close(errread) + os.close(errpipe_read) + + # When duping fds, if there arises a situation + # where one of the fds is either 0, 1 or 2, it + # is possible that it is overwritten (#12607). + if c2pwrite == 0: + c2pwrite = os.dup(c2pwrite) + if errwrite == 0 or errwrite == 1: + errwrite = os.dup(errwrite) + + # Dup fds for child + def _dup2(a, b): + # dup2() removes the CLOEXEC flag but + # we must do it ourselves if dup2() + # would be a no-op (issue #10806). + if a == b: + self._set_cloexec_flag(a, False) + elif a is not None: + os.dup2(a, b) + _dup2(p2cread, 0) + _dup2(c2pwrite, 1) + _dup2(errwrite, 2) + + # Close pipe fds. Make sure we don't close the + # same fd more than once, or standard fds. + closed = set([None]) + for fd in [p2cread, c2pwrite, errwrite]: + if fd not in closed and fd > 2: + os.close(fd) + closed.add(fd) + + # Close all other fds, if asked for + if close_fds: + self._close_fds(but=errpipe_write) + + if cwd is not None: + os.chdir(cwd) + + if preexec_fn: + preexec_fn() + + if env is None: + os.execvp(executable, args) + else: + os.execvpe(executable, args, env) + + except: + exc_type, exc_value, tb = sys.exc_info() + # Save the traceback and attach it to the exception + # object + exc_lines = traceback.format_exception(exc_type, + exc_value, + tb) + exc_value.child_traceback = ''.join(exc_lines) + os.write(errpipe_write, pickle.dumps(exc_value)) + + # This exitcode won't be reported to applications, + # so it really doesn't matter what we return. + os._exit(255) + + # Parent + if gc_was_enabled: + gc.enable() + finally: + # be sure the FD is closed no matter what + os.close(errpipe_write) + + if p2cread is not None and p2cwrite is not None: + os.close(p2cread) + if c2pwrite is not None and c2pread is not None: + os.close(c2pwrite) + if errwrite is not None and errread is not None: + os.close(errwrite) + + # Wait for exec to fail or succeed; possibly raising exception + # Exception limited to 1M + data = _eintr_retry_call(os.read, errpipe_read, 1048576) + finally: + # be sure the FD is closed no matter what + os.close(errpipe_read) + + if data != "": + try: + _eintr_retry_call(os.waitpid, self.pid, 0) + except OSError, e: + if e.errno != errno.ECHILD: + raise + child_exception = pickle.loads(data) + for fd in (p2cwrite, c2pread, errread): + if fd is not None: + os.close(fd) + raise child_exception + + def _handle_exitstatus(self, sts, _WIFSIGNALED=os.WIFSIGNALED, + _WTERMSIG=os.WTERMSIG, _WIFEXITED=os.WIFEXITED, + _WEXITSTATUS=os.WEXITSTATUS): + # This method is called (indirectly) by __del__, so it cannot + # refer to anything outside of its local scope.""" + if _WIFSIGNALED(sts): + self.returncode = -_WTERMSIG(sts) + elif _WIFEXITED(sts): + self.returncode = _WEXITSTATUS(sts) + else: + # Should never happen + raise RuntimeError("Unknown child exit status!") + + def _internal_poll(self, _deadstate=None, _waitpid=os.waitpid, + _WNOHANG=os.WNOHANG, _os_error=os.error): + """Check if child process has terminated. Returns returncode + attribute. + + This method is called by __del__, so it cannot reference anything + outside of the local scope (nor can any methods it calls). + + """ + if self.returncode is None: + try: + pid, sts = _waitpid(self.pid, _WNOHANG) + if pid == self.pid: + self._handle_exitstatus(sts) + except _os_error: + if _deadstate is not None: + self.returncode = _deadstate + return self.returncode + + def wait(self): + """Wait for child process to terminate. Returns returncode + attribute.""" + if self.returncode is None: + try: + pid, sts = _eintr_retry_call(os.waitpid, self.pid, 0) + except OSError, e: + if e.errno != errno.ECHILD: + raise + # This happens if SIGCLD is set to be ignored or waiting + # for child processes has otherwise been disabled for our + # process. This child is dead, we can't get the status. + sts = 0 + self._handle_exitstatus(sts) + return self.returncode + + def _communicate(self, input): + if self.stdin: + # Flush stdio buffer. This might block, if the user has + # been writing to .stdin in an uncontrolled fashion. + self.stdin.flush() + if not input: + self.stdin.close() + + if _has_poll: + stdout, stderr = self._communicate_with_poll(input) + else: + stdout, stderr = self._communicate_with_select(input) + + # All data exchanged. Translate lists into strings. + if stdout is not None: + stdout = ''.join(stdout) + if stderr is not None: + stderr = ''.join(stderr) + + # Translate newlines, if requested. We cannot let the file + # object do the translation: It is based on stdio, which is + # impossible to combine with select (unless forcing no + # buffering). + if self.universal_newlines and hasattr(file, 'newlines'): + if stdout: + stdout = self._translate_newlines(stdout) + if stderr: + stderr = self._translate_newlines(stderr) + + self.wait() + return (stdout, stderr) + + def _communicate_with_poll(self, input): + stdout = None # Return + stderr = None # Return + fd2file = {} + fd2output = {} + + poller = select.poll() + + def register_and_append(file_obj, eventmask): + poller.register(file_obj.fileno(), eventmask) + fd2file[file_obj.fileno()] = file_obj + + def close_unregister_and_remove(fd): + poller.unregister(fd) + fd2file[fd].close() + fd2file.pop(fd) + + if self.stdin and input: + register_and_append(self.stdin, select.POLLOUT) + + select_POLLIN_POLLPRI = select.POLLIN | select.POLLPRI + if self.stdout: + register_and_append(self.stdout, select_POLLIN_POLLPRI) + fd2output[self.stdout.fileno()] = stdout = [] + if self.stderr: + register_and_append(self.stderr, select_POLLIN_POLLPRI) + fd2output[self.stderr.fileno()] = stderr = [] + + input_offset = 0 + while fd2file: + try: + ready = poller.poll() + except select.error, e: + if e.args[0] == errno.EINTR: + continue + raise + + for fd, mode in ready: + if mode & select.POLLOUT: + chunk = input[input_offset: input_offset + _PIPE_BUF] + try: + input_offset += os.write(fd, chunk) + except OSError, e: + if e.errno == errno.EPIPE: + close_unregister_and_remove(fd) + else: + raise + else: + if input_offset >= len(input): + close_unregister_and_remove(fd) + elif mode & select_POLLIN_POLLPRI: + data = os.read(fd, 4096) + if not data: + close_unregister_and_remove(fd) + fd2output[fd].append(data) + else: + # Ignore hang up or errors. + close_unregister_and_remove(fd) + + return (stdout, stderr) + + def _communicate_with_select(self, input): + read_set = [] + write_set = [] + stdout = None # Return + stderr = None # Return + + if self.stdin and input: + write_set.append(self.stdin) + if self.stdout: + read_set.append(self.stdout) + stdout = [] + if self.stderr: + read_set.append(self.stderr) + stderr = [] + + input_offset = 0 + while read_set or write_set: + try: + rlist, wlist, xlist = select.select( + read_set, write_set, []) + except select.error, e: + if e.args[0] == errno.EINTR: + continue + raise + + if self.stdin in wlist: + chunk = input[input_offset: input_offset + _PIPE_BUF] + try: + bytes_written = os.write(self.stdin.fileno(), chunk) + except OSError, e: + if e.errno == errno.EPIPE: + self.stdin.close() + write_set.remove(self.stdin) + else: + raise + else: + input_offset += bytes_written + if input_offset >= len(input): + self.stdin.close() + write_set.remove(self.stdin) + + if self.stdout in rlist: + data = os.read(self.stdout.fileno(), 1024) + if data == "": + self.stdout.close() + read_set.remove(self.stdout) + stdout.append(data) + + if self.stderr in rlist: + data = os.read(self.stderr.fileno(), 1024) + if data == "": + self.stderr.close() + read_set.remove(self.stderr) + stderr.append(data) + + return (stdout, stderr) + + def send_signal(self, sig): + """Send a signal to the process + """ + os.kill(self.pid, sig) + + def terminate(self): + """Terminate the process with SIGTERM + """ + self.send_signal(signal.SIGTERM) + + def kill(self): + """Kill the process with SIGKILL + """ + self.send_signal(signal.SIGKILL) + + +def _demo_posix(): + # + # Example 1: Simple redirection: Get process list + # + plist = Popen(["ps"], stdout=PIPE).communicate()[0] + print "Process list:" + print plist + + # + # Example 2: Change uid before executing child + # + if os.getuid() == 0: + p = Popen(["id"], preexec_fn=lambda: os.setuid(100)) + p.wait() + + # + # Example 3: Connecting several subprocesses + # + print "Looking for 'hda'..." + p1 = Popen(["dmesg"], stdout=PIPE) + p2 = Popen(["grep", "hda"], stdin=p1.stdout, stdout=PIPE) + print repr(p2.communicate()[0]) + + # + # Example 4: Catch execution error + # + print + print "Trying a weird file..." + try: + print Popen(["/this/path/does/not/exist"]).communicate() + except OSError, e: + if e.errno == errno.ENOENT: + print "The file didn't exist. I thought so..." + print "Child traceback:" + print e.child_traceback + else: + print "Error", e.errno + else: + print >>sys.stderr, "Gosh. No error." + + +def _demo_windows(): + # + # Example 1: Connecting several subprocesses + # + print "Looking for 'PROMPT' in set output..." + p1 = Popen("set", stdout=PIPE, shell=True) + p2 = Popen('find "PROMPT"', stdin=p1.stdout, stdout=PIPE) + print repr(p2.communicate()[0]) + + # + # Example 2: Simple execution of program + # + print "Executing calc..." + p = Popen("calc") + p.wait() + + +if __name__ == "__main__": + if mswindows: + _demo_windows() + else: + _demo_posix() diff --git a/lib/cherrypy/_cpconfig.py b/lib/cherrypy/_cpconfig.py new file mode 100644 index 00000000..c11bc1d1 --- /dev/null +++ b/lib/cherrypy/_cpconfig.py @@ -0,0 +1,317 @@ +""" +Configuration system for CherryPy. + +Configuration in CherryPy is implemented via dictionaries. Keys are strings +which name the mapped value, which may be of any type. + + +Architecture +------------ + +CherryPy Requests are part of an Application, which runs in a global context, +and configuration data may apply to any of those three scopes: + +Global + Configuration entries which apply everywhere are stored in + cherrypy.config. + +Application + Entries which apply to each mounted application are stored + on the Application object itself, as 'app.config'. This is a two-level + dict where each key is a path, or "relative URL" (for example, "/" or + "/path/to/my/page"), and each value is a config dict. Usually, this + data is provided in the call to tree.mount(root(), config=conf), + although you may also use app.merge(conf). + +Request + Each Request object possesses a single 'Request.config' dict. + Early in the request process, this dict is populated by merging global + config entries, Application entries (whose path equals or is a parent + of Request.path_info), and any config acquired while looking up the + page handler (see next). + + +Declaration +----------- + +Configuration data may be supplied as a Python dictionary, as a filename, +or as an open file object. When you supply a filename or file, CherryPy +uses Python's builtin ConfigParser; you declare Application config by +writing each path as a section header:: + + [/path/to/my/page] + request.stream = True + +To declare global configuration entries, place them in a [global] section. + +You may also declare config entries directly on the classes and methods +(page handlers) that make up your CherryPy application via the ``_cp_config`` +attribute. For example:: + + class Demo: + _cp_config = {'tools.gzip.on': True} + + def index(self): + return "Hello world" + index.exposed = True + index._cp_config = {'request.show_tracebacks': False} + +.. note:: + + This behavior is only guaranteed for the default dispatcher. + Other dispatchers may have different restrictions on where + you can attach _cp_config attributes. + + +Namespaces +---------- + +Configuration keys are separated into namespaces by the first "." in the key. +Current namespaces: + +engine + Controls the 'application engine', including autoreload. + These can only be declared in the global config. + +tree + Grafts cherrypy.Application objects onto cherrypy.tree. + These can only be declared in the global config. + +hooks + Declares additional request-processing functions. + +log + Configures the logging for each application. + These can only be declared in the global or / config. + +request + Adds attributes to each Request. + +response + Adds attributes to each Response. + +server + Controls the default HTTP server via cherrypy.server. + These can only be declared in the global config. + +tools + Runs and configures additional request-processing packages. + +wsgi + Adds WSGI middleware to an Application's "pipeline". + These can only be declared in the app's root config ("/"). + +checker + Controls the 'checker', which looks for common errors in + app state (including config) when the engine starts. + Global config only. + +The only key that does not exist in a namespace is the "environment" entry. +This special entry 'imports' other config entries from a template stored in +cherrypy._cpconfig.environments[environment]. It only applies to the global +config, and only when you use cherrypy.config.update. + +You can define your own namespaces to be called at the Global, Application, +or Request level, by adding a named handler to cherrypy.config.namespaces, +app.namespaces, or app.request_class.namespaces. The name can +be any string, and the handler must be either a callable or a (Python 2.5 +style) context manager. +""" + +import cherrypy +from cherrypy._cpcompat import set, basestring +from cherrypy.lib import reprconf + +# Deprecated in CherryPy 3.2--remove in 3.3 +NamespaceSet = reprconf.NamespaceSet + + +def merge(base, other): + """Merge one app config (from a dict, file, or filename) into another. + + If the given config is a filename, it will be appended to + the list of files to monitor for "autoreload" changes. + """ + if isinstance(other, basestring): + cherrypy.engine.autoreload.files.add(other) + + # Load other into base + for section, value_map in reprconf.as_dict(other).items(): + if not isinstance(value_map, dict): + raise ValueError( + "Application config must include section headers, but the " + "config you tried to merge doesn't have any sections. " + "Wrap your config in another dict with paths as section " + "headers, for example: {'/': config}.") + base.setdefault(section, {}).update(value_map) + + +class Config(reprconf.Config): + + """The 'global' configuration data for the entire CherryPy process.""" + + def update(self, config): + """Update self from a dict, file or filename.""" + if isinstance(config, basestring): + # Filename + cherrypy.engine.autoreload.files.add(config) + reprconf.Config.update(self, config) + + def _apply(self, config): + """Update self from a dict.""" + if isinstance(config.get("global"), dict): + if len(config) > 1: + cherrypy.checker.global_config_contained_paths = True + config = config["global"] + if 'tools.staticdir.dir' in config: + config['tools.staticdir.section'] = "global" + reprconf.Config._apply(self, config) + + def __call__(self, *args, **kwargs): + """Decorator for page handlers to set _cp_config.""" + if args: + raise TypeError( + "The cherrypy.config decorator does not accept positional " + "arguments; you must use keyword arguments.") + + def tool_decorator(f): + if not hasattr(f, "_cp_config"): + f._cp_config = {} + for k, v in kwargs.items(): + f._cp_config[k] = v + return f + return tool_decorator + + +# Sphinx begin config.environments +Config.environments = environments = { + "staging": { + 'engine.autoreload.on': False, + 'checker.on': False, + 'tools.log_headers.on': False, + 'request.show_tracebacks': False, + 'request.show_mismatched_params': False, + }, + "production": { + 'engine.autoreload.on': False, + 'checker.on': False, + 'tools.log_headers.on': False, + 'request.show_tracebacks': False, + 'request.show_mismatched_params': False, + 'log.screen': False, + }, + "embedded": { + # For use with CherryPy embedded in another deployment stack. + 'engine.autoreload.on': False, + 'checker.on': False, + 'tools.log_headers.on': False, + 'request.show_tracebacks': False, + 'request.show_mismatched_params': False, + 'log.screen': False, + 'engine.SIGHUP': None, + 'engine.SIGTERM': None, + }, + "test_suite": { + 'engine.autoreload.on': False, + 'checker.on': False, + 'tools.log_headers.on': False, + 'request.show_tracebacks': True, + 'request.show_mismatched_params': True, + 'log.screen': False, + }, +} +# Sphinx end config.environments + + +def _server_namespace_handler(k, v): + """Config handler for the "server" namespace.""" + atoms = k.split(".", 1) + if len(atoms) > 1: + # Special-case config keys of the form 'server.servername.socket_port' + # to configure additional HTTP servers. + if not hasattr(cherrypy, "servers"): + cherrypy.servers = {} + + servername, k = atoms + if servername not in cherrypy.servers: + from cherrypy import _cpserver + cherrypy.servers[servername] = _cpserver.Server() + # On by default, but 'on = False' can unsubscribe it (see below). + cherrypy.servers[servername].subscribe() + + if k == 'on': + if v: + cherrypy.servers[servername].subscribe() + else: + cherrypy.servers[servername].unsubscribe() + else: + setattr(cherrypy.servers[servername], k, v) + else: + setattr(cherrypy.server, k, v) +Config.namespaces["server"] = _server_namespace_handler + + +def _engine_namespace_handler(k, v): + """Backward compatibility handler for the "engine" namespace.""" + engine = cherrypy.engine + + deprecated = { + 'autoreload_on': 'autoreload.on', + 'autoreload_frequency': 'autoreload.frequency', + 'autoreload_match': 'autoreload.match', + 'reload_files': 'autoreload.files', + 'deadlock_poll_freq': 'timeout_monitor.frequency' + } + + if k in deprecated: + engine.log( + 'WARNING: Use of engine.%s is deprecated and will be removed in a ' + 'future version. Use engine.%s instead.' % (k, deprecated[k])) + + if k == 'autoreload_on': + if v: + engine.autoreload.subscribe() + else: + engine.autoreload.unsubscribe() + elif k == 'autoreload_frequency': + engine.autoreload.frequency = v + elif k == 'autoreload_match': + engine.autoreload.match = v + elif k == 'reload_files': + engine.autoreload.files = set(v) + elif k == 'deadlock_poll_freq': + engine.timeout_monitor.frequency = v + elif k == 'SIGHUP': + engine.listeners['SIGHUP'] = set([v]) + elif k == 'SIGTERM': + engine.listeners['SIGTERM'] = set([v]) + elif "." in k: + plugin, attrname = k.split(".", 1) + plugin = getattr(engine, plugin) + if attrname == 'on': + if v and hasattr(getattr(plugin, 'subscribe', None), '__call__'): + plugin.subscribe() + return + elif ( + (not v) and + hasattr(getattr(plugin, 'unsubscribe', None), '__call__') + ): + plugin.unsubscribe() + return + setattr(plugin, attrname, v) + else: + setattr(engine, k, v) +Config.namespaces["engine"] = _engine_namespace_handler + + +def _tree_namespace_handler(k, v): + """Namespace handler for the 'tree' config namespace.""" + if isinstance(v, dict): + for script_name, app in v.items(): + cherrypy.tree.graft(app, script_name) + cherrypy.engine.log("Mounted: %s on %s" % + (app, script_name or "/")) + else: + cherrypy.tree.graft(v, v.script_name) + cherrypy.engine.log("Mounted: %s on %s" % (v, v.script_name or "/")) +Config.namespaces["tree"] = _tree_namespace_handler diff --git a/lib/cherrypy/_cpdispatch.py b/lib/cherrypy/_cpdispatch.py new file mode 100644 index 00000000..1c2d7df8 --- /dev/null +++ b/lib/cherrypy/_cpdispatch.py @@ -0,0 +1,686 @@ +"""CherryPy dispatchers. + +A 'dispatcher' is the object which looks up the 'page handler' callable +and collects config for the current request based on the path_info, other +request attributes, and the application architecture. The core calls the +dispatcher as early as possible, passing it a 'path_info' argument. + +The default dispatcher discovers the page handler by matching path_info +to a hierarchical arrangement of objects, starting at request.app.root. +""" + +import string +import sys +import types +try: + classtype = (type, types.ClassType) +except AttributeError: + classtype = type + +import cherrypy +from cherrypy._cpcompat import set + + +class PageHandler(object): + + """Callable which sets response.body.""" + + def __init__(self, callable, *args, **kwargs): + self.callable = callable + self.args = args + self.kwargs = kwargs + + def get_args(self): + return cherrypy.serving.request.args + + def set_args(self, args): + cherrypy.serving.request.args = args + return cherrypy.serving.request.args + + args = property( + get_args, + set_args, + doc="The ordered args should be accessible from post dispatch hooks" + ) + + def get_kwargs(self): + return cherrypy.serving.request.kwargs + + def set_kwargs(self, kwargs): + cherrypy.serving.request.kwargs = kwargs + return cherrypy.serving.request.kwargs + + kwargs = property( + get_kwargs, + set_kwargs, + doc="The named kwargs should be accessible from post dispatch hooks" + ) + + def __call__(self): + try: + return self.callable(*self.args, **self.kwargs) + except TypeError: + x = sys.exc_info()[1] + try: + test_callable_spec(self.callable, self.args, self.kwargs) + except cherrypy.HTTPError: + raise sys.exc_info()[1] + except: + raise x + raise + + +def test_callable_spec(callable, callable_args, callable_kwargs): + """ + Inspect callable and test to see if the given args are suitable for it. + + When an error occurs during the handler's invoking stage there are 2 + erroneous cases: + 1. Too many parameters passed to a function which doesn't define + one of *args or **kwargs. + 2. Too little parameters are passed to the function. + + There are 3 sources of parameters to a cherrypy handler. + 1. query string parameters are passed as keyword parameters to the + handler. + 2. body parameters are also passed as keyword parameters. + 3. when partial matching occurs, the final path atoms are passed as + positional args. + Both the query string and path atoms are part of the URI. If they are + incorrect, then a 404 Not Found should be raised. Conversely the body + parameters are part of the request; if they are invalid a 400 Bad Request. + """ + show_mismatched_params = getattr( + cherrypy.serving.request, 'show_mismatched_params', False) + try: + (args, varargs, varkw, defaults) = getargspec(callable) + except TypeError: + if isinstance(callable, object) and hasattr(callable, '__call__'): + (args, varargs, varkw, + defaults) = getargspec(callable.__call__) + else: + # If it wasn't one of our own types, re-raise + # the original error + raise + + if args and args[0] == 'self': + args = args[1:] + + arg_usage = dict([(arg, 0,) for arg in args]) + vararg_usage = 0 + varkw_usage = 0 + extra_kwargs = set() + + for i, value in enumerate(callable_args): + try: + arg_usage[args[i]] += 1 + except IndexError: + vararg_usage += 1 + + for key in callable_kwargs.keys(): + try: + arg_usage[key] += 1 + except KeyError: + varkw_usage += 1 + extra_kwargs.add(key) + + # figure out which args have defaults. + args_with_defaults = args[-len(defaults or []):] + for i, val in enumerate(defaults or []): + # Defaults take effect only when the arg hasn't been used yet. + if arg_usage[args_with_defaults[i]] == 0: + arg_usage[args_with_defaults[i]] += 1 + + missing_args = [] + multiple_args = [] + for key, usage in arg_usage.items(): + if usage == 0: + missing_args.append(key) + elif usage > 1: + multiple_args.append(key) + + if missing_args: + # In the case where the method allows body arguments + # there are 3 potential errors: + # 1. not enough query string parameters -> 404 + # 2. not enough body parameters -> 400 + # 3. not enough path parts (partial matches) -> 404 + # + # We can't actually tell which case it is, + # so I'm raising a 404 because that covers 2/3 of the + # possibilities + # + # In the case where the method does not allow body + # arguments it's definitely a 404. + message = None + if show_mismatched_params: + message = "Missing parameters: %s" % ",".join(missing_args) + raise cherrypy.HTTPError(404, message=message) + + # the extra positional arguments come from the path - 404 Not Found + if not varargs and vararg_usage > 0: + raise cherrypy.HTTPError(404) + + body_params = cherrypy.serving.request.body.params or {} + body_params = set(body_params.keys()) + qs_params = set(callable_kwargs.keys()) - body_params + + if multiple_args: + if qs_params.intersection(set(multiple_args)): + # If any of the multiple parameters came from the query string then + # it's a 404 Not Found + error = 404 + else: + # Otherwise it's a 400 Bad Request + error = 400 + + message = None + if show_mismatched_params: + message = "Multiple values for parameters: "\ + "%s" % ",".join(multiple_args) + raise cherrypy.HTTPError(error, message=message) + + if not varkw and varkw_usage > 0: + + # If there were extra query string parameters, it's a 404 Not Found + extra_qs_params = set(qs_params).intersection(extra_kwargs) + if extra_qs_params: + message = None + if show_mismatched_params: + message = "Unexpected query string "\ + "parameters: %s" % ", ".join(extra_qs_params) + raise cherrypy.HTTPError(404, message=message) + + # If there were any extra body parameters, it's a 400 Not Found + extra_body_params = set(body_params).intersection(extra_kwargs) + if extra_body_params: + message = None + if show_mismatched_params: + message = "Unexpected body parameters: "\ + "%s" % ", ".join(extra_body_params) + raise cherrypy.HTTPError(400, message=message) + + +try: + import inspect +except ImportError: + test_callable_spec = lambda callable, args, kwargs: None +else: + getargspec = inspect.getargspec + # Python 3 requires using getfullargspec if keyword-only arguments are present + if hasattr(inspect, 'getfullargspec'): + def getargspec(callable): + return inspect.getfullargspec(callable)[:4] + + +class LateParamPageHandler(PageHandler): + + """When passing cherrypy.request.params to the page handler, we do not + want to capture that dict too early; we want to give tools like the + decoding tool a chance to modify the params dict in-between the lookup + of the handler and the actual calling of the handler. This subclass + takes that into account, and allows request.params to be 'bound late' + (it's more complicated than that, but that's the effect). + """ + + def _get_kwargs(self): + kwargs = cherrypy.serving.request.params.copy() + if self._kwargs: + kwargs.update(self._kwargs) + return kwargs + + def _set_kwargs(self, kwargs): + cherrypy.serving.request.kwargs = kwargs + self._kwargs = kwargs + + kwargs = property(_get_kwargs, _set_kwargs, + doc='page handler kwargs (with ' + 'cherrypy.request.params copied in)') + + +if sys.version_info < (3, 0): + punctuation_to_underscores = string.maketrans( + string.punctuation, '_' * len(string.punctuation)) + + def validate_translator(t): + if not isinstance(t, str) or len(t) != 256: + raise ValueError( + "The translate argument must be a str of len 256.") +else: + punctuation_to_underscores = str.maketrans( + string.punctuation, '_' * len(string.punctuation)) + + def validate_translator(t): + if not isinstance(t, dict): + raise ValueError("The translate argument must be a dict.") + + +class Dispatcher(object): + + """CherryPy Dispatcher which walks a tree of objects to find a handler. + + The tree is rooted at cherrypy.request.app.root, and each hierarchical + component in the path_info argument is matched to a corresponding nested + attribute of the root object. Matching handlers must have an 'exposed' + attribute which evaluates to True. The special method name "index" + matches a URI which ends in a slash ("/"). The special method name + "default" may match a portion of the path_info (but only when no longer + substring of the path_info matches some other object). + + This is the default, built-in dispatcher for CherryPy. + """ + + dispatch_method_name = '_cp_dispatch' + """ + The name of the dispatch method that nodes may optionally implement + to provide their own dynamic dispatch algorithm. + """ + + def __init__(self, dispatch_method_name=None, + translate=punctuation_to_underscores): + validate_translator(translate) + self.translate = translate + if dispatch_method_name: + self.dispatch_method_name = dispatch_method_name + + def __call__(self, path_info): + """Set handler and config for the current request.""" + request = cherrypy.serving.request + func, vpath = self.find_handler(path_info) + + if func: + # Decode any leftover %2F in the virtual_path atoms. + vpath = [x.replace("%2F", "/") for x in vpath] + request.handler = LateParamPageHandler(func, *vpath) + else: + request.handler = cherrypy.NotFound() + + def find_handler(self, path): + """Return the appropriate page handler, plus any virtual path. + + This will return two objects. The first will be a callable, + which can be used to generate page output. Any parameters from + the query string or request body will be sent to that callable + as keyword arguments. + + The callable is found by traversing the application's tree, + starting from cherrypy.request.app.root, and matching path + components to successive objects in the tree. For example, the + URL "/path/to/handler" might return root.path.to.handler. + + The second object returned will be a list of names which are + 'virtual path' components: parts of the URL which are dynamic, + and were not used when looking up the handler. + These virtual path components are passed to the handler as + positional arguments. + """ + request = cherrypy.serving.request + app = request.app + root = app.root + dispatch_name = self.dispatch_method_name + + # Get config for the root object/path. + fullpath = [x for x in path.strip('/').split('/') if x] + ['index'] + fullpath_len = len(fullpath) + segleft = fullpath_len + nodeconf = {} + if hasattr(root, "_cp_config"): + nodeconf.update(root._cp_config) + if "/" in app.config: + nodeconf.update(app.config["/"]) + object_trail = [['root', root, nodeconf, segleft]] + + node = root + iternames = fullpath[:] + while iternames: + name = iternames[0] + # map to legal Python identifiers (e.g. replace '.' with '_') + objname = name.translate(self.translate) + + nodeconf = {} + subnode = getattr(node, objname, None) + pre_len = len(iternames) + if subnode is None: + dispatch = getattr(node, dispatch_name, None) + if dispatch and hasattr(dispatch, '__call__') and not \ + getattr(dispatch, 'exposed', False) and \ + pre_len > 1: + # Don't expose the hidden 'index' token to _cp_dispatch + # We skip this if pre_len == 1 since it makes no sense + # to call a dispatcher when we have no tokens left. + index_name = iternames.pop() + subnode = dispatch(vpath=iternames) + iternames.append(index_name) + else: + # We didn't find a path, but keep processing in case there + # is a default() handler. + iternames.pop(0) + else: + # We found the path, remove the vpath entry + iternames.pop(0) + segleft = len(iternames) + if segleft > pre_len: + # No path segment was removed. Raise an error. + raise cherrypy.CherryPyException( + "A vpath segment was added. Custom dispatchers may only " + + "remove elements. While trying to process " + + "{0} in {1}".format(name, fullpath) + ) + elif segleft == pre_len: + # Assume that the handler used the current path segment, but + # did not pop it. This allows things like + # return getattr(self, vpath[0], None) + iternames.pop(0) + segleft -= 1 + node = subnode + + if node is not None: + # Get _cp_config attached to this node. + if hasattr(node, "_cp_config"): + nodeconf.update(node._cp_config) + + # Mix in values from app.config for this path. + existing_len = fullpath_len - pre_len + if existing_len != 0: + curpath = '/' + '/'.join(fullpath[0:existing_len]) + else: + curpath = '' + new_segs = fullpath[fullpath_len - pre_len:fullpath_len - segleft] + for seg in new_segs: + curpath += '/' + seg + if curpath in app.config: + nodeconf.update(app.config[curpath]) + + object_trail.append([name, node, nodeconf, segleft]) + + def set_conf(): + """Collapse all object_trail config into cherrypy.request.config. + """ + base = cherrypy.config.copy() + # Note that we merge the config from each node + # even if that node was None. + for name, obj, conf, segleft in object_trail: + base.update(conf) + if 'tools.staticdir.dir' in conf: + base['tools.staticdir.section'] = '/' + \ + '/'.join(fullpath[0:fullpath_len - segleft]) + return base + + # Try successive objects (reverse order) + num_candidates = len(object_trail) - 1 + for i in range(num_candidates, -1, -1): + + name, candidate, nodeconf, segleft = object_trail[i] + if candidate is None: + continue + + # Try a "default" method on the current leaf. + if hasattr(candidate, "default"): + defhandler = candidate.default + if getattr(defhandler, 'exposed', False): + # Insert any extra _cp_config from the default handler. + conf = getattr(defhandler, "_cp_config", {}) + object_trail.insert( + i + 1, ["default", defhandler, conf, segleft]) + request.config = set_conf() + # See https://bitbucket.org/cherrypy/cherrypy/issue/613 + request.is_index = path.endswith("/") + return defhandler, fullpath[fullpath_len - segleft:-1] + + # Uncomment the next line to restrict positional params to + # "default". + # if i < num_candidates - 2: continue + + # Try the current leaf. + if getattr(candidate, 'exposed', False): + request.config = set_conf() + if i == num_candidates: + # We found the extra ".index". Mark request so tools + # can redirect if path_info has no trailing slash. + request.is_index = True + else: + # We're not at an 'index' handler. Mark request so tools + # can redirect if path_info has NO trailing slash. + # Note that this also includes handlers which take + # positional parameters (virtual paths). + request.is_index = False + return candidate, fullpath[fullpath_len - segleft:-1] + + # We didn't find anything + request.config = set_conf() + return None, [] + + +class MethodDispatcher(Dispatcher): + + """Additional dispatch based on cherrypy.request.method.upper(). + + Methods named GET, POST, etc will be called on an exposed class. + The method names must be all caps; the appropriate Allow header + will be output showing all capitalized method names as allowable + HTTP verbs. + + Note that the containing class must be exposed, not the methods. + """ + + def __call__(self, path_info): + """Set handler and config for the current request.""" + request = cherrypy.serving.request + resource, vpath = self.find_handler(path_info) + + if resource: + # Set Allow header + avail = [m for m in dir(resource) if m.isupper()] + if "GET" in avail and "HEAD" not in avail: + avail.append("HEAD") + avail.sort() + cherrypy.serving.response.headers['Allow'] = ", ".join(avail) + + # Find the subhandler + meth = request.method.upper() + func = getattr(resource, meth, None) + if func is None and meth == "HEAD": + func = getattr(resource, "GET", None) + if func: + # Grab any _cp_config on the subhandler. + if hasattr(func, "_cp_config"): + request.config.update(func._cp_config) + + # Decode any leftover %2F in the virtual_path atoms. + vpath = [x.replace("%2F", "/") for x in vpath] + request.handler = LateParamPageHandler(func, *vpath) + else: + request.handler = cherrypy.HTTPError(405) + else: + request.handler = cherrypy.NotFound() + + +class RoutesDispatcher(object): + + """A Routes based dispatcher for CherryPy.""" + + def __init__(self, full_result=False, **mapper_options): + """ + Routes dispatcher + + Set full_result to True if you wish the controller + and the action to be passed on to the page handler + parameters. By default they won't be. + """ + import routes + self.full_result = full_result + self.controllers = {} + self.mapper = routes.Mapper(**mapper_options) + self.mapper.controller_scan = self.controllers.keys + + def connect(self, name, route, controller, **kwargs): + self.controllers[name] = controller + self.mapper.connect(name, route, controller=name, **kwargs) + + def redirect(self, url): + raise cherrypy.HTTPRedirect(url) + + def __call__(self, path_info): + """Set handler and config for the current request.""" + func = self.find_handler(path_info) + if func: + cherrypy.serving.request.handler = LateParamPageHandler(func) + else: + cherrypy.serving.request.handler = cherrypy.NotFound() + + def find_handler(self, path_info): + """Find the right page handler, and set request.config.""" + import routes + + request = cherrypy.serving.request + + config = routes.request_config() + config.mapper = self.mapper + if hasattr(request, 'wsgi_environ'): + config.environ = request.wsgi_environ + config.host = request.headers.get('Host', None) + config.protocol = request.scheme + config.redirect = self.redirect + + result = self.mapper.match(path_info) + + config.mapper_dict = result + params = {} + if result: + params = result.copy() + if not self.full_result: + params.pop('controller', None) + params.pop('action', None) + request.params.update(params) + + # Get config for the root object/path. + request.config = base = cherrypy.config.copy() + curpath = "" + + def merge(nodeconf): + if 'tools.staticdir.dir' in nodeconf: + nodeconf['tools.staticdir.section'] = curpath or "/" + base.update(nodeconf) + + app = request.app + root = app.root + if hasattr(root, "_cp_config"): + merge(root._cp_config) + if "/" in app.config: + merge(app.config["/"]) + + # Mix in values from app.config. + atoms = [x for x in path_info.split("/") if x] + if atoms: + last = atoms.pop() + else: + last = None + for atom in atoms: + curpath = "/".join((curpath, atom)) + if curpath in app.config: + merge(app.config[curpath]) + + handler = None + if result: + controller = result.get('controller') + controller = self.controllers.get(controller, controller) + if controller: + if isinstance(controller, classtype): + controller = controller() + # Get config from the controller. + if hasattr(controller, "_cp_config"): + merge(controller._cp_config) + + action = result.get('action') + if action is not None: + handler = getattr(controller, action, None) + # Get config from the handler + if hasattr(handler, "_cp_config"): + merge(handler._cp_config) + else: + handler = controller + + # Do the last path atom here so it can + # override the controller's _cp_config. + if last: + curpath = "/".join((curpath, last)) + if curpath in app.config: + merge(app.config[curpath]) + + return handler + + +def XMLRPCDispatcher(next_dispatcher=Dispatcher()): + from cherrypy.lib import xmlrpcutil + + def xmlrpc_dispatch(path_info): + path_info = xmlrpcutil.patched_path(path_info) + return next_dispatcher(path_info) + return xmlrpc_dispatch + + +def VirtualHost(next_dispatcher=Dispatcher(), use_x_forwarded_host=True, + **domains): + """ + Select a different handler based on the Host header. + + This can be useful when running multiple sites within one CP server. + It allows several domains to point to different parts of a single + website structure. For example:: + + http://www.domain.example -> root + http://www.domain2.example -> root/domain2/ + http://www.domain2.example:443 -> root/secure + + can be accomplished via the following config:: + + [/] + request.dispatch = cherrypy.dispatch.VirtualHost( + **{'www.domain2.example': '/domain2', + 'www.domain2.example:443': '/secure', + }) + + next_dispatcher + The next dispatcher object in the dispatch chain. + The VirtualHost dispatcher adds a prefix to the URL and calls + another dispatcher. Defaults to cherrypy.dispatch.Dispatcher(). + + use_x_forwarded_host + If True (the default), any "X-Forwarded-Host" + request header will be used instead of the "Host" header. This + is commonly added by HTTP servers (such as Apache) when proxying. + + ``**domains`` + A dict of {host header value: virtual prefix} pairs. + The incoming "Host" request header is looked up in this dict, + and, if a match is found, the corresponding "virtual prefix" + value will be prepended to the URL path before calling the + next dispatcher. Note that you often need separate entries + for "example.com" and "www.example.com". In addition, "Host" + headers may contain the port number. + """ + from cherrypy.lib import httputil + + def vhost_dispatch(path_info): + request = cherrypy.serving.request + header = request.headers.get + + domain = header('Host', '') + if use_x_forwarded_host: + domain = header("X-Forwarded-Host", domain) + + prefix = domains.get(domain, "") + if prefix: + path_info = httputil.urljoin(prefix, path_info) + + result = next_dispatcher(path_info) + + # Touch up staticdir config. See + # https://bitbucket.org/cherrypy/cherrypy/issue/614. + section = request.config.get('tools.staticdir.section') + if section: + section = section[len(prefix):] + request.config['tools.staticdir.section'] = section + + return result + return vhost_dispatch diff --git a/lib/cherrypy/_cperror.py b/lib/cherrypy/_cperror.py new file mode 100644 index 00000000..6256595b --- /dev/null +++ b/lib/cherrypy/_cperror.py @@ -0,0 +1,609 @@ +"""Exception classes for CherryPy. + +CherryPy provides (and uses) exceptions for declaring that the HTTP response +should be a status other than the default "200 OK". You can ``raise`` them like +normal Python exceptions. You can also call them and they will raise +themselves; this means you can set an +:class:`HTTPError` +or :class:`HTTPRedirect` as the +:attr:`request.handler`. + +.. _redirectingpost: + +Redirecting POST +================ + +When you GET a resource and are redirected by the server to another Location, +there's generally no problem since GET is both a "safe method" (there should +be no side-effects) and an "idempotent method" (multiple calls are no different +than a single call). + +POST, however, is neither safe nor idempotent--if you +charge a credit card, you don't want to be charged twice by a redirect! + +For this reason, *none* of the 3xx responses permit a user-agent (browser) to +resubmit a POST on redirection without first confirming the action with the +user: + +===== ================================= =========== +300 Multiple Choices Confirm with the user +301 Moved Permanently Confirm with the user +302 Found (Object moved temporarily) Confirm with the user +303 See Other GET the new URI--no confirmation +304 Not modified (for conditional GET only--POST should not raise this error) +305 Use Proxy Confirm with the user +307 Temporary Redirect Confirm with the user +===== ================================= =========== + +However, browsers have historically implemented these restrictions poorly; +in particular, many browsers do not force the user to confirm 301, 302 +or 307 when redirecting POST. For this reason, CherryPy defaults to 303, +which most user-agents appear to have implemented correctly. Therefore, if +you raise HTTPRedirect for a POST request, the user-agent will most likely +attempt to GET the new URI (without asking for confirmation from the user). +We realize this is confusing for developers, but it's the safest thing we +could do. You are of course free to raise ``HTTPRedirect(uri, status=302)`` +or any other 3xx status if you know what you're doing, but given the +environment, we couldn't let any of those be the default. + +Custom Error Handling +===================== + +.. image:: /refman/cperrors.gif + +Anticipated HTTP responses +-------------------------- + +The 'error_page' config namespace can be used to provide custom HTML output for +expected responses (like 404 Not Found). Supply a filename from which the +output will be read. The contents will be interpolated with the values +%(status)s, %(message)s, %(traceback)s, and %(version)s using plain old Python +`string formatting `_. + +:: + + _cp_config = { + 'error_page.404': os.path.join(localDir, "static/index.html") + } + + +Beginning in version 3.1, you may also provide a function or other callable as +an error_page entry. It will be passed the same status, message, traceback and +version arguments that are interpolated into templates:: + + def error_page_402(status, message, traceback, version): + return "Error %s - Well, I'm very sorry but you haven't paid!" % status + cherrypy.config.update({'error_page.402': error_page_402}) + +Also in 3.1, in addition to the numbered error codes, you may also supply +"error_page.default" to handle all codes which do not have their own error_page +entry. + + + +Unanticipated errors +-------------------- + +CherryPy also has a generic error handling mechanism: whenever an unanticipated +error occurs in your code, it will call +:func:`Request.error_response` to +set the response status, headers, and body. By default, this is the same +output as +:class:`HTTPError(500) `. If you want to provide +some other behavior, you generally replace "request.error_response". + +Here is some sample code that shows how to display a custom error message and +send an e-mail containing the error:: + + from cherrypy import _cperror + + def handle_error(): + cherrypy.response.status = 500 + cherrypy.response.body = [ + "Sorry, an error occured" + ] + sendMail('error@domain.com', + 'Error in your web app', + _cperror.format_exc()) + + class Root: + _cp_config = {'request.error_response': handle_error} + + +Note that you have to explicitly set +:attr:`response.body ` +and not simply return an error message as a result. +""" + +from cgi import escape as _escape +from sys import exc_info as _exc_info +from traceback import format_exception as _format_exception +from cherrypy._cpcompat import basestring, bytestr, iteritems, ntob +from cherrypy._cpcompat import tonative, urljoin as _urljoin +from cherrypy.lib import httputil as _httputil + + +class CherryPyException(Exception): + + """A base class for CherryPy exceptions.""" + pass + + +class TimeoutError(CherryPyException): + + """Exception raised when Response.timed_out is detected.""" + pass + + +class InternalRedirect(CherryPyException): + + """Exception raised to switch to the handler for a different URL. + + This exception will redirect processing to another path within the site + (without informing the client). Provide the new path as an argument when + raising the exception. Provide any params in the querystring for the new + URL. + """ + + def __init__(self, path, query_string=""): + import cherrypy + self.request = cherrypy.serving.request + + self.query_string = query_string + if "?" in path: + # Separate any params included in the path + path, self.query_string = path.split("?", 1) + + # Note that urljoin will "do the right thing" whether url is: + # 1. a URL relative to root (e.g. "/dummy") + # 2. a URL relative to the current path + # Note that any query string will be discarded. + path = _urljoin(self.request.path_info, path) + + # Set a 'path' member attribute so that code which traps this + # error can have access to it. + self.path = path + + CherryPyException.__init__(self, path, self.query_string) + + +class HTTPRedirect(CherryPyException): + + """Exception raised when the request should be redirected. + + This exception will force a HTTP redirect to the URL or URL's you give it. + The new URL must be passed as the first argument to the Exception, + e.g., HTTPRedirect(newUrl). Multiple URLs are allowed in a list. + If a URL is absolute, it will be used as-is. If it is relative, it is + assumed to be relative to the current cherrypy.request.path_info. + + If one of the provided URL is a unicode object, it will be encoded + using the default encoding or the one passed in parameter. + + There are multiple types of redirect, from which you can select via the + ``status`` argument. If you do not provide a ``status`` arg, it defaults to + 303 (or 302 if responding with HTTP/1.0). + + Examples:: + + raise cherrypy.HTTPRedirect("") + raise cherrypy.HTTPRedirect("/abs/path", 307) + raise cherrypy.HTTPRedirect(["path1", "path2?a=1&b=2"], 301) + + See :ref:`redirectingpost` for additional caveats. + """ + + status = None + """The integer HTTP status code to emit.""" + + urls = None + """The list of URL's to emit.""" + + encoding = 'utf-8' + """The encoding when passed urls are not native strings""" + + def __init__(self, urls, status=None, encoding=None): + import cherrypy + request = cherrypy.serving.request + + if isinstance(urls, basestring): + urls = [urls] + + abs_urls = [] + for url in urls: + url = tonative(url, encoding or self.encoding) + + # Note that urljoin will "do the right thing" whether url is: + # 1. a complete URL with host (e.g. "http://www.example.com/test") + # 2. a URL relative to root (e.g. "/dummy") + # 3. a URL relative to the current path + # Note that any query string in cherrypy.request is discarded. + url = _urljoin(cherrypy.url(), url) + abs_urls.append(url) + self.urls = abs_urls + + # RFC 2616 indicates a 301 response code fits our goal; however, + # browser support for 301 is quite messy. Do 302/303 instead. See + # http://www.alanflavell.org.uk/www/post-redirect.html + if status is None: + if request.protocol >= (1, 1): + status = 303 + else: + status = 302 + else: + status = int(status) + if status < 300 or status > 399: + raise ValueError("status must be between 300 and 399.") + + self.status = status + CherryPyException.__init__(self, abs_urls, status) + + def set_response(self): + """Modify cherrypy.response status, headers, and body to represent + self. + + CherryPy uses this internally, but you can also use it to create an + HTTPRedirect object and set its output without *raising* the exception. + """ + import cherrypy + response = cherrypy.serving.response + response.status = status = self.status + + if status in (300, 301, 302, 303, 307): + response.headers['Content-Type'] = "text/html;charset=utf-8" + # "The ... URI SHOULD be given by the Location field + # in the response." + response.headers['Location'] = self.urls[0] + + # "Unless the request method was HEAD, the entity of the response + # SHOULD contain a short hypertext note with a hyperlink to the + # new URI(s)." + msg = { + 300: "This resource can be found at ", + 301: "This resource has permanently moved to ", + 302: "This resource resides temporarily at ", + 303: "This resource can be found at ", + 307: "This resource has moved temporarily to ", + }[status] + msg += '%s.' + from xml.sax import saxutils + msgs = [msg % (saxutils.quoteattr(u), u) for u in self.urls] + response.body = ntob("
\n".join(msgs), 'utf-8') + # Previous code may have set C-L, so we have to reset it + # (allow finalize to set it). + response.headers.pop('Content-Length', None) + elif status == 304: + # Not Modified. + # "The response MUST include the following header fields: + # Date, unless its omission is required by section 14.18.1" + # The "Date" header should have been set in Response.__init__ + + # "...the response SHOULD NOT include other entity-headers." + for key in ('Allow', 'Content-Encoding', 'Content-Language', + 'Content-Length', 'Content-Location', 'Content-MD5', + 'Content-Range', 'Content-Type', 'Expires', + 'Last-Modified'): + if key in response.headers: + del response.headers[key] + + # "The 304 response MUST NOT contain a message-body." + response.body = None + # Previous code may have set C-L, so we have to reset it. + response.headers.pop('Content-Length', None) + elif status == 305: + # Use Proxy. + # self.urls[0] should be the URI of the proxy. + response.headers['Location'] = self.urls[0] + response.body = None + # Previous code may have set C-L, so we have to reset it. + response.headers.pop('Content-Length', None) + else: + raise ValueError("The %s status code is unknown." % status) + + def __call__(self): + """Use this exception as a request.handler (raise self).""" + raise self + + +def clean_headers(status): + """Remove any headers which should not apply to an error response.""" + import cherrypy + + response = cherrypy.serving.response + + # Remove headers which applied to the original content, + # but do not apply to the error page. + respheaders = response.headers + for key in ["Accept-Ranges", "Age", "ETag", "Location", "Retry-After", + "Vary", "Content-Encoding", "Content-Length", "Expires", + "Content-Location", "Content-MD5", "Last-Modified"]: + if key in respheaders: + del respheaders[key] + + if status != 416: + # A server sending a response with status code 416 (Requested + # range not satisfiable) SHOULD include a Content-Range field + # with a byte-range-resp-spec of "*". The instance-length + # specifies the current length of the selected resource. + # A response with status code 206 (Partial Content) MUST NOT + # include a Content-Range field with a byte-range- resp-spec of "*". + if "Content-Range" in respheaders: + del respheaders["Content-Range"] + + +class HTTPError(CherryPyException): + + """Exception used to return an HTTP error code (4xx-5xx) to the client. + + This exception can be used to automatically send a response using a + http status code, with an appropriate error page. It takes an optional + ``status`` argument (which must be between 400 and 599); it defaults to 500 + ("Internal Server Error"). It also takes an optional ``message`` argument, + which will be returned in the response body. See + `RFC2616 `_ + for a complete list of available error codes and when to use them. + + Examples:: + + raise cherrypy.HTTPError(403) + raise cherrypy.HTTPError( + "403 Forbidden", "You are not allowed to access this resource.") + """ + + status = None + """The HTTP status code. May be of type int or str (with a Reason-Phrase). + """ + + code = None + """The integer HTTP status code.""" + + reason = None + """The HTTP Reason-Phrase string.""" + + def __init__(self, status=500, message=None): + self.status = status + try: + self.code, self.reason, defaultmsg = _httputil.valid_status(status) + except ValueError: + raise self.__class__(500, _exc_info()[1].args[0]) + + if self.code < 400 or self.code > 599: + raise ValueError("status must be between 400 and 599.") + + # See http://www.python.org/dev/peps/pep-0352/ + # self.message = message + self._message = message or defaultmsg + CherryPyException.__init__(self, status, message) + + def set_response(self): + """Modify cherrypy.response status, headers, and body to represent + self. + + CherryPy uses this internally, but you can also use it to create an + HTTPError object and set its output without *raising* the exception. + """ + import cherrypy + + response = cherrypy.serving.response + + clean_headers(self.code) + + # In all cases, finalize will be called after this method, + # so don't bother cleaning up response values here. + response.status = self.status + tb = None + if cherrypy.serving.request.show_tracebacks: + tb = format_exc() + + response.headers.pop('Content-Length', None) + + content = self.get_error_page(self.status, traceback=tb, + message=self._message) + response.body = content + + _be_ie_unfriendly(self.code) + + def get_error_page(self, *args, **kwargs): + return get_error_page(*args, **kwargs) + + def __call__(self): + """Use this exception as a request.handler (raise self).""" + raise self + + +class NotFound(HTTPError): + + """Exception raised when a URL could not be mapped to any handler (404). + + This is equivalent to raising + :class:`HTTPError("404 Not Found") `. + """ + + def __init__(self, path=None): + if path is None: + import cherrypy + request = cherrypy.serving.request + path = request.script_name + request.path_info + self.args = (path,) + HTTPError.__init__(self, 404, "The path '%s' was not found." % path) + + +_HTTPErrorTemplate = ''' + + + + %(status)s + + + +

%(status)s

+

%(message)s

+
%(traceback)s
+
+ + Powered by CherryPy %(version)s + +
+ + +''' + + +def get_error_page(status, **kwargs): + """Return an HTML page, containing a pretty error response. + + status should be an int or a str. + kwargs will be interpolated into the page template. + """ + import cherrypy + + try: + code, reason, message = _httputil.valid_status(status) + except ValueError: + raise cherrypy.HTTPError(500, _exc_info()[1].args[0]) + + # We can't use setdefault here, because some + # callers send None for kwarg values. + if kwargs.get('status') is None: + kwargs['status'] = "%s %s" % (code, reason) + if kwargs.get('message') is None: + kwargs['message'] = message + if kwargs.get('traceback') is None: + kwargs['traceback'] = '' + if kwargs.get('version') is None: + kwargs['version'] = cherrypy.__version__ + + for k, v in iteritems(kwargs): + if v is None: + kwargs[k] = "" + else: + kwargs[k] = _escape(kwargs[k]) + + # Use a custom template or callable for the error page? + pages = cherrypy.serving.request.error_page + error_page = pages.get(code) or pages.get('default') + + # Default template, can be overridden below. + template = _HTTPErrorTemplate + if error_page: + try: + if hasattr(error_page, '__call__'): + # The caller function may be setting headers manually, + # so we delegate to it completely. We may be returning + # an iterator as well as a string here. + # + # We *must* make sure any content is not unicode. + result = error_page(**kwargs) + if cherrypy.lib.is_iterator(result): + from cherrypy.lib.encoding import UTF8StreamEncoder + return UTF8StreamEncoder(result) + elif isinstance(result, cherrypy._cpcompat.unicodestr): + return result.encode('utf-8') + else: + if not isinstance(result, cherrypy._cpcompat.bytestr): + raise ValueError('error page function did not ' + 'return a bytestring, unicodestring or an ' + 'iterator - returned object of type %s.' + % (type(result).__name__)) + return result + else: + # Load the template from this path. + template = tonative(open(error_page, 'rb').read()) + except: + e = _format_exception(*_exc_info())[-1] + m = kwargs['message'] + if m: + m += "
" + m += "In addition, the custom error page failed:\n
%s" % e + kwargs['message'] = m + + response = cherrypy.serving.response + response.headers['Content-Type'] = "text/html;charset=utf-8" + result = template % kwargs + return result.encode('utf-8') + + + +_ie_friendly_error_sizes = { + 400: 512, 403: 256, 404: 512, 405: 256, + 406: 512, 408: 512, 409: 512, 410: 256, + 500: 512, 501: 512, 505: 512, +} + + +def _be_ie_unfriendly(status): + import cherrypy + response = cherrypy.serving.response + + # For some statuses, Internet Explorer 5+ shows "friendly error + # messages" instead of our response.body if the body is smaller + # than a given size. Fix this by returning a body over that size + # (by adding whitespace). + # See http://support.microsoft.com/kb/q218155/ + s = _ie_friendly_error_sizes.get(status, 0) + if s: + s += 1 + # Since we are issuing an HTTP error status, we assume that + # the entity is short, and we should just collapse it. + content = response.collapse_body() + l = len(content) + if l and l < s: + # IN ADDITION: the response must be written to IE + # in one chunk or it will still get replaced! Bah. + content = content + (ntob(" ") * (s - l)) + response.body = content + response.headers['Content-Length'] = str(len(content)) + + +def format_exc(exc=None): + """Return exc (or sys.exc_info if None), formatted.""" + try: + if exc is None: + exc = _exc_info() + if exc == (None, None, None): + return "" + import traceback + return "".join(traceback.format_exception(*exc)) + finally: + del exc + + +def bare_error(extrabody=None): + """Produce status, headers, body for a critical error. + + Returns a triple without calling any other questionable functions, + so it should be as error-free as possible. Call it from an HTTP server + if you get errors outside of the request. + + If extrabody is None, a friendly but rather unhelpful error message + is set in the body. If extrabody is a string, it will be appended + as-is to the body. + """ + + # The whole point of this function is to be a last line-of-defense + # in handling errors. That is, it must not raise any errors itself; + # it cannot be allowed to fail. Therefore, don't add to it! + # In particular, don't call any other CP functions. + + body = ntob("Unrecoverable error in the server.") + if extrabody is not None: + if not isinstance(extrabody, bytestr): + extrabody = extrabody.encode('utf-8') + body += ntob("\n") + extrabody + + return (ntob("500 Internal Server Error"), + [(ntob('Content-Type'), ntob('text/plain')), + (ntob('Content-Length'), ntob(str(len(body)), 'ISO-8859-1'))], + [body]) diff --git a/lib/cherrypy/_cplogging.py b/lib/cherrypy/_cplogging.py new file mode 100644 index 00000000..554fd7ef --- /dev/null +++ b/lib/cherrypy/_cplogging.py @@ -0,0 +1,459 @@ +""" +Simple config +============= + +Although CherryPy uses the :mod:`Python logging module `, it does so +behind the scenes so that simple logging is simple, but complicated logging +is still possible. "Simple" logging means that you can log to the screen +(i.e. console/stdout) or to a file, and that you can easily have separate +error and access log files. + +Here are the simplified logging settings. You use these by adding lines to +your config file or dict. You should set these at either the global level or +per application (see next), but generally not both. + + * ``log.screen``: Set this to True to have both "error" and "access" messages + printed to stdout. + * ``log.access_file``: Set this to an absolute filename where you want + "access" messages written. + * ``log.error_file``: Set this to an absolute filename where you want "error" + messages written. + +Many events are automatically logged; to log your own application events, call +:func:`cherrypy.log`. + +Architecture +============ + +Separate scopes +--------------- + +CherryPy provides log managers at both the global and application layers. +This means you can have one set of logging rules for your entire site, +and another set of rules specific to each application. The global log +manager is found at :func:`cherrypy.log`, and the log manager for each +application is found at :attr:`app.log`. +If you're inside a request, the latter is reachable from +``cherrypy.request.app.log``; if you're outside a request, you'll have to +obtain a reference to the ``app``: either the return value of +:func:`tree.mount()` or, if you used +:func:`quickstart()` instead, via +``cherrypy.tree.apps['/']``. + +By default, the global logs are named "cherrypy.error" and "cherrypy.access", +and the application logs are named "cherrypy.error.2378745" and +"cherrypy.access.2378745" (the number is the id of the Application object). +This means that the application logs "bubble up" to the site logs, so if your +application has no log handlers, the site-level handlers will still log the +messages. + +Errors vs. Access +----------------- + +Each log manager handles both "access" messages (one per HTTP request) and +"error" messages (everything else). Note that the "error" log is not just for +errors! The format of access messages is highly formalized, but the error log +isn't--it receives messages from a variety of sources (including full error +tracebacks, if enabled). + +If you are logging the access log and error log to the same source, then there +is a possibility that a specially crafted error message may replicate an access +log message as described in CWE-117. In this case it is the application +developer's responsibility to manually escape data before using CherryPy's log() +functionality, or they may create an application that is vulnerable to CWE-117. +This would be achieved by using a custom handler escape any special characters, +and attached as described below. + +Custom Handlers +=============== + +The simple settings above work by manipulating Python's standard :mod:`logging` +module. So when you need something more complex, the full power of the standard +module is yours to exploit. You can borrow or create custom handlers, formats, +filters, and much more. Here's an example that skips the standard FileHandler +and uses a RotatingFileHandler instead: + +:: + + #python + log = app.log + + # Remove the default FileHandlers if present. + log.error_file = "" + log.access_file = "" + + maxBytes = getattr(log, "rot_maxBytes", 10000000) + backupCount = getattr(log, "rot_backupCount", 1000) + + # Make a new RotatingFileHandler for the error log. + fname = getattr(log, "rot_error_file", "error.log") + h = handlers.RotatingFileHandler(fname, 'a', maxBytes, backupCount) + h.setLevel(DEBUG) + h.setFormatter(_cplogging.logfmt) + log.error_log.addHandler(h) + + # Make a new RotatingFileHandler for the access log. + fname = getattr(log, "rot_access_file", "access.log") + h = handlers.RotatingFileHandler(fname, 'a', maxBytes, backupCount) + h.setLevel(DEBUG) + h.setFormatter(_cplogging.logfmt) + log.access_log.addHandler(h) + + +The ``rot_*`` attributes are pulled straight from the application log object. +Since "log.*" config entries simply set attributes on the log object, you can +add custom attributes to your heart's content. Note that these handlers are +used ''instead'' of the default, simple handlers outlined above (so don't set +the "log.error_file" config entry, for example). +""" + +import datetime +import logging +# Silence the no-handlers "warning" (stderr write!) in stdlib logging +logging.Logger.manager.emittedNoHandlerWarning = 1 +logfmt = logging.Formatter("%(message)s") +import os +import sys + +import cherrypy +from cherrypy import _cperror +from cherrypy._cpcompat import ntob, py3k + + +class NullHandler(logging.Handler): + + """A no-op logging handler to silence the logging.lastResort handler.""" + + def handle(self, record): + pass + + def emit(self, record): + pass + + def createLock(self): + self.lock = None + + +class LogManager(object): + + """An object to assist both simple and advanced logging. + + ``cherrypy.log`` is an instance of this class. + """ + + appid = None + """The id() of the Application object which owns this log manager. If this + is a global log manager, appid is None.""" + + error_log = None + """The actual :class:`logging.Logger` instance for error messages.""" + + access_log = None + """The actual :class:`logging.Logger` instance for access messages.""" + + if py3k: + access_log_format = \ + '{h} {l} {u} {t} "{r}" {s} {b} "{f}" "{a}"' + else: + access_log_format = \ + '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"' + + logger_root = None + """The "top-level" logger name. + + This string will be used as the first segment in the Logger names. + The default is "cherrypy", for example, in which case the Logger names + will be of the form:: + + cherrypy.error. + cherrypy.access. + """ + + def __init__(self, appid=None, logger_root="cherrypy"): + self.logger_root = logger_root + self.appid = appid + if appid is None: + self.error_log = logging.getLogger("%s.error" % logger_root) + self.access_log = logging.getLogger("%s.access" % logger_root) + else: + self.error_log = logging.getLogger( + "%s.error.%s" % (logger_root, appid)) + self.access_log = logging.getLogger( + "%s.access.%s" % (logger_root, appid)) + self.error_log.setLevel(logging.INFO) + self.access_log.setLevel(logging.INFO) + + # Silence the no-handlers "warning" (stderr write!) in stdlib logging + self.error_log.addHandler(NullHandler()) + self.access_log.addHandler(NullHandler()) + + cherrypy.engine.subscribe('graceful', self.reopen_files) + + def reopen_files(self): + """Close and reopen all file handlers.""" + for log in (self.error_log, self.access_log): + for h in log.handlers: + if isinstance(h, logging.FileHandler): + h.acquire() + h.stream.close() + h.stream = open(h.baseFilename, h.mode) + h.release() + + def error(self, msg='', context='', severity=logging.INFO, + traceback=False): + """Write the given ``msg`` to the error log. + + This is not just for errors! Applications may call this at any time + to log application-specific information. + + If ``traceback`` is True, the traceback of the current exception + (if any) will be appended to ``msg``. + """ + if traceback: + msg += _cperror.format_exc() + self.error_log.log(severity, ' '.join((self.time(), context, msg))) + + def __call__(self, *args, **kwargs): + """An alias for ``error``.""" + return self.error(*args, **kwargs) + + def access(self): + """Write to the access log (in Apache/NCSA Combined Log format). + + See the + `apache documentation `_ + for format details. + + CherryPy calls this automatically for you. Note there are no arguments; + it collects the data itself from + :class:`cherrypy.request`. + + Like Apache started doing in 2.0.46, non-printable and other special + characters in %r (and we expand that to all parts) are escaped using + \\xhh sequences, where hh stands for the hexadecimal representation + of the raw byte. Exceptions from this rule are " and \\, which are + escaped by prepending a backslash, and all whitespace characters, + which are written in their C-style notation (\\n, \\t, etc). + """ + request = cherrypy.serving.request + remote = request.remote + response = cherrypy.serving.response + outheaders = response.headers + inheaders = request.headers + if response.output_status is None: + status = "-" + else: + status = response.output_status.split(ntob(" "), 1)[0] + if py3k: + status = status.decode('ISO-8859-1') + + atoms = {'h': remote.name or remote.ip, + 'l': '-', + 'u': getattr(request, "login", None) or "-", + 't': self.time(), + 'r': request.request_line, + 's': status, + 'b': dict.get(outheaders, 'Content-Length', '') or "-", + 'f': dict.get(inheaders, 'Referer', ''), + 'a': dict.get(inheaders, 'User-Agent', ''), + 'o': dict.get(inheaders, 'Host', '-'), + } + if py3k: + for k, v in atoms.items(): + if not isinstance(v, str): + v = str(v) + v = v.replace('"', '\\"').encode('utf8') + # Fortunately, repr(str) escapes unprintable chars, \n, \t, etc + # and backslash for us. All we have to do is strip the quotes. + v = repr(v)[2:-1] + + # in python 3.0 the repr of bytes (as returned by encode) + # uses double \'s. But then the logger escapes them yet, again + # resulting in quadruple slashes. Remove the extra one here. + v = v.replace('\\\\', '\\') + + # Escape double-quote. + atoms[k] = v + + try: + self.access_log.log( + logging.INFO, self.access_log_format.format(**atoms)) + except: + self(traceback=True) + else: + for k, v in atoms.items(): + if isinstance(v, unicode): + v = v.encode('utf8') + elif not isinstance(v, str): + v = str(v) + # Fortunately, repr(str) escapes unprintable chars, \n, \t, etc + # and backslash for us. All we have to do is strip the quotes. + v = repr(v)[1:-1] + # Escape double-quote. + atoms[k] = v.replace('"', '\\"') + + try: + self.access_log.log( + logging.INFO, self.access_log_format % atoms) + except: + self(traceback=True) + + def time(self): + """Return now() in Apache Common Log Format (no timezone).""" + now = datetime.datetime.now() + monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', + 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'] + month = monthnames[now.month - 1].capitalize() + return ('[%02d/%s/%04d:%02d:%02d:%02d]' % + (now.day, month, now.year, now.hour, now.minute, now.second)) + + def _get_builtin_handler(self, log, key): + for h in log.handlers: + if getattr(h, "_cpbuiltin", None) == key: + return h + + # ------------------------- Screen handlers ------------------------- # + def _set_screen_handler(self, log, enable, stream=None): + h = self._get_builtin_handler(log, "screen") + if enable: + if not h: + if stream is None: + stream = sys.stderr + h = logging.StreamHandler(stream) + h.setFormatter(logfmt) + h._cpbuiltin = "screen" + log.addHandler(h) + elif h: + log.handlers.remove(h) + + def _get_screen(self): + h = self._get_builtin_handler + has_h = h(self.error_log, "screen") or h(self.access_log, "screen") + return bool(has_h) + + def _set_screen(self, newvalue): + self._set_screen_handler(self.error_log, newvalue, stream=sys.stderr) + self._set_screen_handler(self.access_log, newvalue, stream=sys.stdout) + screen = property(_get_screen, _set_screen, + doc="""Turn stderr/stdout logging on or off. + + If you set this to True, it'll add the appropriate StreamHandler for + you. If you set it to False, it will remove the handler. + """) + + # -------------------------- File handlers -------------------------- # + + def _add_builtin_file_handler(self, log, fname): + h = logging.FileHandler(fname) + h.setFormatter(logfmt) + h._cpbuiltin = "file" + log.addHandler(h) + + def _set_file_handler(self, log, filename): + h = self._get_builtin_handler(log, "file") + if filename: + if h: + if h.baseFilename != os.path.abspath(filename): + h.close() + log.handlers.remove(h) + self._add_builtin_file_handler(log, filename) + else: + self._add_builtin_file_handler(log, filename) + else: + if h: + h.close() + log.handlers.remove(h) + + def _get_error_file(self): + h = self._get_builtin_handler(self.error_log, "file") + if h: + return h.baseFilename + return '' + + def _set_error_file(self, newvalue): + self._set_file_handler(self.error_log, newvalue) + error_file = property(_get_error_file, _set_error_file, + doc="""The filename for self.error_log. + + If you set this to a string, it'll add the appropriate FileHandler for + you. If you set it to ``None`` or ``''``, it will remove the handler. + """) + + def _get_access_file(self): + h = self._get_builtin_handler(self.access_log, "file") + if h: + return h.baseFilename + return '' + + def _set_access_file(self, newvalue): + self._set_file_handler(self.access_log, newvalue) + access_file = property(_get_access_file, _set_access_file, + doc="""The filename for self.access_log. + + If you set this to a string, it'll add the appropriate FileHandler for + you. If you set it to ``None`` or ``''``, it will remove the handler. + """) + + # ------------------------- WSGI handlers ------------------------- # + + def _set_wsgi_handler(self, log, enable): + h = self._get_builtin_handler(log, "wsgi") + if enable: + if not h: + h = WSGIErrorHandler() + h.setFormatter(logfmt) + h._cpbuiltin = "wsgi" + log.addHandler(h) + elif h: + log.handlers.remove(h) + + def _get_wsgi(self): + return bool(self._get_builtin_handler(self.error_log, "wsgi")) + + def _set_wsgi(self, newvalue): + self._set_wsgi_handler(self.error_log, newvalue) + wsgi = property(_get_wsgi, _set_wsgi, + doc="""Write errors to wsgi.errors. + + If you set this to True, it'll add the appropriate + :class:`WSGIErrorHandler` for you + (which writes errors to ``wsgi.errors``). + If you set it to False, it will remove the handler. + """) + + +class WSGIErrorHandler(logging.Handler): + + "A handler class which writes logging records to environ['wsgi.errors']." + + def flush(self): + """Flushes the stream.""" + try: + stream = cherrypy.serving.request.wsgi_environ.get('wsgi.errors') + except (AttributeError, KeyError): + pass + else: + stream.flush() + + def emit(self, record): + """Emit a record.""" + try: + stream = cherrypy.serving.request.wsgi_environ.get('wsgi.errors') + except (AttributeError, KeyError): + pass + else: + try: + msg = self.format(record) + fs = "%s\n" + import types + # if no unicode support... + if not hasattr(types, "UnicodeType"): + stream.write(fs % msg) + else: + try: + stream.write(fs % msg) + except UnicodeError: + stream.write(fs % msg.encode("UTF-8")) + self.flush() + except: + self.handleError(record) diff --git a/lib/cherrypy/_cpmodpy.py b/lib/cherrypy/_cpmodpy.py new file mode 100644 index 00000000..02154d69 --- /dev/null +++ b/lib/cherrypy/_cpmodpy.py @@ -0,0 +1,353 @@ +"""Native adapter for serving CherryPy via mod_python + +Basic usage: + +########################################## +# Application in a module called myapp.py +########################################## + +import cherrypy + +class Root: + @cherrypy.expose + def index(self): + return 'Hi there, Ho there, Hey there' + + +# We will use this method from the mod_python configuration +# as the entry point to our application +def setup_server(): + cherrypy.tree.mount(Root()) + cherrypy.config.update({'environment': 'production', + 'log.screen': False, + 'show_tracebacks': False}) + +########################################## +# mod_python settings for apache2 +# This should reside in your httpd.conf +# or a file that will be loaded at +# apache startup +########################################## + +# Start +DocumentRoot "/" +Listen 8080 +LoadModule python_module /usr/lib/apache2/modules/mod_python.so + + + PythonPath "sys.path+['/path/to/my/application']" + SetHandler python-program + PythonHandler cherrypy._cpmodpy::handler + PythonOption cherrypy.setup myapp::setup_server + PythonDebug On + +# End + +The actual path to your mod_python.so is dependent on your +environment. In this case we suppose a global mod_python +installation on a Linux distribution such as Ubuntu. + +We do set the PythonPath configuration setting so that +your application can be found by from the user running +the apache2 instance. Of course if your application +resides in the global site-package this won't be needed. + +Then restart apache2 and access http://127.0.0.1:8080 +""" + +import logging +import sys + +import cherrypy +from cherrypy._cpcompat import BytesIO, copyitems, ntob +from cherrypy._cperror import format_exc, bare_error +from cherrypy.lib import httputil + + +# ------------------------------ Request-handling + + +def setup(req): + from mod_python import apache + + # Run any setup functions defined by a "PythonOption cherrypy.setup" + # directive. + options = req.get_options() + if 'cherrypy.setup' in options: + for function in options['cherrypy.setup'].split(): + atoms = function.split('::', 1) + if len(atoms) == 1: + mod = __import__(atoms[0], globals(), locals()) + else: + modname, fname = atoms + mod = __import__(modname, globals(), locals(), [fname]) + func = getattr(mod, fname) + func() + + cherrypy.config.update({'log.screen': False, + "tools.ignore_headers.on": True, + "tools.ignore_headers.headers": ['Range'], + }) + + engine = cherrypy.engine + if hasattr(engine, "signal_handler"): + engine.signal_handler.unsubscribe() + if hasattr(engine, "console_control_handler"): + engine.console_control_handler.unsubscribe() + engine.autoreload.unsubscribe() + cherrypy.server.unsubscribe() + + def _log(msg, level): + newlevel = apache.APLOG_ERR + if logging.DEBUG >= level: + newlevel = apache.APLOG_DEBUG + elif logging.INFO >= level: + newlevel = apache.APLOG_INFO + elif logging.WARNING >= level: + newlevel = apache.APLOG_WARNING + # On Windows, req.server is required or the msg will vanish. See + # http://www.modpython.org/pipermail/mod_python/2003-October/014291.html + # Also, "When server is not specified...LogLevel does not apply..." + apache.log_error(msg, newlevel, req.server) + engine.subscribe('log', _log) + + engine.start() + + def cherrypy_cleanup(data): + engine.exit() + try: + # apache.register_cleanup wasn't available until 3.1.4. + apache.register_cleanup(cherrypy_cleanup) + except AttributeError: + req.server.register_cleanup(req, cherrypy_cleanup) + + +class _ReadOnlyRequest: + expose = ('read', 'readline', 'readlines') + + def __init__(self, req): + for method in self.expose: + self.__dict__[method] = getattr(req, method) + + +recursive = False + +_isSetUp = False + + +def handler(req): + from mod_python import apache + try: + global _isSetUp + if not _isSetUp: + setup(req) + _isSetUp = True + + # Obtain a Request object from CherryPy + local = req.connection.local_addr + local = httputil.Host( + local[0], local[1], req.connection.local_host or "") + remote = req.connection.remote_addr + remote = httputil.Host( + remote[0], remote[1], req.connection.remote_host or "") + + scheme = req.parsed_uri[0] or 'http' + req.get_basic_auth_pw() + + try: + # apache.mpm_query only became available in mod_python 3.1 + q = apache.mpm_query + threaded = q(apache.AP_MPMQ_IS_THREADED) + forked = q(apache.AP_MPMQ_IS_FORKED) + except AttributeError: + bad_value = ("You must provide a PythonOption '%s', " + "either 'on' or 'off', when running a version " + "of mod_python < 3.1") + + threaded = options.get('multithread', '').lower() + if threaded == 'on': + threaded = True + elif threaded == 'off': + threaded = False + else: + raise ValueError(bad_value % "multithread") + + forked = options.get('multiprocess', '').lower() + if forked == 'on': + forked = True + elif forked == 'off': + forked = False + else: + raise ValueError(bad_value % "multiprocess") + + sn = cherrypy.tree.script_name(req.uri or "/") + if sn is None: + send_response(req, '404 Not Found', [], '') + else: + app = cherrypy.tree.apps[sn] + method = req.method + path = req.uri + qs = req.args or "" + reqproto = req.protocol + headers = copyitems(req.headers_in) + rfile = _ReadOnlyRequest(req) + prev = None + + try: + redirections = [] + while True: + request, response = app.get_serving(local, remote, scheme, + "HTTP/1.1") + request.login = req.user + request.multithread = bool(threaded) + request.multiprocess = bool(forked) + request.app = app + request.prev = prev + + # Run the CherryPy Request object and obtain the response + try: + request.run(method, path, qs, reqproto, headers, rfile) + break + except cherrypy.InternalRedirect: + ir = sys.exc_info()[1] + app.release_serving() + prev = request + + if not recursive: + if ir.path in redirections: + raise RuntimeError( + "InternalRedirector visited the same URL " + "twice: %r" % ir.path) + else: + # Add the *previous* path_info + qs to + # redirections. + if qs: + qs = "?" + qs + redirections.append(sn + path + qs) + + # Munge environment and try again. + method = "GET" + path = ir.path + qs = ir.query_string + rfile = BytesIO() + + send_response( + req, response.output_status, response.header_list, + response.body, response.stream) + finally: + app.release_serving() + except: + tb = format_exc() + cherrypy.log(tb, 'MOD_PYTHON', severity=logging.ERROR) + s, h, b = bare_error() + send_response(req, s, h, b) + return apache.OK + + +def send_response(req, status, headers, body, stream=False): + # Set response status + req.status = int(status[:3]) + + # Set response headers + req.content_type = "text/plain" + for header, value in headers: + if header.lower() == 'content-type': + req.content_type = value + continue + req.headers_out.add(header, value) + + if stream: + # Flush now so the status and headers are sent immediately. + req.flush() + + # Set response body + if isinstance(body, basestring): + req.write(body) + else: + for seg in body: + req.write(seg) + + +# --------------- Startup tools for CherryPy + mod_python --------------- # +import os +import re +try: + import subprocess + + def popen(fullcmd): + p = subprocess.Popen(fullcmd, shell=True, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + close_fds=True) + return p.stdout +except ImportError: + def popen(fullcmd): + pipein, pipeout = os.popen4(fullcmd) + return pipeout + + +def read_process(cmd, args=""): + fullcmd = "%s %s" % (cmd, args) + pipeout = popen(fullcmd) + try: + firstline = pipeout.readline() + cmd_not_found = re.search( + ntob("(not recognized|No such file|not found)"), + firstline, + re.IGNORECASE + ) + if cmd_not_found: + raise IOError('%s must be on your system path.' % cmd) + output = firstline + pipeout.read() + finally: + pipeout.close() + return output + + +class ModPythonServer(object): + + template = """ +# Apache2 server configuration file for running CherryPy with mod_python. + +DocumentRoot "/" +Listen %(port)s +LoadModule python_module modules/mod_python.so + + + SetHandler python-program + PythonHandler %(handler)s + PythonDebug On +%(opts)s + +""" + + def __init__(self, loc="/", port=80, opts=None, apache_path="apache", + handler="cherrypy._cpmodpy::handler"): + self.loc = loc + self.port = port + self.opts = opts + self.apache_path = apache_path + self.handler = handler + + def start(self): + opts = "".join([" PythonOption %s %s\n" % (k, v) + for k, v in self.opts]) + conf_data = self.template % {"port": self.port, + "loc": self.loc, + "opts": opts, + "handler": self.handler, + } + + mpconf = os.path.join(os.path.dirname(__file__), "cpmodpy.conf") + f = open(mpconf, 'wb') + try: + f.write(conf_data) + finally: + f.close() + + response = read_process(self.apache_path, "-k start -f %s" % mpconf) + self.ready = True + return response + + def stop(self): + os.popen("apache -k stop") + self.ready = False diff --git a/lib/cherrypy/_cpnative_server.py b/lib/cherrypy/_cpnative_server.py new file mode 100644 index 00000000..e303573d --- /dev/null +++ b/lib/cherrypy/_cpnative_server.py @@ -0,0 +1,154 @@ +"""Native adapter for serving CherryPy via its builtin server.""" + +import logging +import sys + +import cherrypy +from cherrypy._cpcompat import BytesIO +from cherrypy._cperror import format_exc, bare_error +from cherrypy.lib import httputil +from cherrypy import wsgiserver + + +class NativeGateway(wsgiserver.Gateway): + + recursive = False + + def respond(self): + req = self.req + try: + # Obtain a Request object from CherryPy + local = req.server.bind_addr + local = httputil.Host(local[0], local[1], "") + remote = req.conn.remote_addr, req.conn.remote_port + remote = httputil.Host(remote[0], remote[1], "") + + scheme = req.scheme + sn = cherrypy.tree.script_name(req.uri or "/") + if sn is None: + self.send_response('404 Not Found', [], ['']) + else: + app = cherrypy.tree.apps[sn] + method = req.method + path = req.path + qs = req.qs or "" + headers = req.inheaders.items() + rfile = req.rfile + prev = None + + try: + redirections = [] + while True: + request, response = app.get_serving( + local, remote, scheme, "HTTP/1.1") + request.multithread = True + request.multiprocess = False + request.app = app + request.prev = prev + + # Run the CherryPy Request object and obtain the + # response + try: + request.run(method, path, qs, + req.request_protocol, headers, rfile) + break + except cherrypy.InternalRedirect: + ir = sys.exc_info()[1] + app.release_serving() + prev = request + + if not self.recursive: + if ir.path in redirections: + raise RuntimeError( + "InternalRedirector visited the same " + "URL twice: %r" % ir.path) + else: + # Add the *previous* path_info + qs to + # redirections. + if qs: + qs = "?" + qs + redirections.append(sn + path + qs) + + # Munge environment and try again. + method = "GET" + path = ir.path + qs = ir.query_string + rfile = BytesIO() + + self.send_response( + response.output_status, response.header_list, + response.body) + finally: + app.release_serving() + except: + tb = format_exc() + # print tb + cherrypy.log(tb, 'NATIVE_ADAPTER', severity=logging.ERROR) + s, h, b = bare_error() + self.send_response(s, h, b) + + def send_response(self, status, headers, body): + req = self.req + + # Set response status + req.status = str(status or "500 Server Error") + + # Set response headers + for header, value in headers: + req.outheaders.append((header, value)) + if (req.ready and not req.sent_headers): + req.sent_headers = True + req.send_headers() + + # Set response body + for seg in body: + req.write(seg) + + +class CPHTTPServer(wsgiserver.HTTPServer): + + """Wrapper for wsgiserver.HTTPServer. + + wsgiserver has been designed to not reference CherryPy in any way, + so that it can be used in other frameworks and applications. + Therefore, we wrap it here, so we can apply some attributes + from config -> cherrypy.server -> HTTPServer. + """ + + def __init__(self, server_adapter=cherrypy.server): + self.server_adapter = server_adapter + + server_name = (self.server_adapter.socket_host or + self.server_adapter.socket_file or + None) + + wsgiserver.HTTPServer.__init__( + self, server_adapter.bind_addr, NativeGateway, + minthreads=server_adapter.thread_pool, + maxthreads=server_adapter.thread_pool_max, + server_name=server_name) + + self.max_request_header_size = ( + self.server_adapter.max_request_header_size or 0) + self.max_request_body_size = ( + self.server_adapter.max_request_body_size or 0) + self.request_queue_size = self.server_adapter.socket_queue_size + self.timeout = self.server_adapter.socket_timeout + self.shutdown_timeout = self.server_adapter.shutdown_timeout + self.protocol = self.server_adapter.protocol_version + self.nodelay = self.server_adapter.nodelay + + ssl_module = self.server_adapter.ssl_module or 'pyopenssl' + if self.server_adapter.ssl_context: + adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module) + self.ssl_adapter = adapter_class( + self.server_adapter.ssl_certificate, + self.server_adapter.ssl_private_key, + self.server_adapter.ssl_certificate_chain) + self.ssl_adapter.context = self.server_adapter.ssl_context + elif self.server_adapter.ssl_certificate: + adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module) + self.ssl_adapter = adapter_class( + self.server_adapter.ssl_certificate, + self.server_adapter.ssl_private_key, + self.server_adapter.ssl_certificate_chain) diff --git a/lib/cherrypy/_cpreqbody.py b/lib/cherrypy/_cpreqbody.py new file mode 100644 index 00000000..d2dbbc92 --- /dev/null +++ b/lib/cherrypy/_cpreqbody.py @@ -0,0 +1,1013 @@ +"""Request body processing for CherryPy. + +.. versionadded:: 3.2 + +Application authors have complete control over the parsing of HTTP request +entities. In short, +:attr:`cherrypy.request.body` +is now always set to an instance of +:class:`RequestBody`, +and *that* class is a subclass of :class:`Entity`. + +When an HTTP request includes an entity body, it is often desirable to +provide that information to applications in a form other than the raw bytes. +Different content types demand different approaches. Examples: + + * For a GIF file, we want the raw bytes in a stream. + * An HTML form is better parsed into its component fields, and each text field + decoded from bytes to unicode. + * A JSON body should be deserialized into a Python dict or list. + +When the request contains a Content-Type header, the media type is used as a +key to look up a value in the +:attr:`request.body.processors` dict. +If the full media +type is not found, then the major type is tried; for example, if no processor +is found for the 'image/jpeg' type, then we look for a processor for the +'image' types altogether. If neither the full type nor the major type has a +matching processor, then a default processor is used +(:func:`default_proc`). For most +types, this means no processing is done, and the body is left unread as a +raw byte stream. Processors are configurable in an 'on_start_resource' hook. + +Some processors, especially those for the 'text' types, attempt to decode bytes +to unicode. If the Content-Type request header includes a 'charset' parameter, +this is used to decode the entity. Otherwise, one or more default charsets may +be attempted, although this decision is up to each processor. If a processor +successfully decodes an Entity or Part, it should set the +:attr:`charset` attribute +on the Entity or Part to the name of the successful charset, so that +applications can easily re-encode or transcode the value if they wish. + +If the Content-Type of the request entity is of major type 'multipart', then +the above parsing process, and possibly a decoding process, is performed for +each part. + +For both the full entity and multipart parts, a Content-Disposition header may +be used to fill :attr:`name` and +:attr:`filename` attributes on the +request.body or the Part. + +.. _custombodyprocessors: + +Custom Processors +================= + +You can add your own processors for any specific or major MIME type. Simply add +it to the :attr:`processors` dict in a +hook/tool that runs at ``on_start_resource`` or ``before_request_body``. +Here's the built-in JSON tool for an example:: + + def json_in(force=True, debug=False): + request = cherrypy.serving.request + def json_processor(entity): + \"""Read application/json data into request.json.\""" + if not entity.headers.get("Content-Length", ""): + raise cherrypy.HTTPError(411) + + body = entity.fp.read() + try: + request.json = json_decode(body) + except ValueError: + raise cherrypy.HTTPError(400, 'Invalid JSON document') + if force: + request.body.processors.clear() + request.body.default_proc = cherrypy.HTTPError( + 415, 'Expected an application/json content type') + request.body.processors['application/json'] = json_processor + +We begin by defining a new ``json_processor`` function to stick in the +``processors`` dictionary. All processor functions take a single argument, +the ``Entity`` instance they are to process. It will be called whenever a +request is received (for those URI's where the tool is turned on) which +has a ``Content-Type`` of "application/json". + +First, it checks for a valid ``Content-Length`` (raising 411 if not valid), +then reads the remaining bytes on the socket. The ``fp`` object knows its +own length, so it won't hang waiting for data that never arrives. It will +return when all data has been read. Then, we decode those bytes using +Python's built-in ``json`` module, and stick the decoded result onto +``request.json`` . If it cannot be decoded, we raise 400. + +If the "force" argument is True (the default), the ``Tool`` clears the +``processors`` dict so that request entities of other ``Content-Types`` +aren't parsed at all. Since there's no entry for those invalid MIME +types, the ``default_proc`` method of ``cherrypy.request.body`` is +called. But this does nothing by default (usually to provide the page +handler an opportunity to handle it.) +But in our case, we want to raise 415, so we replace +``request.body.default_proc`` +with the error (``HTTPError`` instances, when called, raise themselves). + +If we were defining a custom processor, we can do so without making a ``Tool``. +Just add the config entry:: + + request.body.processors = {'application/json': json_processor} + +Note that you can only replace the ``processors`` dict wholesale this way, +not update the existing one. +""" + +try: + from io import DEFAULT_BUFFER_SIZE +except ImportError: + DEFAULT_BUFFER_SIZE = 8192 +import re +import sys +import tempfile +try: + from urllib import unquote_plus +except ImportError: + def unquote_plus(bs): + """Bytes version of urllib.parse.unquote_plus.""" + bs = bs.replace(ntob('+'), ntob(' ')) + atoms = bs.split(ntob('%')) + for i in range(1, len(atoms)): + item = atoms[i] + try: + pct = int(item[:2], 16) + atoms[i] = bytes([pct]) + item[2:] + except ValueError: + pass + return ntob('').join(atoms) + +import cherrypy +from cherrypy._cpcompat import basestring, ntob, ntou +from cherrypy.lib import httputil + + +# ------------------------------- Processors -------------------------------- # + +def process_urlencoded(entity): + """Read application/x-www-form-urlencoded data into entity.params.""" + qs = entity.fp.read() + for charset in entity.attempt_charsets: + try: + params = {} + for aparam in qs.split(ntob('&')): + for pair in aparam.split(ntob(';')): + if not pair: + continue + + atoms = pair.split(ntob('='), 1) + if len(atoms) == 1: + atoms.append(ntob('')) + + key = unquote_plus(atoms[0]).decode(charset) + value = unquote_plus(atoms[1]).decode(charset) + + if key in params: + if not isinstance(params[key], list): + params[key] = [params[key]] + params[key].append(value) + else: + params[key] = value + except UnicodeDecodeError: + pass + else: + entity.charset = charset + break + else: + raise cherrypy.HTTPError( + 400, "The request entity could not be decoded. The following " + "charsets were attempted: %s" % repr(entity.attempt_charsets)) + + # Now that all values have been successfully parsed and decoded, + # apply them to the entity.params dict. + for key, value in params.items(): + if key in entity.params: + if not isinstance(entity.params[key], list): + entity.params[key] = [entity.params[key]] + entity.params[key].append(value) + else: + entity.params[key] = value + + +def process_multipart(entity): + """Read all multipart parts into entity.parts.""" + ib = "" + if 'boundary' in entity.content_type.params: + # http://tools.ietf.org/html/rfc2046#section-5.1.1 + # "The grammar for parameters on the Content-type field is such that it + # is often necessary to enclose the boundary parameter values in quotes + # on the Content-type line" + ib = entity.content_type.params['boundary'].strip('"') + + if not re.match("^[ -~]{0,200}[!-~]$", ib): + raise ValueError('Invalid boundary in multipart form: %r' % (ib,)) + + ib = ('--' + ib).encode('ascii') + + # Find the first marker + while True: + b = entity.readline() + if not b: + return + + b = b.strip() + if b == ib: + break + + # Read all parts + while True: + part = entity.part_class.from_fp(entity.fp, ib) + entity.parts.append(part) + part.process() + if part.fp.done: + break + + +def process_multipart_form_data(entity): + """Read all multipart/form-data parts into entity.parts or entity.params. + """ + process_multipart(entity) + + kept_parts = [] + for part in entity.parts: + if part.name is None: + kept_parts.append(part) + else: + if part.filename is None: + # It's a regular field + value = part.fullvalue() + else: + # It's a file upload. Retain the whole part so consumer code + # has access to its .file and .filename attributes. + value = part + + if part.name in entity.params: + if not isinstance(entity.params[part.name], list): + entity.params[part.name] = [entity.params[part.name]] + entity.params[part.name].append(value) + else: + entity.params[part.name] = value + + entity.parts = kept_parts + + +def _old_process_multipart(entity): + """The behavior of 3.2 and lower. Deprecated and will be changed in 3.3.""" + process_multipart(entity) + + params = entity.params + + for part in entity.parts: + if part.name is None: + key = ntou('parts') + else: + key = part.name + + if part.filename is None: + # It's a regular field + value = part.fullvalue() + else: + # It's a file upload. Retain the whole part so consumer code + # has access to its .file and .filename attributes. + value = part + + if key in params: + if not isinstance(params[key], list): + params[key] = [params[key]] + params[key].append(value) + else: + params[key] = value + + +# -------------------------------- Entities --------------------------------- # +class Entity(object): + + """An HTTP request body, or MIME multipart body. + + This class collects information about the HTTP request entity. When a + given entity is of MIME type "multipart", each part is parsed into its own + Entity instance, and the set of parts stored in + :attr:`entity.parts`. + + Between the ``before_request_body`` and ``before_handler`` tools, CherryPy + tries to process the request body (if any) by calling + :func:`request.body.process`. + This uses the ``content_type`` of the Entity to look up a suitable + processor in + :attr:`Entity.processors`, + a dict. + If a matching processor cannot be found for the complete Content-Type, + it tries again using the major type. For example, if a request with an + entity of type "image/jpeg" arrives, but no processor can be found for + that complete type, then one is sought for the major type "image". If a + processor is still not found, then the + :func:`default_proc` method + of the Entity is called (which does nothing by default; you can + override this too). + + CherryPy includes processors for the "application/x-www-form-urlencoded" + type, the "multipart/form-data" type, and the "multipart" major type. + CherryPy 3.2 processes these types almost exactly as older versions. + Parts are passed as arguments to the page handler using their + ``Content-Disposition.name`` if given, otherwise in a generic "parts" + argument. Each such part is either a string, or the + :class:`Part` itself if it's a file. (In this + case it will have ``file`` and ``filename`` attributes, or possibly a + ``value`` attribute). Each Part is itself a subclass of + Entity, and has its own ``process`` method and ``processors`` dict. + + There is a separate processor for the "multipart" major type which is more + flexible, and simply stores all multipart parts in + :attr:`request.body.parts`. You can + enable it with:: + + cherrypy.request.body.processors['multipart'] = _cpreqbody.process_multipart + + in an ``on_start_resource`` tool. + """ + + # http://tools.ietf.org/html/rfc2046#section-4.1.2: + # "The default character set, which must be assumed in the + # absence of a charset parameter, is US-ASCII." + # However, many browsers send data in utf-8 with no charset. + attempt_charsets = ['utf-8'] + """A list of strings, each of which should be a known encoding. + + When the Content-Type of the request body warrants it, each of the given + encodings will be tried in order. The first one to successfully decode the + entity without raising an error is stored as + :attr:`entity.charset`. This defaults + to ``['utf-8']`` (plus 'ISO-8859-1' for "text/\*" types, as required by + `HTTP/1.1 `_), + but ``['us-ascii', 'utf-8']`` for multipart parts. + """ + + charset = None + """The successful decoding; see "attempt_charsets" above.""" + + content_type = None + """The value of the Content-Type request header. + + If the Entity is part of a multipart payload, this will be the Content-Type + given in the MIME headers for this part. + """ + + default_content_type = 'application/x-www-form-urlencoded' + """This defines a default ``Content-Type`` to use if no Content-Type header + is given. The empty string is used for RequestBody, which results in the + request body not being read or parsed at all. This is by design; a missing + ``Content-Type`` header in the HTTP request entity is an error at best, + and a security hole at worst. For multipart parts, however, the MIME spec + declares that a part with no Content-Type defaults to "text/plain" + (see :class:`Part`). + """ + + filename = None + """The ``Content-Disposition.filename`` header, if available.""" + + fp = None + """The readable socket file object.""" + + headers = None + """A dict of request/multipart header names and values. + + This is a copy of the ``request.headers`` for the ``request.body``; + for multipart parts, it is the set of headers for that part. + """ + + length = None + """The value of the ``Content-Length`` header, if provided.""" + + name = None + """The "name" parameter of the ``Content-Disposition`` header, if any.""" + + params = None + """ + If the request Content-Type is 'application/x-www-form-urlencoded' or + multipart, this will be a dict of the params pulled from the entity + body; that is, it will be the portion of request.params that come + from the message body (sometimes called "POST params", although they + can be sent with various HTTP method verbs). This value is set between + the 'before_request_body' and 'before_handler' hooks (assuming that + process_request_body is True).""" + + processors = {'application/x-www-form-urlencoded': process_urlencoded, + 'multipart/form-data': process_multipart_form_data, + 'multipart': process_multipart, + } + """A dict of Content-Type names to processor methods.""" + + parts = None + """A list of Part instances if ``Content-Type`` is of major type + "multipart".""" + + part_class = None + """The class used for multipart parts. + + You can replace this with custom subclasses to alter the processing of + multipart parts. + """ + + def __init__(self, fp, headers, params=None, parts=None): + # Make an instance-specific copy of the class processors + # so Tools, etc. can replace them per-request. + self.processors = self.processors.copy() + + self.fp = fp + self.headers = headers + + if params is None: + params = {} + self.params = params + + if parts is None: + parts = [] + self.parts = parts + + # Content-Type + self.content_type = headers.elements('Content-Type') + if self.content_type: + self.content_type = self.content_type[0] + else: + self.content_type = httputil.HeaderElement.from_str( + self.default_content_type) + + # Copy the class 'attempt_charsets', prepending any Content-Type + # charset + dec = self.content_type.params.get("charset", None) + if dec: + self.attempt_charsets = [dec] + [c for c in self.attempt_charsets + if c != dec] + else: + self.attempt_charsets = self.attempt_charsets[:] + + # Length + self.length = None + clen = headers.get('Content-Length', None) + # If Transfer-Encoding is 'chunked', ignore any Content-Length. + if ( + clen is not None and + 'chunked' not in headers.get('Transfer-Encoding', '') + ): + try: + self.length = int(clen) + except ValueError: + pass + + # Content-Disposition + self.name = None + self.filename = None + disp = headers.elements('Content-Disposition') + if disp: + disp = disp[0] + if 'name' in disp.params: + self.name = disp.params['name'] + if self.name.startswith('"') and self.name.endswith('"'): + self.name = self.name[1:-1] + if 'filename' in disp.params: + self.filename = disp.params['filename'] + if ( + self.filename.startswith('"') and + self.filename.endswith('"') + ): + self.filename = self.filename[1:-1] + + # The 'type' attribute is deprecated in 3.2; remove it in 3.3. + type = property( + lambda self: self.content_type, + doc="A deprecated alias for " + ":attr:`content_type`." + ) + + def read(self, size=None, fp_out=None): + return self.fp.read(size, fp_out) + + def readline(self, size=None): + return self.fp.readline(size) + + def readlines(self, sizehint=None): + return self.fp.readlines(sizehint) + + def __iter__(self): + return self + + def __next__(self): + line = self.readline() + if not line: + raise StopIteration + return line + + def next(self): + return self.__next__() + + def read_into_file(self, fp_out=None): + """Read the request body into fp_out (or make_file() if None). + + Return fp_out. + """ + if fp_out is None: + fp_out = self.make_file() + self.read(fp_out=fp_out) + return fp_out + + def make_file(self): + """Return a file-like object into which the request body will be read. + + By default, this will return a TemporaryFile. Override as needed. + See also :attr:`cherrypy._cpreqbody.Part.maxrambytes`.""" + return tempfile.TemporaryFile() + + def fullvalue(self): + """Return this entity as a string, whether stored in a file or not.""" + if self.file: + # It was stored in a tempfile. Read it. + self.file.seek(0) + value = self.file.read() + self.file.seek(0) + else: + value = self.value + return value + + def process(self): + """Execute the best-match processor for the given media type.""" + proc = None + ct = self.content_type.value + try: + proc = self.processors[ct] + except KeyError: + toptype = ct.split('/', 1)[0] + try: + proc = self.processors[toptype] + except KeyError: + pass + if proc is None: + self.default_proc() + else: + proc(self) + + def default_proc(self): + """Called if a more-specific processor is not found for the + ``Content-Type``. + """ + # Leave the fp alone for someone else to read. This works fine + # for request.body, but the Part subclasses need to override this + # so they can move on to the next part. + pass + + +class Part(Entity): + + """A MIME part entity, part of a multipart entity.""" + + # "The default character set, which must be assumed in the absence of a + # charset parameter, is US-ASCII." + attempt_charsets = ['us-ascii', 'utf-8'] + """A list of strings, each of which should be a known encoding. + + When the Content-Type of the request body warrants it, each of the given + encodings will be tried in order. The first one to successfully decode the + entity without raising an error is stored as + :attr:`entity.charset`. This defaults + to ``['utf-8']`` (plus 'ISO-8859-1' for "text/\*" types, as required by + `HTTP/1.1 `_), + but ``['us-ascii', 'utf-8']`` for multipart parts. + """ + + boundary = None + """The MIME multipart boundary.""" + + default_content_type = 'text/plain' + """This defines a default ``Content-Type`` to use if no Content-Type header + is given. The empty string is used for RequestBody, which results in the + request body not being read or parsed at all. This is by design; a missing + ``Content-Type`` header in the HTTP request entity is an error at best, + and a security hole at worst. For multipart parts, however (this class), + the MIME spec declares that a part with no Content-Type defaults to + "text/plain". + """ + + # This is the default in stdlib cgi. We may want to increase it. + maxrambytes = 1000 + """The threshold of bytes after which point the ``Part`` will store + its data in a file (generated by + :func:`make_file`) + instead of a string. Defaults to 1000, just like the :mod:`cgi` + module in Python's standard library. + """ + + def __init__(self, fp, headers, boundary): + Entity.__init__(self, fp, headers) + self.boundary = boundary + self.file = None + self.value = None + + def from_fp(cls, fp, boundary): + headers = cls.read_headers(fp) + return cls(fp, headers, boundary) + from_fp = classmethod(from_fp) + + def read_headers(cls, fp): + headers = httputil.HeaderMap() + while True: + line = fp.readline() + if not line: + # No more data--illegal end of headers + raise EOFError("Illegal end of headers.") + + if line == ntob('\r\n'): + # Normal end of headers + break + if not line.endswith(ntob('\r\n')): + raise ValueError("MIME requires CRLF terminators: %r" % line) + + if line[0] in ntob(' \t'): + # It's a continuation line. + v = line.strip().decode('ISO-8859-1') + else: + k, v = line.split(ntob(":"), 1) + k = k.strip().decode('ISO-8859-1') + v = v.strip().decode('ISO-8859-1') + + existing = headers.get(k) + if existing: + v = ", ".join((existing, v)) + headers[k] = v + + return headers + read_headers = classmethod(read_headers) + + def read_lines_to_boundary(self, fp_out=None): + """Read bytes from self.fp and return or write them to a file. + + If the 'fp_out' argument is None (the default), all bytes read are + returned in a single byte string. + + If the 'fp_out' argument is not None, it must be a file-like + object that supports the 'write' method; all bytes read will be + written to the fp, and that fp is returned. + """ + endmarker = self.boundary + ntob("--") + delim = ntob("") + prev_lf = True + lines = [] + seen = 0 + while True: + line = self.fp.readline(1 << 16) + if not line: + raise EOFError("Illegal end of multipart body.") + if line.startswith(ntob("--")) and prev_lf: + strippedline = line.strip() + if strippedline == self.boundary: + break + if strippedline == endmarker: + self.fp.finish() + break + + line = delim + line + + if line.endswith(ntob("\r\n")): + delim = ntob("\r\n") + line = line[:-2] + prev_lf = True + elif line.endswith(ntob("\n")): + delim = ntob("\n") + line = line[:-1] + prev_lf = True + else: + delim = ntob("") + prev_lf = False + + if fp_out is None: + lines.append(line) + seen += len(line) + if seen > self.maxrambytes: + fp_out = self.make_file() + for line in lines: + fp_out.write(line) + else: + fp_out.write(line) + + if fp_out is None: + result = ntob('').join(lines) + for charset in self.attempt_charsets: + try: + result = result.decode(charset) + except UnicodeDecodeError: + pass + else: + self.charset = charset + return result + else: + raise cherrypy.HTTPError( + 400, + "The request entity could not be decoded. The following " + "charsets were attempted: %s" % repr(self.attempt_charsets) + ) + else: + fp_out.seek(0) + return fp_out + + def default_proc(self): + """Called if a more-specific processor is not found for the + ``Content-Type``. + """ + if self.filename: + # Always read into a file if a .filename was given. + self.file = self.read_into_file() + else: + result = self.read_lines_to_boundary() + if isinstance(result, basestring): + self.value = result + else: + self.file = result + + def read_into_file(self, fp_out=None): + """Read the request body into fp_out (or make_file() if None). + + Return fp_out. + """ + if fp_out is None: + fp_out = self.make_file() + self.read_lines_to_boundary(fp_out=fp_out) + return fp_out + +Entity.part_class = Part + +try: + inf = float('inf') +except ValueError: + # Python 2.4 and lower + class Infinity(object): + + def __cmp__(self, other): + return 1 + + def __sub__(self, other): + return self + inf = Infinity() + + +comma_separated_headers = [ + 'Accept', 'Accept-Charset', 'Accept-Encoding', + 'Accept-Language', 'Accept-Ranges', 'Allow', + 'Cache-Control', 'Connection', 'Content-Encoding', + 'Content-Language', 'Expect', 'If-Match', + 'If-None-Match', 'Pragma', 'Proxy-Authenticate', + 'Te', 'Trailer', 'Transfer-Encoding', 'Upgrade', + 'Vary', 'Via', 'Warning', 'Www-Authenticate' +] + + +class SizedReader: + + def __init__(self, fp, length, maxbytes, bufsize=DEFAULT_BUFFER_SIZE, + has_trailers=False): + # Wrap our fp in a buffer so peek() works + self.fp = fp + self.length = length + self.maxbytes = maxbytes + self.buffer = ntob('') + self.bufsize = bufsize + self.bytes_read = 0 + self.done = False + self.has_trailers = has_trailers + + def read(self, size=None, fp_out=None): + """Read bytes from the request body and return or write them to a file. + + A number of bytes less than or equal to the 'size' argument are read + off the socket. The actual number of bytes read are tracked in + self.bytes_read. The number may be smaller than 'size' when 1) the + client sends fewer bytes, 2) the 'Content-Length' request header + specifies fewer bytes than requested, or 3) the number of bytes read + exceeds self.maxbytes (in which case, 413 is raised). + + If the 'fp_out' argument is None (the default), all bytes read are + returned in a single byte string. + + If the 'fp_out' argument is not None, it must be a file-like + object that supports the 'write' method; all bytes read will be + written to the fp, and None is returned. + """ + + if self.length is None: + if size is None: + remaining = inf + else: + remaining = size + else: + remaining = self.length - self.bytes_read + if size and size < remaining: + remaining = size + if remaining == 0: + self.finish() + if fp_out is None: + return ntob('') + else: + return None + + chunks = [] + + # Read bytes from the buffer. + if self.buffer: + if remaining is inf: + data = self.buffer + self.buffer = ntob('') + else: + data = self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + datalen = len(data) + remaining -= datalen + + # Check lengths. + self.bytes_read += datalen + if self.maxbytes and self.bytes_read > self.maxbytes: + raise cherrypy.HTTPError(413) + + # Store the data. + if fp_out is None: + chunks.append(data) + else: + fp_out.write(data) + + # Read bytes from the socket. + while remaining > 0: + chunksize = min(remaining, self.bufsize) + try: + data = self.fp.read(chunksize) + except Exception: + e = sys.exc_info()[1] + if e.__class__.__name__ == 'MaxSizeExceeded': + # Post data is too big + raise cherrypy.HTTPError( + 413, "Maximum request length: %r" % e.args[1]) + else: + raise + if not data: + self.finish() + break + datalen = len(data) + remaining -= datalen + + # Check lengths. + self.bytes_read += datalen + if self.maxbytes and self.bytes_read > self.maxbytes: + raise cherrypy.HTTPError(413) + + # Store the data. + if fp_out is None: + chunks.append(data) + else: + fp_out.write(data) + + if fp_out is None: + return ntob('').join(chunks) + + def readline(self, size=None): + """Read a line from the request body and return it.""" + chunks = [] + while size is None or size > 0: + chunksize = self.bufsize + if size is not None and size < self.bufsize: + chunksize = size + data = self.read(chunksize) + if not data: + break + pos = data.find(ntob('\n')) + 1 + if pos: + chunks.append(data[:pos]) + remainder = data[pos:] + self.buffer += remainder + self.bytes_read -= len(remainder) + break + else: + chunks.append(data) + return ntob('').join(chunks) + + def readlines(self, sizehint=None): + """Read lines from the request body and return them.""" + if self.length is not None: + if sizehint is None: + sizehint = self.length - self.bytes_read + else: + sizehint = min(sizehint, self.length - self.bytes_read) + + lines = [] + seen = 0 + while True: + line = self.readline() + if not line: + break + lines.append(line) + seen += len(line) + if seen >= sizehint: + break + return lines + + def finish(self): + self.done = True + if self.has_trailers and hasattr(self.fp, 'read_trailer_lines'): + self.trailers = {} + + try: + for line in self.fp.read_trailer_lines(): + if line[0] in ntob(' \t'): + # It's a continuation line. + v = line.strip() + else: + try: + k, v = line.split(ntob(":"), 1) + except ValueError: + raise ValueError("Illegal header line.") + k = k.strip().title() + v = v.strip() + + if k in comma_separated_headers: + existing = self.trailers.get(envname) + if existing: + v = ntob(", ").join((existing, v)) + self.trailers[k] = v + except Exception: + e = sys.exc_info()[1] + if e.__class__.__name__ == 'MaxSizeExceeded': + # Post data is too big + raise cherrypy.HTTPError( + 413, "Maximum request length: %r" % e.args[1]) + else: + raise + + +class RequestBody(Entity): + + """The entity of the HTTP request.""" + + bufsize = 8 * 1024 + """The buffer size used when reading the socket.""" + + # Don't parse the request body at all if the client didn't provide + # a Content-Type header. See + # https://bitbucket.org/cherrypy/cherrypy/issue/790 + default_content_type = '' + """This defines a default ``Content-Type`` to use if no Content-Type header + is given. The empty string is used for RequestBody, which results in the + request body not being read or parsed at all. This is by design; a missing + ``Content-Type`` header in the HTTP request entity is an error at best, + and a security hole at worst. For multipart parts, however, the MIME spec + declares that a part with no Content-Type defaults to "text/plain" + (see :class:`Part`). + """ + + maxbytes = None + """Raise ``MaxSizeExceeded`` if more bytes than this are read from + the socket. + """ + + def __init__(self, fp, headers, params=None, request_params=None): + Entity.__init__(self, fp, headers, params) + + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 + # When no explicit charset parameter is provided by the + # sender, media subtypes of the "text" type are defined + # to have a default charset value of "ISO-8859-1" when + # received via HTTP. + if self.content_type.value.startswith('text/'): + for c in ('ISO-8859-1', 'iso-8859-1', 'Latin-1', 'latin-1'): + if c in self.attempt_charsets: + break + else: + self.attempt_charsets.append('ISO-8859-1') + + # Temporary fix while deprecating passing .parts as .params. + self.processors['multipart'] = _old_process_multipart + + if request_params is None: + request_params = {} + self.request_params = request_params + + def process(self): + """Process the request entity based on its Content-Type.""" + # "The presence of a message-body in a request is signaled by the + # inclusion of a Content-Length or Transfer-Encoding header field in + # the request's message-headers." + # It is possible to send a POST request with no body, for example; + # however, app developers are responsible in that case to set + # cherrypy.request.process_body to False so this method isn't called. + h = cherrypy.serving.request.headers + if 'Content-Length' not in h and 'Transfer-Encoding' not in h: + raise cherrypy.HTTPError(411) + + self.fp = SizedReader(self.fp, self.length, + self.maxbytes, bufsize=self.bufsize, + has_trailers='Trailer' in h) + super(RequestBody, self).process() + + # Body params should also be a part of the request_params + # add them in here. + request_params = self.request_params + for key, value in self.params.items(): + # Python 2 only: keyword arguments must be byte strings (type + # 'str'). + if sys.version_info < (3, 0): + if isinstance(key, unicode): + key = key.encode('ISO-8859-1') + + if key in request_params: + if not isinstance(request_params[key], list): + request_params[key] = [request_params[key]] + request_params[key].append(value) + else: + request_params[key] = value diff --git a/lib/cherrypy/_cprequest.py b/lib/cherrypy/_cprequest.py new file mode 100644 index 00000000..290bd2eb --- /dev/null +++ b/lib/cherrypy/_cprequest.py @@ -0,0 +1,973 @@ + +import os +import sys +import time +import warnings + +import cherrypy +from cherrypy._cpcompat import basestring, copykeys, ntob, unicodestr +from cherrypy._cpcompat import SimpleCookie, CookieError, py3k +from cherrypy import _cpreqbody, _cpconfig +from cherrypy._cperror import format_exc, bare_error +from cherrypy.lib import httputil, file_generator + + +class Hook(object): + + """A callback and its metadata: failsafe, priority, and kwargs.""" + + callback = None + """ + The bare callable that this Hook object is wrapping, which will + be called when the Hook is called.""" + + failsafe = False + """ + If True, the callback is guaranteed to run even if other callbacks + from the same call point raise exceptions.""" + + priority = 50 + """ + Defines the order of execution for a list of Hooks. Priority numbers + should be limited to the closed interval [0, 100], but values outside + this range are acceptable, as are fractional values.""" + + kwargs = {} + """ + A set of keyword arguments that will be passed to the + callable on each call.""" + + def __init__(self, callback, failsafe=None, priority=None, **kwargs): + self.callback = callback + + if failsafe is None: + failsafe = getattr(callback, "failsafe", False) + self.failsafe = failsafe + + if priority is None: + priority = getattr(callback, "priority", 50) + self.priority = priority + + self.kwargs = kwargs + + def __lt__(self, other): + # Python 3 + return self.priority < other.priority + + def __cmp__(self, other): + # Python 2 + return cmp(self.priority, other.priority) + + def __call__(self): + """Run self.callback(**self.kwargs).""" + return self.callback(**self.kwargs) + + def __repr__(self): + cls = self.__class__ + return ("%s.%s(callback=%r, failsafe=%r, priority=%r, %s)" + % (cls.__module__, cls.__name__, self.callback, + self.failsafe, self.priority, + ", ".join(['%s=%r' % (k, v) + for k, v in self.kwargs.items()]))) + + +class HookMap(dict): + + """A map of call points to lists of callbacks (Hook objects).""" + + def __new__(cls, points=None): + d = dict.__new__(cls) + for p in points or []: + d[p] = [] + return d + + def __init__(self, *a, **kw): + pass + + def attach(self, point, callback, failsafe=None, priority=None, **kwargs): + """Append a new Hook made from the supplied arguments.""" + self[point].append(Hook(callback, failsafe, priority, **kwargs)) + + def run(self, point): + """Execute all registered Hooks (callbacks) for the given point.""" + exc = None + hooks = self[point] + hooks.sort() + for hook in hooks: + # Some hooks are guaranteed to run even if others at + # the same hookpoint fail. We will still log the failure, + # but proceed on to the next hook. The only way + # to stop all processing from one of these hooks is + # to raise SystemExit and stop the whole server. + if exc is None or hook.failsafe: + try: + hook() + except (KeyboardInterrupt, SystemExit): + raise + except (cherrypy.HTTPError, cherrypy.HTTPRedirect, + cherrypy.InternalRedirect): + exc = sys.exc_info()[1] + except: + exc = sys.exc_info()[1] + cherrypy.log(traceback=True, severity=40) + if exc: + raise exc + + def __copy__(self): + newmap = self.__class__() + # We can't just use 'update' because we want copies of the + # mutable values (each is a list) as well. + for k, v in self.items(): + newmap[k] = v[:] + return newmap + copy = __copy__ + + def __repr__(self): + cls = self.__class__ + return "%s.%s(points=%r)" % ( + cls.__module__, + cls.__name__, + copykeys(self) + ) + + +# Config namespace handlers + +def hooks_namespace(k, v): + """Attach bare hooks declared in config.""" + # Use split again to allow multiple hooks for a single + # hookpoint per path (e.g. "hooks.before_handler.1"). + # Little-known fact you only get from reading source ;) + hookpoint = k.split(".", 1)[0] + if isinstance(v, basestring): + v = cherrypy.lib.attributes(v) + if not isinstance(v, Hook): + v = Hook(v) + cherrypy.serving.request.hooks[hookpoint].append(v) + + +def request_namespace(k, v): + """Attach request attributes declared in config.""" + # Provides config entries to set request.body attrs (like + # attempt_charsets). + if k[:5] == 'body.': + setattr(cherrypy.serving.request.body, k[5:], v) + else: + setattr(cherrypy.serving.request, k, v) + + +def response_namespace(k, v): + """Attach response attributes declared in config.""" + # Provides config entries to set default response headers + # http://cherrypy.org/ticket/889 + if k[:8] == 'headers.': + cherrypy.serving.response.headers[k.split('.', 1)[1]] = v + else: + setattr(cherrypy.serving.response, k, v) + + +def error_page_namespace(k, v): + """Attach error pages declared in config.""" + if k != 'default': + k = int(k) + cherrypy.serving.request.error_page[k] = v + + +hookpoints = ['on_start_resource', 'before_request_body', + 'before_handler', 'before_finalize', + 'on_end_resource', 'on_end_request', + 'before_error_response', 'after_error_response'] + + +class Request(object): + + """An HTTP request. + + This object represents the metadata of an HTTP request message; + that is, it contains attributes which describe the environment + in which the request URL, headers, and body were sent (if you + want tools to interpret the headers and body, those are elsewhere, + mostly in Tools). This 'metadata' consists of socket data, + transport characteristics, and the Request-Line. This object + also contains data regarding the configuration in effect for + the given URL, and the execution plan for generating a response. + """ + + prev = None + """ + The previous Request object (if any). This should be None + unless we are processing an InternalRedirect.""" + + # Conversation/connection attributes + local = httputil.Host("127.0.0.1", 80) + "An httputil.Host(ip, port, hostname) object for the server socket." + + remote = httputil.Host("127.0.0.1", 1111) + "An httputil.Host(ip, port, hostname) object for the client socket." + + scheme = "http" + """ + The protocol used between client and server. In most cases, + this will be either 'http' or 'https'.""" + + server_protocol = "HTTP/1.1" + """ + The HTTP version for which the HTTP server is at least + conditionally compliant.""" + + base = "" + """The (scheme://host) portion of the requested URL. + In some cases (e.g. when proxying via mod_rewrite), this may contain + path segments which cherrypy.url uses when constructing url's, but + which otherwise are ignored by CherryPy. Regardless, this value + MUST NOT end in a slash.""" + + # Request-Line attributes + request_line = "" + """ + The complete Request-Line received from the client. This is a + single string consisting of the request method, URI, and protocol + version (joined by spaces). Any final CRLF is removed.""" + + method = "GET" + """ + Indicates the HTTP method to be performed on the resource identified + by the Request-URI. Common methods include GET, HEAD, POST, PUT, and + DELETE. CherryPy allows any extension method; however, various HTTP + servers and gateways may restrict the set of allowable methods. + CherryPy applications SHOULD restrict the set (on a per-URI basis).""" + + query_string = "" + """ + The query component of the Request-URI, a string of information to be + interpreted by the resource. The query portion of a URI follows the + path component, and is separated by a '?'. For example, the URI + 'http://www.cherrypy.org/wiki?a=3&b=4' has the query component, + 'a=3&b=4'.""" + + query_string_encoding = 'utf8' + """ + The encoding expected for query string arguments after % HEX HEX decoding). + If a query string is provided that cannot be decoded with this encoding, + 404 is raised (since technically it's a different URI). If you want + arbitrary encodings to not error, set this to 'Latin-1'; you can then + encode back to bytes and re-decode to whatever encoding you like later. + """ + + protocol = (1, 1) + """The HTTP protocol version corresponding to the set + of features which should be allowed in the response. If BOTH + the client's request message AND the server's level of HTTP + compliance is HTTP/1.1, this attribute will be the tuple (1, 1). + If either is 1.0, this attribute will be the tuple (1, 0). + Lower HTTP protocol versions are not explicitly supported.""" + + params = {} + """ + A dict which combines query string (GET) and request entity (POST) + variables. This is populated in two stages: GET params are added + before the 'on_start_resource' hook, and POST params are added + between the 'before_request_body' and 'before_handler' hooks.""" + + # Message attributes + header_list = [] + """ + A list of the HTTP request headers as (name, value) tuples. + In general, you should use request.headers (a dict) instead.""" + + headers = httputil.HeaderMap() + """ + A dict-like object containing the request headers. Keys are header + names (in Title-Case format); however, you may get and set them in + a case-insensitive manner. That is, headers['Content-Type'] and + headers['content-type'] refer to the same value. Values are header + values (decoded according to :rfc:`2047` if necessary). See also: + httputil.HeaderMap, httputil.HeaderElement.""" + + cookie = SimpleCookie() + """See help(Cookie).""" + + rfile = None + """ + If the request included an entity (body), it will be available + as a stream in this attribute. However, the rfile will normally + be read for you between the 'before_request_body' hook and the + 'before_handler' hook, and the resulting string is placed into + either request.params or the request.body attribute. + + You may disable the automatic consumption of the rfile by setting + request.process_request_body to False, either in config for the desired + path, or in an 'on_start_resource' or 'before_request_body' hook. + + WARNING: In almost every case, you should not attempt to read from the + rfile stream after CherryPy's automatic mechanism has read it. If you + turn off the automatic parsing of rfile, you should read exactly the + number of bytes specified in request.headers['Content-Length']. + Ignoring either of these warnings may result in a hung request thread + or in corruption of the next (pipelined) request. + """ + + process_request_body = True + """ + If True, the rfile (if any) is automatically read and parsed, + and the result placed into request.params or request.body.""" + + methods_with_bodies = ("POST", "PUT") + """ + A sequence of HTTP methods for which CherryPy will automatically + attempt to read a body from the rfile. If you are going to change + this property, modify it on the configuration (recommended) + or on the "hook point" `on_start_resource`. + """ + + body = None + """ + If the request Content-Type is 'application/x-www-form-urlencoded' + or multipart, this will be None. Otherwise, this will be an instance + of :class:`RequestBody` (which you + can .read()); this value is set between the 'before_request_body' and + 'before_handler' hooks (assuming that process_request_body is True).""" + + # Dispatch attributes + dispatch = cherrypy.dispatch.Dispatcher() + """ + The object which looks up the 'page handler' callable and collects + config for the current request based on the path_info, other + request attributes, and the application architecture. The core + calls the dispatcher as early as possible, passing it a 'path_info' + argument. + + The default dispatcher discovers the page handler by matching path_info + to a hierarchical arrangement of objects, starting at request.app.root. + See help(cherrypy.dispatch) for more information.""" + + script_name = "" + """ + The 'mount point' of the application which is handling this request. + + This attribute MUST NOT end in a slash. If the script_name refers to + the root of the URI, it MUST be an empty string (not "/"). + """ + + path_info = "/" + """ + The 'relative path' portion of the Request-URI. This is relative + to the script_name ('mount point') of the application which is + handling this request.""" + + login = None + """ + When authentication is used during the request processing this is + set to 'False' if it failed and to the 'username' value if it succeeded. + The default 'None' implies that no authentication happened.""" + + # Note that cherrypy.url uses "if request.app:" to determine whether + # the call is during a real HTTP request or not. So leave this None. + app = None + """The cherrypy.Application object which is handling this request.""" + + handler = None + """ + The function, method, or other callable which CherryPy will call to + produce the response. The discovery of the handler and the arguments + it will receive are determined by the request.dispatch object. + By default, the handler is discovered by walking a tree of objects + starting at request.app.root, and is then passed all HTTP params + (from the query string and POST body) as keyword arguments.""" + + toolmaps = {} + """ + A nested dict of all Toolboxes and Tools in effect for this request, + of the form: {Toolbox.namespace: {Tool.name: config dict}}.""" + + config = None + """ + A flat dict of all configuration entries which apply to the + current request. These entries are collected from global config, + application config (based on request.path_info), and from handler + config (exactly how is governed by the request.dispatch object in + effect for this request; by default, handler config can be attached + anywhere in the tree between request.app.root and the final handler, + and inherits downward).""" + + is_index = None + """ + This will be True if the current request is mapped to an 'index' + resource handler (also, a 'default' handler if path_info ends with + a slash). The value may be used to automatically redirect the + user-agent to a 'more canonical' URL which either adds or removes + the trailing slash. See cherrypy.tools.trailing_slash.""" + + hooks = HookMap(hookpoints) + """ + A HookMap (dict-like object) of the form: {hookpoint: [hook, ...]}. + Each key is a str naming the hook point, and each value is a list + of hooks which will be called at that hook point during this request. + The list of hooks is generally populated as early as possible (mostly + from Tools specified in config), but may be extended at any time. + See also: _cprequest.Hook, _cprequest.HookMap, and cherrypy.tools.""" + + error_response = cherrypy.HTTPError(500).set_response + """ + The no-arg callable which will handle unexpected, untrapped errors + during request processing. This is not used for expected exceptions + (like NotFound, HTTPError, or HTTPRedirect) which are raised in + response to expected conditions (those should be customized either + via request.error_page or by overriding HTTPError.set_response). + By default, error_response uses HTTPError(500) to return a generic + error response to the user-agent.""" + + error_page = {} + """ + A dict of {error code: response filename or callable} pairs. + + The error code must be an int representing a given HTTP error code, + or the string 'default', which will be used if no matching entry + is found for a given numeric code. + + If a filename is provided, the file should contain a Python string- + formatting template, and can expect by default to receive format + values with the mapping keys %(status)s, %(message)s, %(traceback)s, + and %(version)s. The set of format mappings can be extended by + overriding HTTPError.set_response. + + If a callable is provided, it will be called by default with keyword + arguments 'status', 'message', 'traceback', and 'version', as for a + string-formatting template. The callable must return a string or + iterable of strings which will be set to response.body. It may also + override headers or perform any other processing. + + If no entry is given for an error code, and no 'default' entry exists, + a default template will be used. + """ + + show_tracebacks = True + """ + If True, unexpected errors encountered during request processing will + include a traceback in the response body.""" + + show_mismatched_params = True + """ + If True, mismatched parameters encountered during PageHandler invocation + processing will be included in the response body.""" + + throws = (KeyboardInterrupt, SystemExit, cherrypy.InternalRedirect) + """The sequence of exceptions which Request.run does not trap.""" + + throw_errors = False + """ + If True, Request.run will not trap any errors (except HTTPRedirect and + HTTPError, which are more properly called 'exceptions', not errors).""" + + closed = False + """True once the close method has been called, False otherwise.""" + + stage = None + """ + A string containing the stage reached in the request-handling process. + This is useful when debugging a live server with hung requests.""" + + namespaces = _cpconfig.NamespaceSet( + **{"hooks": hooks_namespace, + "request": request_namespace, + "response": response_namespace, + "error_page": error_page_namespace, + "tools": cherrypy.tools, + }) + + def __init__(self, local_host, remote_host, scheme="http", + server_protocol="HTTP/1.1"): + """Populate a new Request object. + + local_host should be an httputil.Host object with the server info. + remote_host should be an httputil.Host object with the client info. + scheme should be a string, either "http" or "https". + """ + self.local = local_host + self.remote = remote_host + self.scheme = scheme + self.server_protocol = server_protocol + + self.closed = False + + # Put a *copy* of the class error_page into self. + self.error_page = self.error_page.copy() + + # Put a *copy* of the class namespaces into self. + self.namespaces = self.namespaces.copy() + + self.stage = None + + def close(self): + """Run cleanup code. (Core)""" + if not self.closed: + self.closed = True + self.stage = 'on_end_request' + self.hooks.run('on_end_request') + self.stage = 'close' + + def run(self, method, path, query_string, req_protocol, headers, rfile): + r"""Process the Request. (Core) + + method, path, query_string, and req_protocol should be pulled directly + from the Request-Line (e.g. "GET /path?key=val HTTP/1.0"). + + path + This should be %XX-unquoted, but query_string should not be. + + When using Python 2, they both MUST be byte strings, + not unicode strings. + + When using Python 3, they both MUST be unicode strings, + not byte strings, and preferably not bytes \x00-\xFF + disguised as unicode. + + headers + A list of (name, value) tuples. + + rfile + A file-like object containing the HTTP request entity. + + When run() is done, the returned object should have 3 attributes: + + * status, e.g. "200 OK" + * header_list, a list of (name, value) tuples + * body, an iterable yielding strings + + Consumer code (HTTP servers) should then access these response + attributes to build the outbound stream. + + """ + response = cherrypy.serving.response + self.stage = 'run' + try: + self.error_response = cherrypy.HTTPError(500).set_response + + self.method = method + path = path or "/" + self.query_string = query_string or '' + self.params = {} + + # Compare request and server HTTP protocol versions, in case our + # server does not support the requested protocol. Limit our output + # to min(req, server). We want the following output: + # request server actual written supported response + # protocol protocol response protocol feature set + # a 1.0 1.0 1.0 1.0 + # b 1.0 1.1 1.1 1.0 + # c 1.1 1.0 1.0 1.0 + # d 1.1 1.1 1.1 1.1 + # Notice that, in (b), the response will be "HTTP/1.1" even though + # the client only understands 1.0. RFC 2616 10.5.6 says we should + # only return 505 if the _major_ version is different. + rp = int(req_protocol[5]), int(req_protocol[7]) + sp = int(self.server_protocol[5]), int(self.server_protocol[7]) + self.protocol = min(rp, sp) + response.headers.protocol = self.protocol + + # Rebuild first line of the request (e.g. "GET /path HTTP/1.0"). + url = path + if query_string: + url += '?' + query_string + self.request_line = '%s %s %s' % (method, url, req_protocol) + + self.header_list = list(headers) + self.headers = httputil.HeaderMap() + + self.rfile = rfile + self.body = None + + self.cookie = SimpleCookie() + self.handler = None + + # path_info should be the path from the + # app root (script_name) to the handler. + self.script_name = self.app.script_name + self.path_info = pi = path[len(self.script_name):] + + self.stage = 'respond' + self.respond(pi) + + except self.throws: + raise + except: + if self.throw_errors: + raise + else: + # Failure in setup, error handler or finalize. Bypass them. + # Can't use handle_error because we may not have hooks yet. + cherrypy.log(traceback=True, severity=40) + if self.show_tracebacks: + body = format_exc() + else: + body = "" + r = bare_error(body) + response.output_status, response.header_list, response.body = r + + if self.method == "HEAD": + # HEAD requests MUST NOT return a message-body in the response. + response.body = [] + + try: + cherrypy.log.access() + except: + cherrypy.log.error(traceback=True) + + if response.timed_out: + raise cherrypy.TimeoutError() + + return response + + # Uncomment for stage debugging + # stage = property(lambda self: self._stage, lambda self, v: print(v)) + + def respond(self, path_info): + """Generate a response for the resource at self.path_info. (Core)""" + response = cherrypy.serving.response + try: + try: + try: + if self.app is None: + raise cherrypy.NotFound() + + # Get the 'Host' header, so we can HTTPRedirect properly. + self.stage = 'process_headers' + self.process_headers() + + # Make a copy of the class hooks + self.hooks = self.__class__.hooks.copy() + self.toolmaps = {} + + self.stage = 'get_resource' + self.get_resource(path_info) + + self.body = _cpreqbody.RequestBody( + self.rfile, self.headers, request_params=self.params) + + self.namespaces(self.config) + + self.stage = 'on_start_resource' + self.hooks.run('on_start_resource') + + # Parse the querystring + self.stage = 'process_query_string' + self.process_query_string() + + # Process the body + if self.process_request_body: + if self.method not in self.methods_with_bodies: + self.process_request_body = False + self.stage = 'before_request_body' + self.hooks.run('before_request_body') + if self.process_request_body: + self.body.process() + + # Run the handler + self.stage = 'before_handler' + self.hooks.run('before_handler') + if self.handler: + self.stage = 'handler' + response.body = self.handler() + + # Finalize + self.stage = 'before_finalize' + self.hooks.run('before_finalize') + response.finalize() + except (cherrypy.HTTPRedirect, cherrypy.HTTPError): + inst = sys.exc_info()[1] + inst.set_response() + self.stage = 'before_finalize (HTTPError)' + self.hooks.run('before_finalize') + response.finalize() + finally: + self.stage = 'on_end_resource' + self.hooks.run('on_end_resource') + except self.throws: + raise + except: + if self.throw_errors: + raise + self.handle_error() + + def process_query_string(self): + """Parse the query string into Python structures. (Core)""" + try: + p = httputil.parse_query_string( + self.query_string, encoding=self.query_string_encoding) + except UnicodeDecodeError: + raise cherrypy.HTTPError( + 404, "The given query string could not be processed. Query " + "strings for this resource must be encoded with %r." % + self.query_string_encoding) + + # Python 2 only: keyword arguments must be byte strings (type 'str'). + if not py3k: + for key, value in p.items(): + if isinstance(key, unicode): + del p[key] + p[key.encode(self.query_string_encoding)] = value + self.params.update(p) + + def process_headers(self): + """Parse HTTP header data into Python structures. (Core)""" + # Process the headers into self.headers + headers = self.headers + for name, value in self.header_list: + # Call title() now (and use dict.__method__(headers)) + # so title doesn't have to be called twice. + name = name.title() + value = value.strip() + + # Warning: if there is more than one header entry for cookies + # (AFAIK, only Konqueror does that), only the last one will + # remain in headers (but they will be correctly stored in + # request.cookie). + if "=?" in value: + dict.__setitem__(headers, name, httputil.decode_TEXT(value)) + else: + dict.__setitem__(headers, name, value) + + # Handle cookies differently because on Konqueror, multiple + # cookies come on different lines with the same key + if name == 'Cookie': + try: + self.cookie.load(value) + except CookieError: + msg = "Illegal cookie name %s" % value.split('=')[0] + raise cherrypy.HTTPError(400, msg) + + if not dict.__contains__(headers, 'Host'): + # All Internet-based HTTP/1.1 servers MUST respond with a 400 + # (Bad Request) status code to any HTTP/1.1 request message + # which lacks a Host header field. + if self.protocol >= (1, 1): + msg = "HTTP/1.1 requires a 'Host' request header." + raise cherrypy.HTTPError(400, msg) + host = dict.get(headers, 'Host') + if not host: + host = self.local.name or self.local.ip + self.base = "%s://%s" % (self.scheme, host) + + def get_resource(self, path): + """Call a dispatcher (which sets self.handler and .config). (Core)""" + # First, see if there is a custom dispatch at this URI. Custom + # dispatchers can only be specified in app.config, not in _cp_config + # (since custom dispatchers may not even have an app.root). + dispatch = self.app.find_config( + path, "request.dispatch", self.dispatch) + + # dispatch() should set self.handler and self.config + dispatch(path) + + def handle_error(self): + """Handle the last unanticipated exception. (Core)""" + try: + self.hooks.run("before_error_response") + if self.error_response: + self.error_response() + self.hooks.run("after_error_response") + cherrypy.serving.response.finalize() + except cherrypy.HTTPRedirect: + inst = sys.exc_info()[1] + inst.set_response() + cherrypy.serving.response.finalize() + + # ------------------------- Properties ------------------------- # + + def _get_body_params(self): + warnings.warn( + "body_params is deprecated in CherryPy 3.2, will be removed in " + "CherryPy 3.3.", + DeprecationWarning + ) + return self.body.params + body_params = property(_get_body_params, + doc=""" + If the request Content-Type is 'application/x-www-form-urlencoded' or + multipart, this will be a dict of the params pulled from the entity + body; that is, it will be the portion of request.params that come + from the message body (sometimes called "POST params", although they + can be sent with various HTTP method verbs). This value is set between + the 'before_request_body' and 'before_handler' hooks (assuming that + process_request_body is True). + + Deprecated in 3.2, will be removed for 3.3 in favor of + :attr:`request.body.params`.""") + + +class ResponseBody(object): + + """The body of the HTTP response (the response entity).""" + + if py3k: + unicode_err = ("Page handlers MUST return bytes. Use tools.encode " + "if you wish to return unicode.") + + def __get__(self, obj, objclass=None): + if obj is None: + # When calling on the class instead of an instance... + return self + else: + return obj._body + + def __set__(self, obj, value): + # Convert the given value to an iterable object. + if py3k and isinstance(value, str): + raise ValueError(self.unicode_err) + + if isinstance(value, basestring): + # strings get wrapped in a list because iterating over a single + # item list is much faster than iterating over every character + # in a long string. + if value: + value = [value] + else: + # [''] doesn't evaluate to False, so replace it with []. + value = [] + elif py3k and isinstance(value, list): + # every item in a list must be bytes... + for i, item in enumerate(value): + if isinstance(item, str): + raise ValueError(self.unicode_err) + # Don't use isinstance here; io.IOBase which has an ABC takes + # 1000 times as long as, say, isinstance(value, str) + elif hasattr(value, 'read'): + value = file_generator(value) + elif value is None: + value = [] + obj._body = value + + +class Response(object): + + """An HTTP Response, including status, headers, and body.""" + + status = "" + """The HTTP Status-Code and Reason-Phrase.""" + + header_list = [] + """ + A list of the HTTP response headers as (name, value) tuples. + In general, you should use response.headers (a dict) instead. This + attribute is generated from response.headers and is not valid until + after the finalize phase.""" + + headers = httputil.HeaderMap() + """ + A dict-like object containing the response headers. Keys are header + names (in Title-Case format); however, you may get and set them in + a case-insensitive manner. That is, headers['Content-Type'] and + headers['content-type'] refer to the same value. Values are header + values (decoded according to :rfc:`2047` if necessary). + + .. seealso:: classes :class:`HeaderMap`, :class:`HeaderElement` + """ + + cookie = SimpleCookie() + """See help(Cookie).""" + + body = ResponseBody() + """The body (entity) of the HTTP response.""" + + time = None + """The value of time.time() when created. Use in HTTP dates.""" + + timeout = 300 + """Seconds after which the response will be aborted.""" + + timed_out = False + """ + Flag to indicate the response should be aborted, because it has + exceeded its timeout.""" + + stream = False + """If False, buffer the response body.""" + + def __init__(self): + self.status = None + self.header_list = None + self._body = [] + self.time = time.time() + + self.headers = httputil.HeaderMap() + # Since we know all our keys are titled strings, we can + # bypass HeaderMap.update and get a big speed boost. + dict.update(self.headers, { + "Content-Type": 'text/html', + "Server": "CherryPy/" + cherrypy.__version__, + "Date": httputil.HTTPDate(self.time), + }) + self.cookie = SimpleCookie() + + def collapse_body(self): + """Collapse self.body to a single string; replace it and return it.""" + if isinstance(self.body, basestring): + return self.body + + newbody = [] + for chunk in self.body: + if py3k and not isinstance(chunk, bytes): + raise TypeError("Chunk %s is not of type 'bytes'." % + repr(chunk)) + newbody.append(chunk) + newbody = ntob('').join(newbody) + + self.body = newbody + return newbody + + def finalize(self): + """Transform headers (and cookies) into self.header_list. (Core)""" + try: + code, reason, _ = httputil.valid_status(self.status) + except ValueError: + raise cherrypy.HTTPError(500, sys.exc_info()[1].args[0]) + + headers = self.headers + + self.status = "%s %s" % (code, reason) + self.output_status = ntob(str(code), 'ascii') + \ + ntob(" ") + headers.encode(reason) + + if self.stream: + # The upshot: wsgiserver will chunk the response if + # you pop Content-Length (or set it explicitly to None). + # Note that lib.static sets C-L to the file's st_size. + if dict.get(headers, 'Content-Length') is None: + dict.pop(headers, 'Content-Length', None) + elif code < 200 or code in (204, 205, 304): + # "All 1xx (informational), 204 (no content), + # and 304 (not modified) responses MUST NOT + # include a message-body." + dict.pop(headers, 'Content-Length', None) + self.body = ntob("") + else: + # Responses which are not streamed should have a Content-Length, + # but allow user code to set Content-Length if desired. + if dict.get(headers, 'Content-Length') is None: + content = self.collapse_body() + dict.__setitem__(headers, 'Content-Length', len(content)) + + # Transform our header dict into a list of tuples. + self.header_list = h = headers.output() + + cookie = self.cookie.output() + if cookie: + for line in cookie.split("\n"): + if line.endswith("\r"): + # Python 2.4 emits cookies joined by LF but 2.5+ by CRLF. + line = line[:-1] + name, value = line.split(": ", 1) + if isinstance(name, unicodestr): + name = name.encode("ISO-8859-1") + if isinstance(value, unicodestr): + value = headers.encode(value) + h.append((name, value)) + + def check_timeout(self): + """If now > self.time + self.timeout, set self.timed_out. + + This purposefully sets a flag, rather than raising an error, + so that a monitor thread can interrupt the Response thread. + """ + if time.time() > self.time + self.timeout: + self.timed_out = True diff --git a/lib/cherrypy/_cpserver.py b/lib/cherrypy/_cpserver.py new file mode 100644 index 00000000..a31e7428 --- /dev/null +++ b/lib/cherrypy/_cpserver.py @@ -0,0 +1,226 @@ +"""Manage HTTP servers with CherryPy.""" + +import warnings + +import cherrypy +from cherrypy.lib import attributes +from cherrypy._cpcompat import basestring, py3k + +# We import * because we want to export check_port +# et al as attributes of this module. +from cherrypy.process.servers import * + + +class Server(ServerAdapter): + + """An adapter for an HTTP server. + + You can set attributes (like socket_host and socket_port) + on *this* object (which is probably cherrypy.server), and call + quickstart. For example:: + + cherrypy.server.socket_port = 80 + cherrypy.quickstart() + """ + + socket_port = 8080 + """The TCP port on which to listen for connections.""" + + _socket_host = '127.0.0.1' + + def _get_socket_host(self): + return self._socket_host + + def _set_socket_host(self, value): + if value == '': + raise ValueError("The empty string ('') is not an allowed value. " + "Use '0.0.0.0' instead to listen on all active " + "interfaces (INADDR_ANY).") + self._socket_host = value + socket_host = property( + _get_socket_host, + _set_socket_host, + doc="""The hostname or IP address on which to listen for connections. + + Host values may be any IPv4 or IPv6 address, or any valid hostname. + The string 'localhost' is a synonym for '127.0.0.1' (or '::1', if + your hosts file prefers IPv6). The string '0.0.0.0' is a special + IPv4 entry meaning "any active interface" (INADDR_ANY), and '::' + is the similar IN6ADDR_ANY for IPv6. The empty string or None are + not allowed.""") + + socket_file = None + """If given, the name of the UNIX socket to use instead of TCP/IP. + + When this option is not None, the `socket_host` and `socket_port` options + are ignored.""" + + socket_queue_size = 5 + """The 'backlog' argument to socket.listen(); specifies the maximum number + of queued connections (default 5).""" + + socket_timeout = 10 + """The timeout in seconds for accepted connections (default 10).""" + + accepted_queue_size = -1 + """The maximum number of requests which will be queued up before + the server refuses to accept it (default -1, meaning no limit).""" + + accepted_queue_timeout = 10 + """The timeout in seconds for attempting to add a request to the + queue when the queue is full (default 10).""" + + shutdown_timeout = 5 + """The time to wait for HTTP worker threads to clean up.""" + + protocol_version = 'HTTP/1.1' + """The version string to write in the Status-Line of all HTTP responses, + for example, "HTTP/1.1" (the default). Depending on the HTTP server used, + this should also limit the supported features used in the response.""" + + thread_pool = 10 + """The number of worker threads to start up in the pool.""" + + thread_pool_max = -1 + """The maximum size of the worker-thread pool. Use -1 to indicate no limit. + """ + + max_request_header_size = 500 * 1024 + """The maximum number of bytes allowable in the request headers. + If exceeded, the HTTP server should return "413 Request Entity Too Large". + """ + + max_request_body_size = 100 * 1024 * 1024 + """The maximum number of bytes allowable in the request body. If exceeded, + the HTTP server should return "413 Request Entity Too Large".""" + + instance = None + """If not None, this should be an HTTP server instance (such as + CPWSGIServer) which cherrypy.server will control. Use this when you need + more control over object instantiation than is available in the various + configuration options.""" + + ssl_context = None + """When using PyOpenSSL, an instance of SSL.Context.""" + + ssl_certificate = None + """The filename of the SSL certificate to use.""" + + ssl_certificate_chain = None + """When using PyOpenSSL, the certificate chain to pass to + Context.load_verify_locations.""" + + ssl_private_key = None + """The filename of the private key to use with SSL.""" + + if py3k: + ssl_module = 'builtin' + """The name of a registered SSL adaptation module to use with + the builtin WSGI server. Builtin options are: 'builtin' (to + use the SSL library built into recent versions of Python). + You may also register your own classes in the + wsgiserver.ssl_adapters dict.""" + else: + ssl_module = 'pyopenssl' + """The name of a registered SSL adaptation module to use with the + builtin WSGI server. Builtin options are 'builtin' (to use the SSL + library built into recent versions of Python) and 'pyopenssl' (to + use the PyOpenSSL project, which you must install separately). You + may also register your own classes in the wsgiserver.ssl_adapters + dict.""" + + statistics = False + """Turns statistics-gathering on or off for aware HTTP servers.""" + + nodelay = True + """If True (the default since 3.1), sets the TCP_NODELAY socket option.""" + + wsgi_version = (1, 0) + """The WSGI version tuple to use with the builtin WSGI server. + The provided options are (1, 0) [which includes support for PEP 3333, + which declares it covers WSGI version 1.0.1 but still mandates the + wsgi.version (1, 0)] and ('u', 0), an experimental unicode version. + You may create and register your own experimental versions of the WSGI + protocol by adding custom classes to the wsgiserver.wsgi_gateways dict.""" + + def __init__(self): + self.bus = cherrypy.engine + self.httpserver = None + self.interrupt = None + self.running = False + + def httpserver_from_self(self, httpserver=None): + """Return a (httpserver, bind_addr) pair based on self attributes.""" + if httpserver is None: + httpserver = self.instance + if httpserver is None: + from cherrypy import _cpwsgi_server + httpserver = _cpwsgi_server.CPWSGIServer(self) + if isinstance(httpserver, basestring): + # Is anyone using this? Can I add an arg? + httpserver = attributes(httpserver)(self) + return httpserver, self.bind_addr + + def start(self): + """Start the HTTP server.""" + if not self.httpserver: + self.httpserver, self.bind_addr = self.httpserver_from_self() + ServerAdapter.start(self) + start.priority = 75 + + def _get_bind_addr(self): + if self.socket_file: + return self.socket_file + if self.socket_host is None and self.socket_port is None: + return None + return (self.socket_host, self.socket_port) + + def _set_bind_addr(self, value): + if value is None: + self.socket_file = None + self.socket_host = None + self.socket_port = None + elif isinstance(value, basestring): + self.socket_file = value + self.socket_host = None + self.socket_port = None + else: + try: + self.socket_host, self.socket_port = value + self.socket_file = None + except ValueError: + raise ValueError("bind_addr must be a (host, port) tuple " + "(for TCP sockets) or a string (for Unix " + "domain sockets), not %r" % value) + bind_addr = property( + _get_bind_addr, + _set_bind_addr, + doc='A (host, port) tuple for TCP sockets or ' + 'a str for Unix domain sockets.') + + def base(self): + """Return the base (scheme://host[:port] or sock file) for this server. + """ + if self.socket_file: + return self.socket_file + + host = self.socket_host + if host in ('0.0.0.0', '::'): + # 0.0.0.0 is INADDR_ANY and :: is IN6ADDR_ANY. + # Look up the host name, which should be the + # safest thing to spit out in a URL. + import socket + host = socket.gethostname() + + port = self.socket_port + + if self.ssl_certificate: + scheme = "https" + if port != 443: + host += ":%s" % port + else: + scheme = "http" + if port != 80: + host += ":%s" % port + + return "%s://%s" % (scheme, host) diff --git a/lib/cherrypy/_cpthreadinglocal.py b/lib/cherrypy/_cpthreadinglocal.py new file mode 100644 index 00000000..238c3224 --- /dev/null +++ b/lib/cherrypy/_cpthreadinglocal.py @@ -0,0 +1,241 @@ +# This is a backport of Python-2.4's threading.local() implementation + +"""Thread-local objects + +(Note that this module provides a Python version of thread + threading.local class. Depending on the version of Python you're + using, there may be a faster one available. You should always import + the local class from threading.) + +Thread-local objects support the management of thread-local data. +If you have data that you want to be local to a thread, simply create +a thread-local object and use its attributes: + + >>> mydata = local() + >>> mydata.number = 42 + >>> mydata.number + 42 + +You can also access the local-object's dictionary: + + >>> mydata.__dict__ + {'number': 42} + >>> mydata.__dict__.setdefault('widgets', []) + [] + >>> mydata.widgets + [] + +What's important about thread-local objects is that their data are +local to a thread. If we access the data in a different thread: + + >>> log = [] + >>> def f(): + ... items = mydata.__dict__.items() + ... items.sort() + ... log.append(items) + ... mydata.number = 11 + ... log.append(mydata.number) + + >>> import threading + >>> thread = threading.Thread(target=f) + >>> thread.start() + >>> thread.join() + >>> log + [[], 11] + +we get different data. Furthermore, changes made in the other thread +don't affect data seen in this thread: + + >>> mydata.number + 42 + +Of course, values you get from a local object, including a __dict__ +attribute, are for whatever thread was current at the time the +attribute was read. For that reason, you generally don't want to save +these values across threads, as they apply only to the thread they +came from. + +You can create custom local objects by subclassing the local class: + + >>> class MyLocal(local): + ... number = 2 + ... initialized = False + ... def __init__(self, **kw): + ... if self.initialized: + ... raise SystemError('__init__ called too many times') + ... self.initialized = True + ... self.__dict__.update(kw) + ... def squared(self): + ... return self.number ** 2 + +This can be useful to support default values, methods and +initialization. Note that if you define an __init__ method, it will be +called each time the local object is used in a separate thread. This +is necessary to initialize each thread's dictionary. + +Now if we create a local object: + + >>> mydata = MyLocal(color='red') + +Now we have a default number: + + >>> mydata.number + 2 + +an initial color: + + >>> mydata.color + 'red' + >>> del mydata.color + +And a method that operates on the data: + + >>> mydata.squared() + 4 + +As before, we can access the data in a separate thread: + + >>> log = [] + >>> thread = threading.Thread(target=f) + >>> thread.start() + >>> thread.join() + >>> log + [[('color', 'red'), ('initialized', True)], 11] + +without affecting this thread's data: + + >>> mydata.number + 2 + >>> mydata.color + Traceback (most recent call last): + ... + AttributeError: 'MyLocal' object has no attribute 'color' + +Note that subclasses can define slots, but they are not thread +local. They are shared across threads: + + >>> class MyLocal(local): + ... __slots__ = 'number' + + >>> mydata = MyLocal() + >>> mydata.number = 42 + >>> mydata.color = 'red' + +So, the separate thread: + + >>> thread = threading.Thread(target=f) + >>> thread.start() + >>> thread.join() + +affects what we see: + + >>> mydata.number + 11 + +>>> del mydata +""" + +# Threading import is at end + + +class _localbase(object): + __slots__ = '_local__key', '_local__args', '_local__lock' + + def __new__(cls, *args, **kw): + self = object.__new__(cls) + key = 'thread.local.' + str(id(self)) + object.__setattr__(self, '_local__key', key) + object.__setattr__(self, '_local__args', (args, kw)) + object.__setattr__(self, '_local__lock', RLock()) + + if args or kw and (cls.__init__ is object.__init__): + raise TypeError("Initialization arguments are not supported") + + # We need to create the thread dict in anticipation of + # __init__ being called, to make sure we don't call it + # again ourselves. + dict = object.__getattribute__(self, '__dict__') + currentThread().__dict__[key] = dict + + return self + + +def _patch(self): + key = object.__getattribute__(self, '_local__key') + d = currentThread().__dict__.get(key) + if d is None: + d = {} + currentThread().__dict__[key] = d + object.__setattr__(self, '__dict__', d) + + # we have a new instance dict, so call out __init__ if we have + # one + cls = type(self) + if cls.__init__ is not object.__init__: + args, kw = object.__getattribute__(self, '_local__args') + cls.__init__(self, *args, **kw) + else: + object.__setattr__(self, '__dict__', d) + + +class local(_localbase): + + def __getattribute__(self, name): + lock = object.__getattribute__(self, '_local__lock') + lock.acquire() + try: + _patch(self) + return object.__getattribute__(self, name) + finally: + lock.release() + + def __setattr__(self, name, value): + lock = object.__getattribute__(self, '_local__lock') + lock.acquire() + try: + _patch(self) + return object.__setattr__(self, name, value) + finally: + lock.release() + + def __delattr__(self, name): + lock = object.__getattribute__(self, '_local__lock') + lock.acquire() + try: + _patch(self) + return object.__delattr__(self, name) + finally: + lock.release() + + def __del__(): + threading_enumerate = enumerate + __getattribute__ = object.__getattribute__ + + def __del__(self): + key = __getattribute__(self, '_local__key') + + try: + threads = list(threading_enumerate()) + except: + # if enumerate fails, as it seems to do during + # shutdown, we'll skip cleanup under the assumption + # that there is nothing to clean up + return + + for thread in threads: + try: + __dict__ = thread.__dict__ + except AttributeError: + # Thread is dying, rest in peace + continue + + if key in __dict__: + try: + del __dict__[key] + except KeyError: + pass # didn't have anything in this thread + + return __del__ + __del__ = __del__() + +from threading import currentThread, enumerate, RLock diff --git a/lib/cherrypy/_cptools.py b/lib/cherrypy/_cptools.py new file mode 100644 index 00000000..06a56e87 --- /dev/null +++ b/lib/cherrypy/_cptools.py @@ -0,0 +1,529 @@ +"""CherryPy tools. A "tool" is any helper, adapted to CP. + +Tools are usually designed to be used in a variety of ways (although some +may only offer one if they choose): + + Library calls + All tools are callables that can be used wherever needed. + The arguments are straightforward and should be detailed within the + docstring. + + Function decorators + All tools, when called, may be used as decorators which configure + individual CherryPy page handlers (methods on the CherryPy tree). + That is, "@tools.anytool()" should "turn on" the tool via the + decorated function's _cp_config attribute. + + CherryPy config + If a tool exposes a "_setup" callable, it will be called + once per Request (if the feature is "turned on" via config). + +Tools may be implemented as any object with a namespace. The builtins +are generally either modules or instances of the tools.Tool class. +""" + +import sys +import warnings + +import cherrypy + + +def _getargs(func): + """Return the names of all static arguments to the given function.""" + # Use this instead of importing inspect for less mem overhead. + import types + if sys.version_info >= (3, 0): + if isinstance(func, types.MethodType): + func = func.__func__ + co = func.__code__ + else: + if isinstance(func, types.MethodType): + func = func.im_func + co = func.func_code + return co.co_varnames[:co.co_argcount] + + +_attr_error = ( + "CherryPy Tools cannot be turned on directly. Instead, turn them " + "on via config, or use them as decorators on your page handlers." +) + + +class Tool(object): + + """A registered function for use with CherryPy request-processing hooks. + + help(tool.callable) should give you more information about this Tool. + """ + + namespace = "tools" + + def __init__(self, point, callable, name=None, priority=50): + self._point = point + self.callable = callable + self._name = name + self._priority = priority + self.__doc__ = self.callable.__doc__ + self._setargs() + + def _get_on(self): + raise AttributeError(_attr_error) + + def _set_on(self, value): + raise AttributeError(_attr_error) + on = property(_get_on, _set_on) + + def _setargs(self): + """Copy func parameter names to obj attributes.""" + try: + for arg in _getargs(self.callable): + setattr(self, arg, None) + except (TypeError, AttributeError): + if hasattr(self.callable, "__call__"): + for arg in _getargs(self.callable.__call__): + setattr(self, arg, None) + # IronPython 1.0 raises NotImplementedError because + # inspect.getargspec tries to access Python bytecode + # in co_code attribute. + except NotImplementedError: + pass + # IronPython 1B1 may raise IndexError in some cases, + # but if we trap it here it doesn't prevent CP from working. + except IndexError: + pass + + def _merged_args(self, d=None): + """Return a dict of configuration entries for this Tool.""" + if d: + conf = d.copy() + else: + conf = {} + + tm = cherrypy.serving.request.toolmaps[self.namespace] + if self._name in tm: + conf.update(tm[self._name]) + + if "on" in conf: + del conf["on"] + + return conf + + def __call__(self, *args, **kwargs): + """Compile-time decorator (turn on the tool in config). + + For example:: + + @tools.proxy() + def whats_my_base(self): + return cherrypy.request.base + whats_my_base.exposed = True + """ + if args: + raise TypeError("The %r Tool does not accept positional " + "arguments; you must use keyword arguments." + % self._name) + + def tool_decorator(f): + if not hasattr(f, "_cp_config"): + f._cp_config = {} + subspace = self.namespace + "." + self._name + "." + f._cp_config[subspace + "on"] = True + for k, v in kwargs.items(): + f._cp_config[subspace + k] = v + return f + return tool_decorator + + def _setup(self): + """Hook this tool into cherrypy.request. + + The standard CherryPy request object will automatically call this + method when the tool is "turned on" in config. + """ + conf = self._merged_args() + p = conf.pop("priority", None) + if p is None: + p = getattr(self.callable, "priority", self._priority) + cherrypy.serving.request.hooks.attach(self._point, self.callable, + priority=p, **conf) + + +class HandlerTool(Tool): + + """Tool which is called 'before main', that may skip normal handlers. + + If the tool successfully handles the request (by setting response.body), + if should return True. This will cause CherryPy to skip any 'normal' page + handler. If the tool did not handle the request, it should return False + to tell CherryPy to continue on and call the normal page handler. If the + tool is declared AS a page handler (see the 'handler' method), returning + False will raise NotFound. + """ + + def __init__(self, callable, name=None): + Tool.__init__(self, 'before_handler', callable, name) + + def handler(self, *args, **kwargs): + """Use this tool as a CherryPy page handler. + + For example:: + + class Root: + nav = tools.staticdir.handler(section="/nav", dir="nav", + root=absDir) + """ + def handle_func(*a, **kw): + handled = self.callable(*args, **self._merged_args(kwargs)) + if not handled: + raise cherrypy.NotFound() + return cherrypy.serving.response.body + handle_func.exposed = True + return handle_func + + def _wrapper(self, **kwargs): + if self.callable(**kwargs): + cherrypy.serving.request.handler = None + + def _setup(self): + """Hook this tool into cherrypy.request. + + The standard CherryPy request object will automatically call this + method when the tool is "turned on" in config. + """ + conf = self._merged_args() + p = conf.pop("priority", None) + if p is None: + p = getattr(self.callable, "priority", self._priority) + cherrypy.serving.request.hooks.attach(self._point, self._wrapper, + priority=p, **conf) + + +class HandlerWrapperTool(Tool): + + """Tool which wraps request.handler in a provided wrapper function. + + The 'newhandler' arg must be a handler wrapper function that takes a + 'next_handler' argument, plus ``*args`` and ``**kwargs``. Like all + page handler + functions, it must return an iterable for use as cherrypy.response.body. + + For example, to allow your 'inner' page handlers to return dicts + which then get interpolated into a template:: + + def interpolator(next_handler, *args, **kwargs): + filename = cherrypy.request.config.get('template') + cherrypy.response.template = env.get_template(filename) + response_dict = next_handler(*args, **kwargs) + return cherrypy.response.template.render(**response_dict) + cherrypy.tools.jinja = HandlerWrapperTool(interpolator) + """ + + def __init__(self, newhandler, point='before_handler', name=None, + priority=50): + self.newhandler = newhandler + self._point = point + self._name = name + self._priority = priority + + def callable(self, *args, **kwargs): + innerfunc = cherrypy.serving.request.handler + + def wrap(*args, **kwargs): + return self.newhandler(innerfunc, *args, **kwargs) + cherrypy.serving.request.handler = wrap + + +class ErrorTool(Tool): + + """Tool which is used to replace the default request.error_response.""" + + def __init__(self, callable, name=None): + Tool.__init__(self, None, callable, name) + + def _wrapper(self): + self.callable(**self._merged_args()) + + def _setup(self): + """Hook this tool into cherrypy.request. + + The standard CherryPy request object will automatically call this + method when the tool is "turned on" in config. + """ + cherrypy.serving.request.error_response = self._wrapper + + +# Builtin tools # + +from cherrypy.lib import cptools, encoding, auth, static, jsontools +from cherrypy.lib import sessions as _sessions, xmlrpcutil as _xmlrpc +from cherrypy.lib import caching as _caching +from cherrypy.lib import auth_basic, auth_digest + + +class SessionTool(Tool): + + """Session Tool for CherryPy. + + sessions.locking + When 'implicit' (the default), the session will be locked for you, + just before running the page handler. + + When 'early', the session will be locked before reading the request + body. This is off by default for safety reasons; for example, + a large upload would block the session, denying an AJAX + progress meter + (`issue `_). + + When 'explicit' (or any other value), you need to call + cherrypy.session.acquire_lock() yourself before using + session data. + """ + + def __init__(self): + # _sessions.init must be bound after headers are read + Tool.__init__(self, 'before_request_body', _sessions.init) + + def _lock_session(self): + cherrypy.serving.session.acquire_lock() + + def _setup(self): + """Hook this tool into cherrypy.request. + + The standard CherryPy request object will automatically call this + method when the tool is "turned on" in config. + """ + hooks = cherrypy.serving.request.hooks + + conf = self._merged_args() + + p = conf.pop("priority", None) + if p is None: + p = getattr(self.callable, "priority", self._priority) + + hooks.attach(self._point, self.callable, priority=p, **conf) + + locking = conf.pop('locking', 'implicit') + if locking == 'implicit': + hooks.attach('before_handler', self._lock_session) + elif locking == 'early': + # Lock before the request body (but after _sessions.init runs!) + hooks.attach('before_request_body', self._lock_session, + priority=60) + else: + # Don't lock + pass + + hooks.attach('before_finalize', _sessions.save) + hooks.attach('on_end_request', _sessions.close) + + def regenerate(self): + """Drop the current session and make a new one (with a new id).""" + sess = cherrypy.serving.session + sess.regenerate() + + # Grab cookie-relevant tool args + conf = dict([(k, v) for k, v in self._merged_args().items() + if k in ('path', 'path_header', 'name', 'timeout', + 'domain', 'secure')]) + _sessions.set_response_cookie(**conf) + + +class XMLRPCController(object): + + """A Controller (page handler collection) for XML-RPC. + + To use it, have your controllers subclass this base class (it will + turn on the tool for you). + + You can also supply the following optional config entries:: + + tools.xmlrpc.encoding: 'utf-8' + tools.xmlrpc.allow_none: 0 + + XML-RPC is a rather discontinuous layer over HTTP; dispatching to the + appropriate handler must first be performed according to the URL, and + then a second dispatch step must take place according to the RPC method + specified in the request body. It also allows a superfluous "/RPC2" + prefix in the URL, supplies its own handler args in the body, and + requires a 200 OK "Fault" response instead of 404 when the desired + method is not found. + + Therefore, XML-RPC cannot be implemented for CherryPy via a Tool alone. + This Controller acts as the dispatch target for the first half (based + on the URL); it then reads the RPC method from the request body and + does its own second dispatch step based on that method. It also reads + body params, and returns a Fault on error. + + The XMLRPCDispatcher strips any /RPC2 prefix; if you aren't using /RPC2 + in your URL's, you can safely skip turning on the XMLRPCDispatcher. + Otherwise, you need to use declare it in config:: + + request.dispatch: cherrypy.dispatch.XMLRPCDispatcher() + """ + + # Note we're hard-coding this into the 'tools' namespace. We could do + # a huge amount of work to make it relocatable, but the only reason why + # would be if someone actually disabled the default_toolbox. Meh. + _cp_config = {'tools.xmlrpc.on': True} + + def default(self, *vpath, **params): + rpcparams, rpcmethod = _xmlrpc.process_body() + + subhandler = self + for attr in str(rpcmethod).split('.'): + subhandler = getattr(subhandler, attr, None) + + if subhandler and getattr(subhandler, "exposed", False): + body = subhandler(*(vpath + rpcparams), **params) + + else: + # https://bitbucket.org/cherrypy/cherrypy/issue/533 + # if a method is not found, an xmlrpclib.Fault should be returned + # raising an exception here will do that; see + # cherrypy.lib.xmlrpcutil.on_error + raise Exception('method "%s" is not supported' % attr) + + conf = cherrypy.serving.request.toolmaps['tools'].get("xmlrpc", {}) + _xmlrpc.respond(body, + conf.get('encoding', 'utf-8'), + conf.get('allow_none', 0)) + return cherrypy.serving.response.body + default.exposed = True + + +class SessionAuthTool(HandlerTool): + + def _setargs(self): + for name in dir(cptools.SessionAuth): + if not name.startswith("__"): + setattr(self, name, None) + + +class CachingTool(Tool): + + """Caching Tool for CherryPy.""" + + def _wrapper(self, **kwargs): + request = cherrypy.serving.request + if _caching.get(**kwargs): + request.handler = None + else: + if request.cacheable: + # Note the devious technique here of adding hooks on the fly + request.hooks.attach('before_finalize', _caching.tee_output, + priority=90) + _wrapper.priority = 20 + + def _setup(self): + """Hook caching into cherrypy.request.""" + conf = self._merged_args() + + p = conf.pop("priority", None) + cherrypy.serving.request.hooks.attach('before_handler', self._wrapper, + priority=p, **conf) + + +class Toolbox(object): + + """A collection of Tools. + + This object also functions as a config namespace handler for itself. + Custom toolboxes should be added to each Application's toolboxes dict. + """ + + def __init__(self, namespace): + self.namespace = namespace + + def __setattr__(self, name, value): + # If the Tool._name is None, supply it from the attribute name. + if isinstance(value, Tool): + if value._name is None: + value._name = name + value.namespace = self.namespace + object.__setattr__(self, name, value) + + def __enter__(self): + """Populate request.toolmaps from tools specified in config.""" + cherrypy.serving.request.toolmaps[self.namespace] = map = {} + + def populate(k, v): + toolname, arg = k.split(".", 1) + bucket = map.setdefault(toolname, {}) + bucket[arg] = v + return populate + + def __exit__(self, exc_type, exc_val, exc_tb): + """Run tool._setup() for each tool in our toolmap.""" + map = cherrypy.serving.request.toolmaps.get(self.namespace) + if map: + for name, settings in map.items(): + if settings.get("on", False): + tool = getattr(self, name) + tool._setup() + + +class DeprecatedTool(Tool): + + _name = None + warnmsg = "This Tool is deprecated." + + def __init__(self, point, warnmsg=None): + self.point = point + if warnmsg is not None: + self.warnmsg = warnmsg + + def __call__(self, *args, **kwargs): + warnings.warn(self.warnmsg) + + def tool_decorator(f): + return f + return tool_decorator + + def _setup(self): + warnings.warn(self.warnmsg) + + +default_toolbox = _d = Toolbox("tools") +_d.session_auth = SessionAuthTool(cptools.session_auth) +_d.allow = Tool('on_start_resource', cptools.allow) +_d.proxy = Tool('before_request_body', cptools.proxy, priority=30) +_d.response_headers = Tool('on_start_resource', cptools.response_headers) +_d.log_tracebacks = Tool('before_error_response', cptools.log_traceback) +_d.log_headers = Tool('before_error_response', cptools.log_request_headers) +_d.log_hooks = Tool('on_end_request', cptools.log_hooks, priority=100) +_d.err_redirect = ErrorTool(cptools.redirect) +_d.etags = Tool('before_finalize', cptools.validate_etags, priority=75) +_d.decode = Tool('before_request_body', encoding.decode) +# the order of encoding, gzip, caching is important +_d.encode = Tool('before_handler', encoding.ResponseEncoder, priority=70) +_d.gzip = Tool('before_finalize', encoding.gzip, priority=80) +_d.staticdir = HandlerTool(static.staticdir) +_d.staticfile = HandlerTool(static.staticfile) +_d.sessions = SessionTool() +_d.xmlrpc = ErrorTool(_xmlrpc.on_error) +_d.caching = CachingTool('before_handler', _caching.get, 'caching') +_d.expires = Tool('before_finalize', _caching.expires) +_d.tidy = DeprecatedTool( + 'before_finalize', + "The tidy tool has been removed from the standard distribution of " + "CherryPy. The most recent version can be found at " + "http://tools.cherrypy.org/browser.") +_d.nsgmls = DeprecatedTool( + 'before_finalize', + "The nsgmls tool has been removed from the standard distribution of " + "CherryPy. The most recent version can be found at " + "http://tools.cherrypy.org/browser.") +_d.ignore_headers = Tool('before_request_body', cptools.ignore_headers) +_d.referer = Tool('before_request_body', cptools.referer) +_d.basic_auth = Tool('on_start_resource', auth.basic_auth) +_d.digest_auth = Tool('on_start_resource', auth.digest_auth) +_d.trailing_slash = Tool('before_handler', cptools.trailing_slash, priority=60) +_d.flatten = Tool('before_finalize', cptools.flatten) +_d.accept = Tool('on_start_resource', cptools.accept) +_d.redirect = Tool('on_start_resource', cptools.redirect) +_d.autovary = Tool('on_start_resource', cptools.autovary, priority=0) +_d.json_in = Tool('before_request_body', jsontools.json_in, priority=30) +_d.json_out = Tool('before_handler', jsontools.json_out, priority=30) +_d.auth_basic = Tool('before_handler', auth_basic.basic_auth, priority=1) +_d.auth_digest = Tool('before_handler', auth_digest.digest_auth, priority=1) + +del _d, cptools, encoding, auth, static diff --git a/lib/cherrypy/_cptree.py b/lib/cherrypy/_cptree.py new file mode 100644 index 00000000..a31b2793 --- /dev/null +++ b/lib/cherrypy/_cptree.py @@ -0,0 +1,299 @@ +"""CherryPy Application and Tree objects.""" + +import os + +import cherrypy +from cherrypy._cpcompat import ntou, py3k +from cherrypy import _cpconfig, _cplogging, _cprequest, _cpwsgi, tools +from cherrypy.lib import httputil + + +class Application(object): + + """A CherryPy Application. + + Servers and gateways should not instantiate Request objects directly. + Instead, they should ask an Application object for a request object. + + An instance of this class may also be used as a WSGI callable + (WSGI application object) for itself. + """ + + root = None + """The top-most container of page handlers for this app. Handlers should + be arranged in a hierarchy of attributes, matching the expected URI + hierarchy; the default dispatcher then searches this hierarchy for a + matching handler. When using a dispatcher other than the default, + this value may be None.""" + + config = {} + """A dict of {path: pathconf} pairs, where 'pathconf' is itself a dict + of {key: value} pairs.""" + + namespaces = _cpconfig.NamespaceSet() + toolboxes = {'tools': cherrypy.tools} + + log = None + """A LogManager instance. See _cplogging.""" + + wsgiapp = None + """A CPWSGIApp instance. See _cpwsgi.""" + + request_class = _cprequest.Request + response_class = _cprequest.Response + + relative_urls = False + + def __init__(self, root, script_name="", config=None): + self.log = _cplogging.LogManager(id(self), cherrypy.log.logger_root) + self.root = root + self.script_name = script_name + self.wsgiapp = _cpwsgi.CPWSGIApp(self) + + self.namespaces = self.namespaces.copy() + self.namespaces["log"] = lambda k, v: setattr(self.log, k, v) + self.namespaces["wsgi"] = self.wsgiapp.namespace_handler + + self.config = self.__class__.config.copy() + if config: + self.merge(config) + + def __repr__(self): + return "%s.%s(%r, %r)" % (self.__module__, self.__class__.__name__, + self.root, self.script_name) + + script_name_doc = """The URI "mount point" for this app. A mount point + is that portion of the URI which is constant for all URIs that are + serviced by this application; it does not include scheme, host, or proxy + ("virtual host") portions of the URI. + + For example, if script_name is "/my/cool/app", then the URL + "http://www.example.com/my/cool/app/page1" might be handled by a + "page1" method on the root object. + + The value of script_name MUST NOT end in a slash. If the script_name + refers to the root of the URI, it MUST be an empty string (not "/"). + + If script_name is explicitly set to None, then the script_name will be + provided for each call from request.wsgi_environ['SCRIPT_NAME']. + """ + + def _get_script_name(self): + if self._script_name is not None: + return self._script_name + + # A `_script_name` with a value of None signals that the script name + # should be pulled from WSGI environ. + return cherrypy.serving.request.wsgi_environ['SCRIPT_NAME'].rstrip("/") + + def _set_script_name(self, value): + if value: + value = value.rstrip("/") + self._script_name = value + script_name = property(fget=_get_script_name, fset=_set_script_name, + doc=script_name_doc) + + def merge(self, config): + """Merge the given config into self.config.""" + _cpconfig.merge(self.config, config) + + # Handle namespaces specified in config. + self.namespaces(self.config.get("/", {})) + + def find_config(self, path, key, default=None): + """Return the most-specific value for key along path, or default.""" + trail = path or "/" + while trail: + nodeconf = self.config.get(trail, {}) + + if key in nodeconf: + return nodeconf[key] + + lastslash = trail.rfind("/") + if lastslash == -1: + break + elif lastslash == 0 and trail != "/": + trail = "/" + else: + trail = trail[:lastslash] + + return default + + def get_serving(self, local, remote, scheme, sproto): + """Create and return a Request and Response object.""" + req = self.request_class(local, remote, scheme, sproto) + req.app = self + + for name, toolbox in self.toolboxes.items(): + req.namespaces[name] = toolbox + + resp = self.response_class() + cherrypy.serving.load(req, resp) + cherrypy.engine.publish('acquire_thread') + cherrypy.engine.publish('before_request') + + return req, resp + + def release_serving(self): + """Release the current serving (request and response).""" + req = cherrypy.serving.request + + cherrypy.engine.publish('after_request') + + try: + req.close() + except: + cherrypy.log(traceback=True, severity=40) + + cherrypy.serving.clear() + + def __call__(self, environ, start_response): + return self.wsgiapp(environ, start_response) + + +class Tree(object): + + """A registry of CherryPy applications, mounted at diverse points. + + An instance of this class may also be used as a WSGI callable + (WSGI application object), in which case it dispatches to all + mounted apps. + """ + + apps = {} + """ + A dict of the form {script name: application}, where "script name" + is a string declaring the URI mount point (no trailing slash), and + "application" is an instance of cherrypy.Application (or an arbitrary + WSGI callable if you happen to be using a WSGI server).""" + + def __init__(self): + self.apps = {} + + def mount(self, root, script_name="", config=None): + """Mount a new app from a root object, script_name, and config. + + root + An instance of a "controller class" (a collection of page + handler methods) which represents the root of the application. + This may also be an Application instance, or None if using + a dispatcher other than the default. + + script_name + A string containing the "mount point" of the application. + This should start with a slash, and be the path portion of the + URL at which to mount the given root. For example, if root.index() + will handle requests to "http://www.example.com:8080/dept/app1/", + then the script_name argument would be "/dept/app1". + + It MUST NOT end in a slash. If the script_name refers to the + root of the URI, it MUST be an empty string (not "/"). + + config + A file or dict containing application config. + """ + if script_name is None: + raise TypeError( + "The 'script_name' argument may not be None. Application " + "objects may, however, possess a script_name of None (in " + "order to inpect the WSGI environ for SCRIPT_NAME upon each " + "request). You cannot mount such Applications on this Tree; " + "you must pass them to a WSGI server interface directly.") + + # Next line both 1) strips trailing slash and 2) maps "/" -> "". + script_name = script_name.rstrip("/") + + if isinstance(root, Application): + app = root + if script_name != "" and script_name != app.script_name: + raise ValueError( + "Cannot specify a different script name and pass an " + "Application instance to cherrypy.mount") + script_name = app.script_name + else: + app = Application(root, script_name) + + # If mounted at "", add favicon.ico + if (script_name == "" and root is not None + and not hasattr(root, "favicon_ico")): + favicon = os.path.join(os.getcwd(), os.path.dirname(__file__), + "favicon.ico") + root.favicon_ico = tools.staticfile.handler(favicon) + + if config: + app.merge(config) + + self.apps[script_name] = app + + return app + + def graft(self, wsgi_callable, script_name=""): + """Mount a wsgi callable at the given script_name.""" + # Next line both 1) strips trailing slash and 2) maps "/" -> "". + script_name = script_name.rstrip("/") + self.apps[script_name] = wsgi_callable + + def script_name(self, path=None): + """The script_name of the app at the given path, or None. + + If path is None, cherrypy.request is used. + """ + if path is None: + try: + request = cherrypy.serving.request + path = httputil.urljoin(request.script_name, + request.path_info) + except AttributeError: + return None + + while True: + if path in self.apps: + return path + + if path == "": + return None + + # Move one node up the tree and try again. + path = path[:path.rfind("/")] + + def __call__(self, environ, start_response): + # If you're calling this, then you're probably setting SCRIPT_NAME + # to '' (some WSGI servers always set SCRIPT_NAME to ''). + # Try to look up the app using the full path. + env1x = environ + if environ.get(ntou('wsgi.version')) == (ntou('u'), 0): + env1x = _cpwsgi.downgrade_wsgi_ux_to_1x(environ) + path = httputil.urljoin(env1x.get('SCRIPT_NAME', ''), + env1x.get('PATH_INFO', '')) + sn = self.script_name(path or "/") + if sn is None: + start_response('404 Not Found', []) + return [] + + app = self.apps[sn] + + # Correct the SCRIPT_NAME and PATH_INFO environ entries. + environ = environ.copy() + if not py3k: + if environ.get(ntou('wsgi.version')) == (ntou('u'), 0): + # Python 2/WSGI u.0: all strings MUST be of type unicode + enc = environ[ntou('wsgi.url_encoding')] + environ[ntou('SCRIPT_NAME')] = sn.decode(enc) + environ[ntou('PATH_INFO')] = path[ + len(sn.rstrip("/")):].decode(enc) + else: + # Python 2/WSGI 1.x: all strings MUST be of type str + environ['SCRIPT_NAME'] = sn + environ['PATH_INFO'] = path[len(sn.rstrip("/")):] + else: + if environ.get(ntou('wsgi.version')) == (ntou('u'), 0): + # Python 3/WSGI u.0: all strings MUST be full unicode + environ['SCRIPT_NAME'] = sn + environ['PATH_INFO'] = path[len(sn.rstrip("/")):] + else: + # Python 3/WSGI 1.x: all strings MUST be ISO-8859-1 str + environ['SCRIPT_NAME'] = sn.encode( + 'utf-8').decode('ISO-8859-1') + environ['PATH_INFO'] = path[ + len(sn.rstrip("/")):].encode('utf-8').decode('ISO-8859-1') + return app(environ, start_response) diff --git a/lib/cherrypy/_cpwsgi.py b/lib/cherrypy/_cpwsgi.py new file mode 100644 index 00000000..f6db68b0 --- /dev/null +++ b/lib/cherrypy/_cpwsgi.py @@ -0,0 +1,438 @@ +"""WSGI interface (see PEP 333 and 3333). + +Note that WSGI environ keys and values are 'native strings'; that is, +whatever the type of "" is. For Python 2, that's a byte string; for Python 3, +it's a unicode string. But PEP 3333 says: "even if Python's str type is +actually Unicode "under the hood", the content of native strings must +still be translatable to bytes via the Latin-1 encoding!" +""" + +import sys as _sys + +import cherrypy as _cherrypy +from cherrypy._cpcompat import BytesIO, bytestr, ntob, ntou, py3k, unicodestr +from cherrypy import _cperror +from cherrypy.lib import httputil +from cherrypy.lib import is_closable_iterator + +def downgrade_wsgi_ux_to_1x(environ): + """Return a new environ dict for WSGI 1.x from the given WSGI u.x environ. + """ + env1x = {} + + url_encoding = environ[ntou('wsgi.url_encoding')] + for k, v in list(environ.items()): + if k in [ntou('PATH_INFO'), ntou('SCRIPT_NAME'), ntou('QUERY_STRING')]: + v = v.encode(url_encoding) + elif isinstance(v, unicodestr): + v = v.encode('ISO-8859-1') + env1x[k.encode('ISO-8859-1')] = v + + return env1x + + +class VirtualHost(object): + + """Select a different WSGI application based on the Host header. + + This can be useful when running multiple sites within one CP server. + It allows several domains to point to different applications. For example:: + + root = Root() + RootApp = cherrypy.Application(root) + Domain2App = cherrypy.Application(root) + SecureApp = cherrypy.Application(Secure()) + + vhost = cherrypy._cpwsgi.VirtualHost(RootApp, + domains={'www.domain2.example': Domain2App, + 'www.domain2.example:443': SecureApp, + }) + + cherrypy.tree.graft(vhost) + """ + default = None + """Required. The default WSGI application.""" + + use_x_forwarded_host = True + """If True (the default), any "X-Forwarded-Host" + request header will be used instead of the "Host" header. This + is commonly added by HTTP servers (such as Apache) when proxying.""" + + domains = {} + """A dict of {host header value: application} pairs. + The incoming "Host" request header is looked up in this dict, + and, if a match is found, the corresponding WSGI application + will be called instead of the default. Note that you often need + separate entries for "example.com" and "www.example.com". + In addition, "Host" headers may contain the port number. + """ + + def __init__(self, default, domains=None, use_x_forwarded_host=True): + self.default = default + self.domains = domains or {} + self.use_x_forwarded_host = use_x_forwarded_host + + def __call__(self, environ, start_response): + domain = environ.get('HTTP_HOST', '') + if self.use_x_forwarded_host: + domain = environ.get("HTTP_X_FORWARDED_HOST", domain) + + nextapp = self.domains.get(domain) + if nextapp is None: + nextapp = self.default + return nextapp(environ, start_response) + + +class InternalRedirector(object): + + """WSGI middleware that handles raised cherrypy.InternalRedirect.""" + + def __init__(self, nextapp, recursive=False): + self.nextapp = nextapp + self.recursive = recursive + + def __call__(self, environ, start_response): + redirections = [] + while True: + environ = environ.copy() + try: + return self.nextapp(environ, start_response) + except _cherrypy.InternalRedirect: + ir = _sys.exc_info()[1] + sn = environ.get('SCRIPT_NAME', '') + path = environ.get('PATH_INFO', '') + qs = environ.get('QUERY_STRING', '') + + # Add the *previous* path_info + qs to redirections. + old_uri = sn + path + if qs: + old_uri += "?" + qs + redirections.append(old_uri) + + if not self.recursive: + # Check to see if the new URI has been redirected to + # already + new_uri = sn + ir.path + if ir.query_string: + new_uri += "?" + ir.query_string + if new_uri in redirections: + ir.request.close() + raise RuntimeError("InternalRedirector visited the " + "same URL twice: %r" % new_uri) + + # Munge the environment and try again. + environ['REQUEST_METHOD'] = "GET" + environ['PATH_INFO'] = ir.path + environ['QUERY_STRING'] = ir.query_string + environ['wsgi.input'] = BytesIO() + environ['CONTENT_LENGTH'] = "0" + environ['cherrypy.previous_request'] = ir.request + + +class ExceptionTrapper(object): + + """WSGI middleware that traps exceptions.""" + + def __init__(self, nextapp, throws=(KeyboardInterrupt, SystemExit)): + self.nextapp = nextapp + self.throws = throws + + def __call__(self, environ, start_response): + return _TrappedResponse( + self.nextapp, + environ, + start_response, + self.throws + ) + + +class _TrappedResponse(object): + + response = iter([]) + + def __init__(self, nextapp, environ, start_response, throws): + self.nextapp = nextapp + self.environ = environ + self.start_response = start_response + self.throws = throws + self.started_response = False + self.response = self.trap( + self.nextapp, self.environ, self.start_response) + self.iter_response = iter(self.response) + + def __iter__(self): + self.started_response = True + return self + + if py3k: + def __next__(self): + return self.trap(next, self.iter_response) + else: + def next(self): + return self.trap(self.iter_response.next) + + def close(self): + if hasattr(self.response, 'close'): + self.response.close() + + def trap(self, func, *args, **kwargs): + try: + return func(*args, **kwargs) + except self.throws: + raise + except StopIteration: + raise + except: + tb = _cperror.format_exc() + #print('trapped (started %s):' % self.started_response, tb) + _cherrypy.log(tb, severity=40) + if not _cherrypy.request.show_tracebacks: + tb = "" + s, h, b = _cperror.bare_error(tb) + if py3k: + # What fun. + s = s.decode('ISO-8859-1') + h = [(k.decode('ISO-8859-1'), v.decode('ISO-8859-1')) + for k, v in h] + if self.started_response: + # Empty our iterable (so future calls raise StopIteration) + self.iter_response = iter([]) + else: + self.iter_response = iter(b) + + try: + self.start_response(s, h, _sys.exc_info()) + except: + # "The application must not trap any exceptions raised by + # start_response, if it called start_response with exc_info. + # Instead, it should allow such exceptions to propagate + # back to the server or gateway." + # But we still log and call close() to clean up ourselves. + _cherrypy.log(traceback=True, severity=40) + raise + + if self.started_response: + return ntob("").join(b) + else: + return b + + +# WSGI-to-CP Adapter # + + +class AppResponse(object): + + """WSGI response iterable for CherryPy applications.""" + + def __init__(self, environ, start_response, cpapp): + self.cpapp = cpapp + try: + if not py3k: + if environ.get(ntou('wsgi.version')) == (ntou('u'), 0): + environ = downgrade_wsgi_ux_to_1x(environ) + self.environ = environ + self.run() + + r = _cherrypy.serving.response + + outstatus = r.output_status + if not isinstance(outstatus, bytestr): + raise TypeError("response.output_status is not a byte string.") + + outheaders = [] + for k, v in r.header_list: + if not isinstance(k, bytestr): + raise TypeError( + "response.header_list key %r is not a byte string." % + k) + if not isinstance(v, bytestr): + raise TypeError( + "response.header_list value %r is not a byte string." % + v) + outheaders.append((k, v)) + + if py3k: + # According to PEP 3333, when using Python 3, the response + # status and headers must be bytes masquerading as unicode; + # that is, they must be of type "str" but are restricted to + # code points in the "latin-1" set. + outstatus = outstatus.decode('ISO-8859-1') + outheaders = [(k.decode('ISO-8859-1'), v.decode('ISO-8859-1')) + for k, v in outheaders] + + self.iter_response = iter(r.body) + self.write = start_response(outstatus, outheaders) + except: + self.close() + raise + + def __iter__(self): + return self + + if py3k: + def __next__(self): + return next(self.iter_response) + else: + def next(self): + return self.iter_response.next() + + def close(self): + """Close and de-reference the current request and response. (Core)""" + streaming = _cherrypy.serving.response.stream + self.cpapp.release_serving() + + # We avoid the expense of examining the iterator to see if it's + # closable unless we are streaming the response, as that's the + # only situation where we are going to have an iterator which + # may not have been exhausted yet. + if streaming and is_closable_iterator(self.iter_response): + iter_close = self.iter_response.close + try: + iter_close() + except Exception: + _cherrypy.log(traceback=True, severity=40) + + def run(self): + """Create a Request object using environ.""" + env = self.environ.get + + local = httputil.Host('', int(env('SERVER_PORT', 80)), + env('SERVER_NAME', '')) + remote = httputil.Host(env('REMOTE_ADDR', ''), + int(env('REMOTE_PORT', -1) or -1), + env('REMOTE_HOST', '')) + scheme = env('wsgi.url_scheme') + sproto = env('ACTUAL_SERVER_PROTOCOL', "HTTP/1.1") + request, resp = self.cpapp.get_serving(local, remote, scheme, sproto) + + # LOGON_USER is served by IIS, and is the name of the + # user after having been mapped to a local account. + # Both IIS and Apache set REMOTE_USER, when possible. + request.login = env('LOGON_USER') or env('REMOTE_USER') or None + request.multithread = self.environ['wsgi.multithread'] + request.multiprocess = self.environ['wsgi.multiprocess'] + request.wsgi_environ = self.environ + request.prev = env('cherrypy.previous_request', None) + + meth = self.environ['REQUEST_METHOD'] + + path = httputil.urljoin(self.environ.get('SCRIPT_NAME', ''), + self.environ.get('PATH_INFO', '')) + qs = self.environ.get('QUERY_STRING', '') + + if py3k: + # This isn't perfect; if the given PATH_INFO is in the + # wrong encoding, it may fail to match the appropriate config + # section URI. But meh. + old_enc = self.environ.get('wsgi.url_encoding', 'ISO-8859-1') + new_enc = self.cpapp.find_config(self.environ.get('PATH_INFO', ''), + "request.uri_encoding", 'utf-8') + if new_enc.lower() != old_enc.lower(): + # Even though the path and qs are unicode, the WSGI server + # is required by PEP 3333 to coerce them to ISO-8859-1 + # masquerading as unicode. So we have to encode back to + # bytes and then decode again using the "correct" encoding. + try: + u_path = path.encode(old_enc).decode(new_enc) + u_qs = qs.encode(old_enc).decode(new_enc) + except (UnicodeEncodeError, UnicodeDecodeError): + # Just pass them through without transcoding and hope. + pass + else: + # Only set transcoded values if they both succeed. + path = u_path + qs = u_qs + + rproto = self.environ.get('SERVER_PROTOCOL') + headers = self.translate_headers(self.environ) + rfile = self.environ['wsgi.input'] + request.run(meth, path, qs, rproto, headers, rfile) + + headerNames = {'HTTP_CGI_AUTHORIZATION': 'Authorization', + 'CONTENT_LENGTH': 'Content-Length', + 'CONTENT_TYPE': 'Content-Type', + 'REMOTE_HOST': 'Remote-Host', + 'REMOTE_ADDR': 'Remote-Addr', + } + + def translate_headers(self, environ): + """Translate CGI-environ header names to HTTP header names.""" + for cgiName in environ: + # We assume all incoming header keys are uppercase already. + if cgiName in self.headerNames: + yield self.headerNames[cgiName], environ[cgiName] + elif cgiName[:5] == "HTTP_": + # Hackish attempt at recovering original header names. + translatedHeader = cgiName[5:].replace("_", "-") + yield translatedHeader, environ[cgiName] + + +class CPWSGIApp(object): + + """A WSGI application object for a CherryPy Application.""" + + pipeline = [('ExceptionTrapper', ExceptionTrapper), + ('InternalRedirector', InternalRedirector), + ] + """A list of (name, wsgiapp) pairs. Each 'wsgiapp' MUST be a + constructor that takes an initial, positional 'nextapp' argument, + plus optional keyword arguments, and returns a WSGI application + (that takes environ and start_response arguments). The 'name' can + be any you choose, and will correspond to keys in self.config.""" + + head = None + """Rather than nest all apps in the pipeline on each call, it's only + done the first time, and the result is memoized into self.head. Set + this to None again if you change self.pipeline after calling self.""" + + config = {} + """A dict whose keys match names listed in the pipeline. Each + value is a further dict which will be passed to the corresponding + named WSGI callable (from the pipeline) as keyword arguments.""" + + response_class = AppResponse + """The class to instantiate and return as the next app in the WSGI chain. + """ + + def __init__(self, cpapp, pipeline=None): + self.cpapp = cpapp + self.pipeline = self.pipeline[:] + if pipeline: + self.pipeline.extend(pipeline) + self.config = self.config.copy() + + def tail(self, environ, start_response): + """WSGI application callable for the actual CherryPy application. + + You probably shouldn't call this; call self.__call__ instead, + so that any WSGI middleware in self.pipeline can run first. + """ + return self.response_class(environ, start_response, self.cpapp) + + def __call__(self, environ, start_response): + head = self.head + if head is None: + # Create and nest the WSGI apps in our pipeline (in reverse order). + # Then memoize the result in self.head. + head = self.tail + for name, callable in self.pipeline[::-1]: + conf = self.config.get(name, {}) + head = callable(head, **conf) + self.head = head + return head(environ, start_response) + + def namespace_handler(self, k, v): + """Config handler for the 'wsgi' namespace.""" + if k == "pipeline": + # Note this allows multiple 'wsgi.pipeline' config entries + # (but each entry will be processed in a 'random' order). + # It should also allow developers to set default middleware + # in code (passed to self.__init__) that deployers can add to + # (but not remove) via config. + self.pipeline.extend(v) + elif k == "response_class": + self.response_class = v + else: + name, arg = k.split(".", 1) + bucket = self.config.setdefault(name, {}) + bucket[arg] = v diff --git a/lib/cherrypy/_cpwsgi_server.py b/lib/cherrypy/_cpwsgi_server.py new file mode 100644 index 00000000..874e2e9f --- /dev/null +++ b/lib/cherrypy/_cpwsgi_server.py @@ -0,0 +1,70 @@ +"""WSGI server interface (see PEP 333). This adds some CP-specific bits to +the framework-agnostic wsgiserver package. +""" +import sys + +import cherrypy +from cherrypy import wsgiserver + + +class CPWSGIServer(wsgiserver.CherryPyWSGIServer): + + """Wrapper for wsgiserver.CherryPyWSGIServer. + + wsgiserver has been designed to not reference CherryPy in any way, + so that it can be used in other frameworks and applications. Therefore, + we wrap it here, so we can set our own mount points from cherrypy.tree + and apply some attributes from config -> cherrypy.server -> wsgiserver. + """ + + def __init__(self, server_adapter=cherrypy.server): + self.server_adapter = server_adapter + self.max_request_header_size = ( + self.server_adapter.max_request_header_size or 0 + ) + self.max_request_body_size = ( + self.server_adapter.max_request_body_size or 0 + ) + + server_name = (self.server_adapter.socket_host or + self.server_adapter.socket_file or + None) + + self.wsgi_version = self.server_adapter.wsgi_version + s = wsgiserver.CherryPyWSGIServer + s.__init__(self, server_adapter.bind_addr, cherrypy.tree, + self.server_adapter.thread_pool, + server_name, + max=self.server_adapter.thread_pool_max, + request_queue_size=self.server_adapter.socket_queue_size, + timeout=self.server_adapter.socket_timeout, + shutdown_timeout=self.server_adapter.shutdown_timeout, + accepted_queue_size=self.server_adapter.accepted_queue_size, + accepted_queue_timeout=self.server_adapter.accepted_queue_timeout, + ) + self.protocol = self.server_adapter.protocol_version + self.nodelay = self.server_adapter.nodelay + + if sys.version_info >= (3, 0): + ssl_module = self.server_adapter.ssl_module or 'builtin' + else: + ssl_module = self.server_adapter.ssl_module or 'pyopenssl' + if self.server_adapter.ssl_context: + adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module) + self.ssl_adapter = adapter_class( + self.server_adapter.ssl_certificate, + self.server_adapter.ssl_private_key, + self.server_adapter.ssl_certificate_chain) + self.ssl_adapter.context = self.server_adapter.ssl_context + elif self.server_adapter.ssl_certificate: + adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module) + self.ssl_adapter = adapter_class( + self.server_adapter.ssl_certificate, + self.server_adapter.ssl_private_key, + self.server_adapter.ssl_certificate_chain) + + self.stats['Enabled'] = getattr( + self.server_adapter, 'statistics', False) + + def error_log(self, msg="", level=20, traceback=False): + cherrypy.engine.log(msg, level, traceback) diff --git a/lib/cherrypy/cherryd b/lib/cherrypy/cherryd new file mode 100644 index 00000000..5afb27ad --- /dev/null +++ b/lib/cherrypy/cherryd @@ -0,0 +1,110 @@ +#! /usr/bin/env python +"""The CherryPy daemon.""" + +import sys + +import cherrypy +from cherrypy.process import plugins, servers +from cherrypy import Application + + +def start(configfiles=None, daemonize=False, environment=None, + fastcgi=False, scgi=False, pidfile=None, imports=None, + cgi=False): + """Subscribe all engine plugins and start the engine.""" + sys.path = [''] + sys.path + for i in imports or []: + exec("import %s" % i) + + for c in configfiles or []: + cherrypy.config.update(c) + # If there's only one app mounted, merge config into it. + if len(cherrypy.tree.apps) == 1: + for app in cherrypy.tree.apps.values(): + if isinstance(app, Application): + app.merge(c) + + engine = cherrypy.engine + + if environment is not None: + cherrypy.config.update({'environment': environment}) + + # Only daemonize if asked to. + if daemonize: + # Don't print anything to stdout/sterr. + cherrypy.config.update({'log.screen': False}) + plugins.Daemonizer(engine).subscribe() + + if pidfile: + plugins.PIDFile(engine, pidfile).subscribe() + + if hasattr(engine, "signal_handler"): + engine.signal_handler.subscribe() + if hasattr(engine, "console_control_handler"): + engine.console_control_handler.subscribe() + + if (fastcgi and (scgi or cgi)) or (scgi and cgi): + cherrypy.log.error("You may only specify one of the cgi, fastcgi, and " + "scgi options.", 'ENGINE') + sys.exit(1) + elif fastcgi or scgi or cgi: + # Turn off autoreload when using *cgi. + cherrypy.config.update({'engine.autoreload_on': False}) + # Turn off the default HTTP server (which is subscribed by default). + cherrypy.server.unsubscribe() + + addr = cherrypy.server.bind_addr + if fastcgi: + f = servers.FlupFCGIServer(application=cherrypy.tree, + bindAddress=addr) + elif scgi: + f = servers.FlupSCGIServer(application=cherrypy.tree, + bindAddress=addr) + else: + f = servers.FlupCGIServer(application=cherrypy.tree, + bindAddress=addr) + s = servers.ServerAdapter(engine, httpserver=f, bind_addr=addr) + s.subscribe() + + # Always start the engine; this will start all other services + try: + engine.start() + except: + # Assume the error has been logged already via bus.log. + sys.exit(1) + else: + engine.block() + + +if __name__ == '__main__': + from optparse import OptionParser + + p = OptionParser() + p.add_option('-c', '--config', action="append", dest='config', + help="specify config file(s)") + p.add_option('-d', action="store_true", dest='daemonize', + help="run the server as a daemon") + p.add_option('-e', '--environment', dest='environment', default=None, + help="apply the given config environment") + p.add_option('-f', action="store_true", dest='fastcgi', + help="start a fastcgi server instead of the default HTTP " + "server") + p.add_option('-s', action="store_true", dest='scgi', + help="start a scgi server instead of the default HTTP server") + p.add_option('-x', action="store_true", dest='cgi', + help="start a cgi server instead of the default HTTP server") + p.add_option('-i', '--import', action="append", dest='imports', + help="specify modules to import") + p.add_option('-p', '--pidfile', dest='pidfile', default=None, + help="store the process id in the given file") + p.add_option('-P', '--Path', action="append", dest='Path', + help="add the given paths to sys.path") + options, args = p.parse_args() + + if options.Path: + for p in options.Path: + sys.path.insert(0, p) + + start(options.config, options.daemonize, + options.environment, options.fastcgi, options.scgi, + options.pidfile, options.imports, options.cgi) diff --git a/lib/cherrypy/favicon.ico b/lib/cherrypy/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..f0d7e61badad3f332cf1e663efb97c0b5be80f5e GIT binary patch literal 1406 zcmb`Hd05U_6vscWSJJB#$ugQ@jA;zAXDv%YAxlVP&ytAjTU10ymNpe4LY9=2_SI5K zC3>Y)vZNK!rhR_zJdtfiw$m?BBmX0|pFW;J|@s zYHBiQ&>#j69?Xy-Ll`=AD8q&gWBBmlj2JNjEiElZjvUFTQKJ|=dNgCkjA889v5Xrx z4sC61baZqWKYlzDCQM-B#EDFrGznc@T_#VSjGmqzQ>IK|>eQ)Bn>G!7eSHiJ446KB zIx}X>VCKx37#bQfYt}4g&z{YkIdhmhcP>UoM$DTxkNNZGvtYpjjE#+1xNspRCMGOe zw1~xv7h`H_%915ZSh{p6%a$!;`SRtgSh0eYD_62=)hf))%vim8HEY(aVeQ(rtXsDZ zb8~anuV0Uag#{ZnY+&QYjaXV*vT4&MHgDdHm6a7+wrpYR)~#&YwvFxEx3go%4tDO` z$*x_y*u8rke3QwOtB{embw6rwR)6;qO>=_vu z89aafoEI-%keQi@R4V1=%a>$jW%26OE3&h*$;rv#_3PK<=H`-@mq&hnK5yQT(K__B|98+X@Zp^73MZjvW!HsVT|~sc)O^ZGM)}O{A)-)@n=bmFdz? zRaLc>D=T%Qr>f`&r)>x2Kb1tH)^h|wLs?1GQ@HOR^ik=lq13yT$@Z=qojU#WZ-LIg Kbp8+jKgeIP@z;_7 literal 0 HcmV?d00001 diff --git a/lib/cherrypy/lib/__init__.py b/lib/cherrypy/lib/__init__.py new file mode 100644 index 00000000..a75a53da --- /dev/null +++ b/lib/cherrypy/lib/__init__.py @@ -0,0 +1,85 @@ +"""CherryPy Library""" + +# Deprecated in CherryPy 3.2 -- remove in CherryPy 3.3 +from cherrypy.lib.reprconf import unrepr, modules, attributes + +def is_iterator(obj): + '''Returns a boolean indicating if the object provided implements + the iterator protocol (i.e. like a generator). This will return + false for objects which iterable, but not iterators themselves.''' + from types import GeneratorType + if isinstance(obj, GeneratorType): + return True + elif not hasattr(obj, '__iter__'): + return False + else: + # Types which implement the protocol must return themselves when + # invoking 'iter' upon them. + return iter(obj) is obj + +def is_closable_iterator(obj): + + # Not an iterator. + if not is_iterator(obj): + return False + + # A generator - the easiest thing to deal with. + import inspect + if inspect.isgenerator(obj): + return True + + # A custom iterator. Look for a close method... + if not (hasattr(obj, 'close') and callable(obj.close)): + return False + + # ... which doesn't require any arguments. + try: + inspect.getcallargs(obj.close) + except TypeError: + return False + else: + return True + +class file_generator(object): + + """Yield the given input (a file object) in chunks (default 64k). (Core)""" + + def __init__(self, input, chunkSize=65536): + self.input = input + self.chunkSize = chunkSize + + def __iter__(self): + return self + + def __next__(self): + chunk = self.input.read(self.chunkSize) + if chunk: + return chunk + else: + if hasattr(self.input, 'close'): + self.input.close() + raise StopIteration() + next = __next__ + + +def file_generator_limited(fileobj, count, chunk_size=65536): + """Yield the given file object in chunks, stopping after `count` + bytes has been emitted. Default chunk size is 64kB. (Core) + """ + remaining = count + while remaining > 0: + chunk = fileobj.read(min(chunk_size, remaining)) + chunklen = len(chunk) + if chunklen == 0: + return + remaining -= chunklen + yield chunk + + +def set_vary_header(response, header_name): + "Add a Vary header to a response" + varies = response.headers.get("Vary", "") + varies = [x.strip() for x in varies.split(",") if x.strip()] + if header_name not in varies: + varies.append(header_name) + response.headers['Vary'] = ", ".join(varies) diff --git a/lib/cherrypy/lib/auth.py b/lib/cherrypy/lib/auth.py new file mode 100644 index 00000000..71591aaa --- /dev/null +++ b/lib/cherrypy/lib/auth.py @@ -0,0 +1,97 @@ +import cherrypy +from cherrypy.lib import httpauth + + +def check_auth(users, encrypt=None, realm=None): + """If an authorization header contains credentials, return True or False. + """ + request = cherrypy.serving.request + if 'authorization' in request.headers: + # make sure the provided credentials are correctly set + ah = httpauth.parseAuthorization(request.headers['authorization']) + if ah is None: + raise cherrypy.HTTPError(400, 'Bad Request') + + if not encrypt: + encrypt = httpauth.DIGEST_AUTH_ENCODERS[httpauth.MD5] + + if hasattr(users, '__call__'): + try: + # backward compatibility + users = users() # expect it to return a dictionary + + if not isinstance(users, dict): + raise ValueError( + "Authentication users must be a dictionary") + + # fetch the user password + password = users.get(ah["username"], None) + except TypeError: + # returns a password (encrypted or clear text) + password = users(ah["username"]) + else: + if not isinstance(users, dict): + raise ValueError("Authentication users must be a dictionary") + + # fetch the user password + password = users.get(ah["username"], None) + + # validate the authorization by re-computing it here + # and compare it with what the user-agent provided + if httpauth.checkResponse(ah, password, method=request.method, + encrypt=encrypt, realm=realm): + request.login = ah["username"] + return True + + request.login = False + return False + + +def basic_auth(realm, users, encrypt=None, debug=False): + """If auth fails, raise 401 with a basic authentication header. + + realm + A string containing the authentication realm. + + users + A dict of the form: {username: password} or a callable returning + a dict. + + encrypt + callable used to encrypt the password returned from the user-agent. + if None it defaults to a md5 encryption. + + """ + if check_auth(users, encrypt): + if debug: + cherrypy.log('Auth successful', 'TOOLS.BASIC_AUTH') + return + + # inform the user-agent this path is protected + cherrypy.serving.response.headers[ + 'www-authenticate'] = httpauth.basicAuth(realm) + + raise cherrypy.HTTPError( + 401, "You are not authorized to access that resource") + + +def digest_auth(realm, users, debug=False): + """If auth fails, raise 401 with a digest authentication header. + + realm + A string containing the authentication realm. + users + A dict of the form: {username: password} or a callable returning + a dict. + """ + if check_auth(users, realm=realm): + if debug: + cherrypy.log('Auth successful', 'TOOLS.DIGEST_AUTH') + return + + # inform the user-agent this path is protected + cherrypy.serving.response.headers[ + 'www-authenticate'] = httpauth.digestAuth(realm) + + raise cherrypy.HTTPError( + 401, "You are not authorized to access that resource") diff --git a/lib/cherrypy/lib/auth_basic.py b/lib/cherrypy/lib/auth_basic.py new file mode 100644 index 00000000..5ba16f7f --- /dev/null +++ b/lib/cherrypy/lib/auth_basic.py @@ -0,0 +1,90 @@ +# This file is part of CherryPy +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:expandtab:fileencoding=utf-8 + +__doc__ = """This module provides a CherryPy 3.x tool which implements +the server-side of HTTP Basic Access Authentication, as described in +:rfc:`2617`. + +Example usage, using the built-in checkpassword_dict function which uses a dict +as the credentials store:: + + userpassdict = {'bird' : 'bebop', 'ornette' : 'wayout'} + checkpassword = cherrypy.lib.auth_basic.checkpassword_dict(userpassdict) + basic_auth = {'tools.auth_basic.on': True, + 'tools.auth_basic.realm': 'earth', + 'tools.auth_basic.checkpassword': checkpassword, + } + app_config = { '/' : basic_auth } + +""" + +__author__ = 'visteya' +__date__ = 'April 2009' + +import binascii +from cherrypy._cpcompat import base64_decode +import cherrypy + + +def checkpassword_dict(user_password_dict): + """Returns a checkpassword function which checks credentials + against a dictionary of the form: {username : password}. + + If you want a simple dictionary-based authentication scheme, use + checkpassword_dict(my_credentials_dict) as the value for the + checkpassword argument to basic_auth(). + """ + def checkpassword(realm, user, password): + p = user_password_dict.get(user) + return p and p == password or False + + return checkpassword + + +def basic_auth(realm, checkpassword, debug=False): + """A CherryPy tool which hooks at before_handler to perform + HTTP Basic Access Authentication, as specified in :rfc:`2617`. + + If the request has an 'authorization' header with a 'Basic' scheme, this + tool attempts to authenticate the credentials supplied in that header. If + the request has no 'authorization' header, or if it does but the scheme is + not 'Basic', or if authentication fails, the tool sends a 401 response with + a 'WWW-Authenticate' Basic header. + + realm + A string containing the authentication realm. + + checkpassword + A callable which checks the authentication credentials. + Its signature is checkpassword(realm, username, password). where + username and password are the values obtained from the request's + 'authorization' header. If authentication succeeds, checkpassword + returns True, else it returns False. + + """ + + if '"' in realm: + raise ValueError('Realm cannot contain the " (quote) character.') + request = cherrypy.serving.request + + auth_header = request.headers.get('authorization') + if auth_header is not None: + try: + scheme, params = auth_header.split(' ', 1) + if scheme.lower() == 'basic': + username, password = base64_decode(params).split(':', 1) + if checkpassword(realm, username, password): + if debug: + cherrypy.log('Auth succeeded', 'TOOLS.AUTH_BASIC') + request.login = username + return # successful authentication + # split() error, base64.decodestring() error + except (ValueError, binascii.Error): + raise cherrypy.HTTPError(400, 'Bad Request') + + # Respond with 401 status and a WWW-Authenticate header + cherrypy.serving.response.headers[ + 'www-authenticate'] = 'Basic realm="%s"' % realm + raise cherrypy.HTTPError( + 401, "You are not authorized to access that resource") diff --git a/lib/cherrypy/lib/auth_digest.py b/lib/cherrypy/lib/auth_digest.py new file mode 100644 index 00000000..e06535dc --- /dev/null +++ b/lib/cherrypy/lib/auth_digest.py @@ -0,0 +1,390 @@ +# This file is part of CherryPy +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:expandtab:fileencoding=utf-8 + +__doc__ = """An implementation of the server-side of HTTP Digest Access +Authentication, which is described in :rfc:`2617`. + +Example usage, using the built-in get_ha1_dict_plain function which uses a dict +of plaintext passwords as the credentials store:: + + userpassdict = {'alice' : '4x5istwelve'} + get_ha1 = cherrypy.lib.auth_digest.get_ha1_dict_plain(userpassdict) + digest_auth = {'tools.auth_digest.on': True, + 'tools.auth_digest.realm': 'wonderland', + 'tools.auth_digest.get_ha1': get_ha1, + 'tools.auth_digest.key': 'a565c27146791cfb', + } + app_config = { '/' : digest_auth } +""" + +__author__ = 'visteya' +__date__ = 'April 2009' + + +import time +from cherrypy._cpcompat import parse_http_list, parse_keqv_list + +import cherrypy +from cherrypy._cpcompat import md5, ntob +md5_hex = lambda s: md5(ntob(s)).hexdigest() + +qop_auth = 'auth' +qop_auth_int = 'auth-int' +valid_qops = (qop_auth, qop_auth_int) + +valid_algorithms = ('MD5', 'MD5-sess') + + +def TRACE(msg): + cherrypy.log(msg, context='TOOLS.AUTH_DIGEST') + +# Three helper functions for users of the tool, providing three variants +# of get_ha1() functions for three different kinds of credential stores. + + +def get_ha1_dict_plain(user_password_dict): + """Returns a get_ha1 function which obtains a plaintext password from a + dictionary of the form: {username : password}. + + If you want a simple dictionary-based authentication scheme, with plaintext + passwords, use get_ha1_dict_plain(my_userpass_dict) as the value for the + get_ha1 argument to digest_auth(). + """ + def get_ha1(realm, username): + password = user_password_dict.get(username) + if password: + return md5_hex('%s:%s:%s' % (username, realm, password)) + return None + + return get_ha1 + + +def get_ha1_dict(user_ha1_dict): + """Returns a get_ha1 function which obtains a HA1 password hash from a + dictionary of the form: {username : HA1}. + + If you want a dictionary-based authentication scheme, but with + pre-computed HA1 hashes instead of plain-text passwords, use + get_ha1_dict(my_userha1_dict) as the value for the get_ha1 + argument to digest_auth(). + """ + def get_ha1(realm, username): + return user_ha1_dict.get(username) + + return get_ha1 + + +def get_ha1_file_htdigest(filename): + """Returns a get_ha1 function which obtains a HA1 password hash from a + flat file with lines of the same format as that produced by the Apache + htdigest utility. For example, for realm 'wonderland', username 'alice', + and password '4x5istwelve', the htdigest line would be:: + + alice:wonderland:3238cdfe91a8b2ed8e39646921a02d4c + + If you want to use an Apache htdigest file as the credentials store, + then use get_ha1_file_htdigest(my_htdigest_file) as the value for the + get_ha1 argument to digest_auth(). It is recommended that the filename + argument be an absolute path, to avoid problems. + """ + def get_ha1(realm, username): + result = None + f = open(filename, 'r') + for line in f: + u, r, ha1 = line.rstrip().split(':') + if u == username and r == realm: + result = ha1 + break + f.close() + return result + + return get_ha1 + + +def synthesize_nonce(s, key, timestamp=None): + """Synthesize a nonce value which resists spoofing and can be checked + for staleness. Returns a string suitable as the value for 'nonce' in + the www-authenticate header. + + s + A string related to the resource, such as the hostname of the server. + + key + A secret string known only to the server. + + timestamp + An integer seconds-since-the-epoch timestamp + + """ + if timestamp is None: + timestamp = int(time.time()) + h = md5_hex('%s:%s:%s' % (timestamp, s, key)) + nonce = '%s:%s' % (timestamp, h) + return nonce + + +def H(s): + """The hash function H""" + return md5_hex(s) + + +class HttpDigestAuthorization (object): + + """Class to parse a Digest Authorization header and perform re-calculation + of the digest. + """ + + def errmsg(self, s): + return 'Digest Authorization header: %s' % s + + def __init__(self, auth_header, http_method, debug=False): + self.http_method = http_method + self.debug = debug + scheme, params = auth_header.split(" ", 1) + self.scheme = scheme.lower() + if self.scheme != 'digest': + raise ValueError('Authorization scheme is not "Digest"') + + self.auth_header = auth_header + + # make a dict of the params + items = parse_http_list(params) + paramsd = parse_keqv_list(items) + + self.realm = paramsd.get('realm') + self.username = paramsd.get('username') + self.nonce = paramsd.get('nonce') + self.uri = paramsd.get('uri') + self.method = paramsd.get('method') + self.response = paramsd.get('response') # the response digest + self.algorithm = paramsd.get('algorithm', 'MD5').upper() + self.cnonce = paramsd.get('cnonce') + self.opaque = paramsd.get('opaque') + self.qop = paramsd.get('qop') # qop + self.nc = paramsd.get('nc') # nonce count + + # perform some correctness checks + if self.algorithm not in valid_algorithms: + raise ValueError( + self.errmsg("Unsupported value for algorithm: '%s'" % + self.algorithm)) + + has_reqd = ( + self.username and + self.realm and + self.nonce and + self.uri and + self.response + ) + if not has_reqd: + raise ValueError( + self.errmsg("Not all required parameters are present.")) + + if self.qop: + if self.qop not in valid_qops: + raise ValueError( + self.errmsg("Unsupported value for qop: '%s'" % self.qop)) + if not (self.cnonce and self.nc): + raise ValueError( + self.errmsg("If qop is sent then " + "cnonce and nc MUST be present")) + else: + if self.cnonce or self.nc: + raise ValueError( + self.errmsg("If qop is not sent, " + "neither cnonce nor nc can be present")) + + def __str__(self): + return 'authorization : %s' % self.auth_header + + def validate_nonce(self, s, key): + """Validate the nonce. + Returns True if nonce was generated by synthesize_nonce() and the + timestamp is not spoofed, else returns False. + + s + A string related to the resource, such as the hostname of + the server. + + key + A secret string known only to the server. + + Both s and key must be the same values which were used to synthesize + the nonce we are trying to validate. + """ + try: + timestamp, hashpart = self.nonce.split(':', 1) + s_timestamp, s_hashpart = synthesize_nonce( + s, key, timestamp).split(':', 1) + is_valid = s_hashpart == hashpart + if self.debug: + TRACE('validate_nonce: %s' % is_valid) + return is_valid + except ValueError: # split() error + pass + return False + + def is_nonce_stale(self, max_age_seconds=600): + """Returns True if a validated nonce is stale. The nonce contains a + timestamp in plaintext and also a secure hash of the timestamp. + You should first validate the nonce to ensure the plaintext + timestamp is not spoofed. + """ + try: + timestamp, hashpart = self.nonce.split(':', 1) + if int(timestamp) + max_age_seconds > int(time.time()): + return False + except ValueError: # int() error + pass + if self.debug: + TRACE("nonce is stale") + return True + + def HA2(self, entity_body=''): + """Returns the H(A2) string. See :rfc:`2617` section 3.2.2.3.""" + # RFC 2617 3.2.2.3 + # If the "qop" directive's value is "auth" or is unspecified, + # then A2 is: + # A2 = method ":" digest-uri-value + # + # If the "qop" value is "auth-int", then A2 is: + # A2 = method ":" digest-uri-value ":" H(entity-body) + if self.qop is None or self.qop == "auth": + a2 = '%s:%s' % (self.http_method, self.uri) + elif self.qop == "auth-int": + a2 = "%s:%s:%s" % (self.http_method, self.uri, H(entity_body)) + else: + # in theory, this should never happen, since I validate qop in + # __init__() + raise ValueError(self.errmsg("Unrecognized value for qop!")) + return H(a2) + + def request_digest(self, ha1, entity_body=''): + """Calculates the Request-Digest. See :rfc:`2617` section 3.2.2.1. + + ha1 + The HA1 string obtained from the credentials store. + + entity_body + If 'qop' is set to 'auth-int', then A2 includes a hash + of the "entity body". The entity body is the part of the + message which follows the HTTP headers. See :rfc:`2617` section + 4.3. This refers to the entity the user agent sent in the + request which has the Authorization header. Typically GET + requests don't have an entity, and POST requests do. + + """ + ha2 = self.HA2(entity_body) + # Request-Digest -- RFC 2617 3.2.2.1 + if self.qop: + req = "%s:%s:%s:%s:%s" % ( + self.nonce, self.nc, self.cnonce, self.qop, ha2) + else: + req = "%s:%s" % (self.nonce, ha2) + + # RFC 2617 3.2.2.2 + # + # If the "algorithm" directive's value is "MD5" or is unspecified, + # then A1 is: + # A1 = unq(username-value) ":" unq(realm-value) ":" passwd + # + # If the "algorithm" directive's value is "MD5-sess", then A1 is + # calculated only once - on the first request by the client following + # receipt of a WWW-Authenticate challenge from the server. + # A1 = H( unq(username-value) ":" unq(realm-value) ":" passwd ) + # ":" unq(nonce-value) ":" unq(cnonce-value) + if self.algorithm == 'MD5-sess': + ha1 = H('%s:%s:%s' % (ha1, self.nonce, self.cnonce)) + + digest = H('%s:%s' % (ha1, req)) + return digest + + +def www_authenticate(realm, key, algorithm='MD5', nonce=None, qop=qop_auth, + stale=False): + """Constructs a WWW-Authenticate header for Digest authentication.""" + if qop not in valid_qops: + raise ValueError("Unsupported value for qop: '%s'" % qop) + if algorithm not in valid_algorithms: + raise ValueError("Unsupported value for algorithm: '%s'" % algorithm) + + if nonce is None: + nonce = synthesize_nonce(realm, key) + s = 'Digest realm="%s", nonce="%s", algorithm="%s", qop="%s"' % ( + realm, nonce, algorithm, qop) + if stale: + s += ', stale="true"' + return s + + +def digest_auth(realm, get_ha1, key, debug=False): + """A CherryPy tool which hooks at before_handler to perform + HTTP Digest Access Authentication, as specified in :rfc:`2617`. + + If the request has an 'authorization' header with a 'Digest' scheme, + this tool authenticates the credentials supplied in that header. + If the request has no 'authorization' header, or if it does but the + scheme is not "Digest", or if authentication fails, the tool sends + a 401 response with a 'WWW-Authenticate' Digest header. + + realm + A string containing the authentication realm. + + get_ha1 + A callable which looks up a username in a credentials store + and returns the HA1 string, which is defined in the RFC to be + MD5(username : realm : password). The function's signature is: + ``get_ha1(realm, username)`` + where username is obtained from the request's 'authorization' header. + If username is not found in the credentials store, get_ha1() returns + None. + + key + A secret string known only to the server, used in the synthesis + of nonces. + + """ + request = cherrypy.serving.request + + auth_header = request.headers.get('authorization') + nonce_is_stale = False + if auth_header is not None: + try: + auth = HttpDigestAuthorization( + auth_header, request.method, debug=debug) + except ValueError: + raise cherrypy.HTTPError( + 400, "The Authorization header could not be parsed.") + + if debug: + TRACE(str(auth)) + + if auth.validate_nonce(realm, key): + ha1 = get_ha1(realm, auth.username) + if ha1 is not None: + # note that for request.body to be available we need to + # hook in at before_handler, not on_start_resource like + # 3.1.x digest_auth does. + digest = auth.request_digest(ha1, entity_body=request.body) + if digest == auth.response: # authenticated + if debug: + TRACE("digest matches auth.response") + # Now check if nonce is stale. + # The choice of ten minutes' lifetime for nonce is somewhat + # arbitrary + nonce_is_stale = auth.is_nonce_stale(max_age_seconds=600) + if not nonce_is_stale: + request.login = auth.username + if debug: + TRACE("authentication of %s successful" % + auth.username) + return + + # Respond with 401 status and a WWW-Authenticate header + header = www_authenticate(realm, key, stale=nonce_is_stale) + if debug: + TRACE(header) + cherrypy.serving.response.headers['WWW-Authenticate'] = header + raise cherrypy.HTTPError( + 401, "You are not authorized to access that resource") diff --git a/lib/cherrypy/lib/caching.py b/lib/cherrypy/lib/caching.py new file mode 100644 index 00000000..fab6b569 --- /dev/null +++ b/lib/cherrypy/lib/caching.py @@ -0,0 +1,470 @@ +""" +CherryPy implements a simple caching system as a pluggable Tool. This tool +tries to be an (in-process) HTTP/1.1-compliant cache. It's not quite there +yet, but it's probably good enough for most sites. + +In general, GET responses are cached (along with selecting headers) and, if +another request arrives for the same resource, the caching Tool will return 304 +Not Modified if possible, or serve the cached response otherwise. It also sets +request.cached to True if serving a cached representation, and sets +request.cacheable to False (so it doesn't get cached again). + +If POST, PUT, or DELETE requests are made for a cached resource, they +invalidate (delete) any cached response. + +Usage +===== + +Configuration file example:: + + [/] + tools.caching.on = True + tools.caching.delay = 3600 + +You may use a class other than the default +:class:`MemoryCache` by supplying the config +entry ``cache_class``; supply the full dotted name of the replacement class +as the config value. It must implement the basic methods ``get``, ``put``, +``delete``, and ``clear``. + +You may set any attribute, including overriding methods, on the cache +instance by providing them in config. The above sets the +:attr:`delay` attribute, for example. +""" + +import datetime +import sys +import threading +import time + +import cherrypy +from cherrypy.lib import cptools, httputil +from cherrypy._cpcompat import copyitems, ntob, set_daemon, sorted, Event + + +class Cache(object): + + """Base class for Cache implementations.""" + + def get(self): + """Return the current variant if in the cache, else None.""" + raise NotImplemented + + def put(self, obj, size): + """Store the current variant in the cache.""" + raise NotImplemented + + def delete(self): + """Remove ALL cached variants of the current resource.""" + raise NotImplemented + + def clear(self): + """Reset the cache to its initial, empty state.""" + raise NotImplemented + + +# ------------------------------ Memory Cache ------------------------------- # +class AntiStampedeCache(dict): + + """A storage system for cached items which reduces stampede collisions.""" + + def wait(self, key, timeout=5, debug=False): + """Return the cached value for the given key, or None. + + If timeout is not None, and the value is already + being calculated by another thread, wait until the given timeout has + elapsed. If the value is available before the timeout expires, it is + returned. If not, None is returned, and a sentinel placed in the cache + to signal other threads to wait. + + If timeout is None, no waiting is performed nor sentinels used. + """ + value = self.get(key) + if isinstance(value, Event): + if timeout is None: + # Ignore the other thread and recalc it ourselves. + if debug: + cherrypy.log('No timeout', 'TOOLS.CACHING') + return None + + # Wait until it's done or times out. + if debug: + cherrypy.log('Waiting up to %s seconds' % + timeout, 'TOOLS.CACHING') + value.wait(timeout) + if value.result is not None: + # The other thread finished its calculation. Use it. + if debug: + cherrypy.log('Result!', 'TOOLS.CACHING') + return value.result + # Timed out. Stick an Event in the slot so other threads wait + # on this one to finish calculating the value. + if debug: + cherrypy.log('Timed out', 'TOOLS.CACHING') + e = threading.Event() + e.result = None + dict.__setitem__(self, key, e) + + return None + elif value is None: + # Stick an Event in the slot so other threads wait + # on this one to finish calculating the value. + if debug: + cherrypy.log('Timed out', 'TOOLS.CACHING') + e = threading.Event() + e.result = None + dict.__setitem__(self, key, e) + return value + + def __setitem__(self, key, value): + """Set the cached value for the given key.""" + existing = self.get(key) + dict.__setitem__(self, key, value) + if isinstance(existing, Event): + # Set Event.result so other threads waiting on it have + # immediate access without needing to poll the cache again. + existing.result = value + existing.set() + + +class MemoryCache(Cache): + + """An in-memory cache for varying response content. + + Each key in self.store is a URI, and each value is an AntiStampedeCache. + The response for any given URI may vary based on the values of + "selecting request headers"; that is, those named in the Vary + response header. We assume the list of header names to be constant + for each URI throughout the lifetime of the application, and store + that list in ``self.store[uri].selecting_headers``. + + The items contained in ``self.store[uri]`` have keys which are tuples of + request header values (in the same order as the names in its + selecting_headers), and values which are the actual responses. + """ + + maxobjects = 1000 + """The maximum number of cached objects; defaults to 1000.""" + + maxobj_size = 100000 + """The maximum size of each cached object in bytes; defaults to 100 KB.""" + + maxsize = 10000000 + """The maximum size of the entire cache in bytes; defaults to 10 MB.""" + + delay = 600 + """Seconds until the cached content expires; defaults to 600 (10 minutes). + """ + + antistampede_timeout = 5 + """Seconds to wait for other threads to release a cache lock.""" + + expire_freq = 0.1 + """Seconds to sleep between cache expiration sweeps.""" + + debug = False + + def __init__(self): + self.clear() + + # Run self.expire_cache in a separate daemon thread. + t = threading.Thread(target=self.expire_cache, name='expire_cache') + self.expiration_thread = t + set_daemon(t, True) + t.start() + + def clear(self): + """Reset the cache to its initial, empty state.""" + self.store = {} + self.expirations = {} + self.tot_puts = 0 + self.tot_gets = 0 + self.tot_hist = 0 + self.tot_expires = 0 + self.tot_non_modified = 0 + self.cursize = 0 + + def expire_cache(self): + """Continuously examine cached objects, expiring stale ones. + + This function is designed to be run in its own daemon thread, + referenced at ``self.expiration_thread``. + """ + # It's possible that "time" will be set to None + # arbitrarily, so we check "while time" to avoid exceptions. + # See tickets #99 and #180 for more information. + while time: + now = time.time() + # Must make a copy of expirations so it doesn't change size + # during iteration + for expiration_time, objects in copyitems(self.expirations): + if expiration_time <= now: + for obj_size, uri, sel_header_values in objects: + try: + del self.store[uri][tuple(sel_header_values)] + self.tot_expires += 1 + self.cursize -= obj_size + except KeyError: + # the key may have been deleted elsewhere + pass + del self.expirations[expiration_time] + time.sleep(self.expire_freq) + + def get(self): + """Return the current variant if in the cache, else None.""" + request = cherrypy.serving.request + self.tot_gets += 1 + + uri = cherrypy.url(qs=request.query_string) + uricache = self.store.get(uri) + if uricache is None: + return None + + header_values = [request.headers.get(h, '') + for h in uricache.selecting_headers] + variant = uricache.wait(key=tuple(sorted(header_values)), + timeout=self.antistampede_timeout, + debug=self.debug) + if variant is not None: + self.tot_hist += 1 + return variant + + def put(self, variant, size): + """Store the current variant in the cache.""" + request = cherrypy.serving.request + response = cherrypy.serving.response + + uri = cherrypy.url(qs=request.query_string) + uricache = self.store.get(uri) + if uricache is None: + uricache = AntiStampedeCache() + uricache.selecting_headers = [ + e.value for e in response.headers.elements('Vary')] + self.store[uri] = uricache + + if len(self.store) < self.maxobjects: + total_size = self.cursize + size + + # checks if there's space for the object + if (size < self.maxobj_size and total_size < self.maxsize): + # add to the expirations list + expiration_time = response.time + self.delay + bucket = self.expirations.setdefault(expiration_time, []) + bucket.append((size, uri, uricache.selecting_headers)) + + # add to the cache + header_values = [request.headers.get(h, '') + for h in uricache.selecting_headers] + uricache[tuple(sorted(header_values))] = variant + self.tot_puts += 1 + self.cursize = total_size + + def delete(self): + """Remove ALL cached variants of the current resource.""" + uri = cherrypy.url(qs=cherrypy.serving.request.query_string) + self.store.pop(uri, None) + + +def get(invalid_methods=("POST", "PUT", "DELETE"), debug=False, **kwargs): + """Try to obtain cached output. If fresh enough, raise HTTPError(304). + + If POST, PUT, or DELETE: + * invalidates (deletes) any cached response for this resource + * sets request.cached = False + * sets request.cacheable = False + + else if a cached copy exists: + * sets request.cached = True + * sets request.cacheable = False + * sets response.headers to the cached values + * checks the cached Last-Modified response header against the + current If-(Un)Modified-Since request headers; raises 304 + if necessary. + * sets response.status and response.body to the cached values + * returns True + + otherwise: + * sets request.cached = False + * sets request.cacheable = True + * returns False + """ + request = cherrypy.serving.request + response = cherrypy.serving.response + + if not hasattr(cherrypy, "_cache"): + # Make a process-wide Cache object. + cherrypy._cache = kwargs.pop("cache_class", MemoryCache)() + + # Take all remaining kwargs and set them on the Cache object. + for k, v in kwargs.items(): + setattr(cherrypy._cache, k, v) + cherrypy._cache.debug = debug + + # POST, PUT, DELETE should invalidate (delete) the cached copy. + # See http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.10. + if request.method in invalid_methods: + if debug: + cherrypy.log('request.method %r in invalid_methods %r' % + (request.method, invalid_methods), 'TOOLS.CACHING') + cherrypy._cache.delete() + request.cached = False + request.cacheable = False + return False + + if 'no-cache' in [e.value for e in request.headers.elements('Pragma')]: + request.cached = False + request.cacheable = True + return False + + cache_data = cherrypy._cache.get() + request.cached = bool(cache_data) + request.cacheable = not request.cached + if request.cached: + # Serve the cached copy. + max_age = cherrypy._cache.delay + for v in [e.value for e in request.headers.elements('Cache-Control')]: + atoms = v.split('=', 1) + directive = atoms.pop(0) + if directive == 'max-age': + if len(atoms) != 1 or not atoms[0].isdigit(): + raise cherrypy.HTTPError( + 400, "Invalid Cache-Control header") + max_age = int(atoms[0]) + break + elif directive == 'no-cache': + if debug: + cherrypy.log( + 'Ignoring cache due to Cache-Control: no-cache', + 'TOOLS.CACHING') + request.cached = False + request.cacheable = True + return False + + if debug: + cherrypy.log('Reading response from cache', 'TOOLS.CACHING') + s, h, b, create_time = cache_data + age = int(response.time - create_time) + if (age > max_age): + if debug: + cherrypy.log('Ignoring cache due to age > %d' % max_age, + 'TOOLS.CACHING') + request.cached = False + request.cacheable = True + return False + + # Copy the response headers. See + # https://bitbucket.org/cherrypy/cherrypy/issue/721. + response.headers = rh = httputil.HeaderMap() + for k in h: + dict.__setitem__(rh, k, dict.__getitem__(h, k)) + + # Add the required Age header + response.headers["Age"] = str(age) + + try: + # Note that validate_since depends on a Last-Modified header; + # this was put into the cached copy, and should have been + # resurrected just above (response.headers = cache_data[1]). + cptools.validate_since() + except cherrypy.HTTPRedirect: + x = sys.exc_info()[1] + if x.status == 304: + cherrypy._cache.tot_non_modified += 1 + raise + + # serve it & get out from the request + response.status = s + response.body = b + else: + if debug: + cherrypy.log('request is not cached', 'TOOLS.CACHING') + return request.cached + + +def tee_output(): + """Tee response output to cache storage. Internal.""" + # Used by CachingTool by attaching to request.hooks + + request = cherrypy.serving.request + if 'no-store' in request.headers.values('Cache-Control'): + return + + def tee(body): + """Tee response.body into a list.""" + if ('no-cache' in response.headers.values('Pragma') or + 'no-store' in response.headers.values('Cache-Control')): + for chunk in body: + yield chunk + return + + output = [] + for chunk in body: + output.append(chunk) + yield chunk + + # save the cache data + body = ntob('').join(output) + cherrypy._cache.put((response.status, response.headers or {}, + body, response.time), len(body)) + + response = cherrypy.serving.response + response.body = tee(response.body) + + +def expires(secs=0, force=False, debug=False): + """Tool for influencing cache mechanisms using the 'Expires' header. + + secs + Must be either an int or a datetime.timedelta, and indicates the + number of seconds between response.time and when the response should + expire. The 'Expires' header will be set to response.time + secs. + If secs is zero, the 'Expires' header is set one year in the past, and + the following "cache prevention" headers are also set: + + * Pragma: no-cache + * Cache-Control': no-cache, must-revalidate + + force + If False, the following headers are checked: + + * Etag + * Last-Modified + * Age + * Expires + + If any are already present, none of the above response headers are set. + + """ + + response = cherrypy.serving.response + headers = response.headers + + cacheable = False + if not force: + # some header names that indicate that the response can be cached + for indicator in ('Etag', 'Last-Modified', 'Age', 'Expires'): + if indicator in headers: + cacheable = True + break + + if not cacheable and not force: + if debug: + cherrypy.log('request is not cacheable', 'TOOLS.EXPIRES') + else: + if debug: + cherrypy.log('request is cacheable', 'TOOLS.EXPIRES') + if isinstance(secs, datetime.timedelta): + secs = (86400 * secs.days) + secs.seconds + + if secs == 0: + if force or ("Pragma" not in headers): + headers["Pragma"] = "no-cache" + if cherrypy.serving.request.protocol >= (1, 1): + if force or "Cache-Control" not in headers: + headers["Cache-Control"] = "no-cache, must-revalidate" + # Set an explicit Expires date in the past. + expiry = httputil.HTTPDate(1169942400.0) + else: + expiry = httputil.HTTPDate(response.time + secs) + if force or "Expires" not in headers: + headers["Expires"] = expiry diff --git a/lib/cherrypy/lib/covercp.py b/lib/cherrypy/lib/covercp.py new file mode 100644 index 00000000..a74ec342 --- /dev/null +++ b/lib/cherrypy/lib/covercp.py @@ -0,0 +1,387 @@ +"""Code-coverage tools for CherryPy. + +To use this module, or the coverage tools in the test suite, +you need to download 'coverage.py', either Gareth Rees' `original +implementation `_ +or Ned Batchelder's `enhanced version: +`_ + +To turn on coverage tracing, use the following code:: + + cherrypy.engine.subscribe('start', covercp.start) + +DO NOT subscribe anything on the 'start_thread' channel, as previously +recommended. Calling start once in the main thread should be sufficient +to start coverage on all threads. Calling start again in each thread +effectively clears any coverage data gathered up to that point. + +Run your code, then use the ``covercp.serve()`` function to browse the +results in a web browser. If you run this module from the command line, +it will call ``serve()`` for you. +""" + +import re +import sys +import cgi +from cherrypy._cpcompat import quote_plus +import os +import os.path +localFile = os.path.join(os.path.dirname(__file__), "coverage.cache") + +the_coverage = None +try: + from coverage import coverage + the_coverage = coverage(data_file=localFile) + + def start(): + the_coverage.start() +except ImportError: + # Setting the_coverage to None will raise errors + # that need to be trapped downstream. + the_coverage = None + + import warnings + warnings.warn( + "No code coverage will be performed; " + "coverage.py could not be imported.") + + def start(): + pass +start.priority = 20 + +TEMPLATE_MENU = """ + + CherryPy Coverage Menu + + + +

CherryPy Coverage

""" + +TEMPLATE_FORM = """ +
+
+ + Show percentages +
+ Hide files over + %%
+ Exclude files matching
+ +
+ + +
+
""" + +TEMPLATE_FRAMESET = """ +CherryPy coverage data + + + + + +""" + +TEMPLATE_COVERAGE = """ + + Coverage for %(name)s + + + +

%(name)s

+

%(fullpath)s

+

Coverage: %(pc)s%%

""" + +TEMPLATE_LOC_COVERED = """ + %s  + %s +\n""" +TEMPLATE_LOC_NOT_COVERED = """ + %s  + %s +\n""" +TEMPLATE_LOC_EXCLUDED = """ + %s  + %s +\n""" + +TEMPLATE_ITEM = ( + "%s%s%s\n" +) + + +def _percent(statements, missing): + s = len(statements) + e = s - len(missing) + if s > 0: + return int(round(100.0 * e / s)) + return 0 + + +def _show_branch(root, base, path, pct=0, showpct=False, exclude="", + coverage=the_coverage): + + # Show the directory name and any of our children + dirs = [k for k, v in root.items() if v] + dirs.sort() + for name in dirs: + newpath = os.path.join(path, name) + + if newpath.lower().startswith(base): + relpath = newpath[len(base):] + yield "| " * relpath.count(os.sep) + yield ( + "%s\n" % + (newpath, quote_plus(exclude), name) + ) + + for chunk in _show_branch( + root[name], base, newpath, pct, showpct, + exclude, coverage=coverage + ): + yield chunk + + # Now list the files + if path.lower().startswith(base): + relpath = path[len(base):] + files = [k for k, v in root.items() if not v] + files.sort() + for name in files: + newpath = os.path.join(path, name) + + pc_str = "" + if showpct: + try: + _, statements, _, missing, _ = coverage.analysis2(newpath) + except: + # Yes, we really want to pass on all errors. + pass + else: + pc = _percent(statements, missing) + pc_str = ("%3d%% " % pc).replace(' ', ' ') + if pc < float(pct) or pc == -1: + pc_str = "%s" % pc_str + else: + pc_str = "%s" % pc_str + + yield TEMPLATE_ITEM % ("| " * (relpath.count(os.sep) + 1), + pc_str, newpath, name) + + +def _skip_file(path, exclude): + if exclude: + return bool(re.search(exclude, path)) + + +def _graft(path, tree): + d = tree + + p = path + atoms = [] + while True: + p, tail = os.path.split(p) + if not tail: + break + atoms.append(tail) + atoms.append(p) + if p != "/": + atoms.append("/") + + atoms.reverse() + for node in atoms: + if node: + d = d.setdefault(node, {}) + + +def get_tree(base, exclude, coverage=the_coverage): + """Return covered module names as a nested dict.""" + tree = {} + runs = coverage.data.executed_files() + for path in runs: + if not _skip_file(path, exclude) and not os.path.isdir(path): + _graft(path, tree) + return tree + + +class CoverStats(object): + + def __init__(self, coverage, root=None): + self.coverage = coverage + if root is None: + # Guess initial depth. Files outside this path will not be + # reachable from the web interface. + import cherrypy + root = os.path.dirname(cherrypy.__file__) + self.root = root + + def index(self): + return TEMPLATE_FRAMESET % self.root.lower() + index.exposed = True + + def menu(self, base="/", pct="50", showpct="", + exclude=r'python\d\.\d|test|tut\d|tutorial'): + + # The coverage module uses all-lower-case names. + base = base.lower().rstrip(os.sep) + + yield TEMPLATE_MENU + yield TEMPLATE_FORM % locals() + + # Start by showing links for parent paths + yield "
" + path = "" + atoms = base.split(os.sep) + atoms.pop() + for atom in atoms: + path += atom + os.sep + yield ("%s %s" + % (path, quote_plus(exclude), atom, os.sep)) + yield "
" + + yield "
" + + # Then display the tree + tree = get_tree(base, exclude, self.coverage) + if not tree: + yield "

No modules covered.

" + else: + for chunk in _show_branch(tree, base, "/", pct, + showpct == 'checked', exclude, + coverage=self.coverage): + yield chunk + + yield "
" + yield "" + menu.exposed = True + + def annotated_file(self, filename, statements, excluded, missing): + source = open(filename, 'r') + buffer = [] + for lineno, line in enumerate(source.readlines()): + lineno += 1 + line = line.strip("\n\r") + empty_the_buffer = True + if lineno in excluded: + template = TEMPLATE_LOC_EXCLUDED + elif lineno in missing: + template = TEMPLATE_LOC_NOT_COVERED + elif lineno in statements: + template = TEMPLATE_LOC_COVERED + else: + empty_the_buffer = False + buffer.append((lineno, line)) + if empty_the_buffer: + for lno, pastline in buffer: + yield template % (lno, cgi.escape(pastline)) + buffer = [] + yield template % (lineno, cgi.escape(line)) + + def report(self, name): + filename, statements, excluded, missing, _ = self.coverage.analysis2( + name) + pc = _percent(statements, missing) + yield TEMPLATE_COVERAGE % dict(name=os.path.basename(name), + fullpath=name, + pc=pc) + yield '\n' + for line in self.annotated_file(filename, statements, excluded, + missing): + yield line + yield '
' + yield '' + yield '' + report.exposed = True + + +def serve(path=localFile, port=8080, root=None): + if coverage is None: + raise ImportError("The coverage module could not be imported.") + from coverage import coverage + cov = coverage(data_file=path) + cov.load() + + import cherrypy + cherrypy.config.update({'server.socket_port': int(port), + 'server.thread_pool': 10, + 'environment': "production", + }) + cherrypy.quickstart(CoverStats(cov, root)) + +if __name__ == "__main__": + serve(*tuple(sys.argv[1:])) diff --git a/lib/cherrypy/lib/cpstats.py b/lib/cherrypy/lib/cpstats.py new file mode 100644 index 00000000..a8661a14 --- /dev/null +++ b/lib/cherrypy/lib/cpstats.py @@ -0,0 +1,687 @@ +"""CPStats, a package for collecting and reporting on program statistics. + +Overview +======== + +Statistics about program operation are an invaluable monitoring and debugging +tool. Unfortunately, the gathering and reporting of these critical values is +usually ad-hoc. This package aims to add a centralized place for gathering +statistical performance data, a structure for recording that data which +provides for extrapolation of that data into more useful information, +and a method of serving that data to both human investigators and +monitoring software. Let's examine each of those in more detail. + +Data Gathering +-------------- + +Just as Python's `logging` module provides a common importable for gathering +and sending messages, performance statistics would benefit from a similar +common mechanism, and one that does *not* require each package which wishes +to collect stats to import a third-party module. Therefore, we choose to +re-use the `logging` module by adding a `statistics` object to it. + +That `logging.statistics` object is a nested dict. It is not a custom class, +because that would: + + 1. require libraries and applications to import a third-party module in + order to participate + 2. inhibit innovation in extrapolation approaches and in reporting tools, and + 3. be slow. + +There are, however, some specifications regarding the structure of the dict.:: + + { + +----"SQLAlchemy": { + | "Inserts": 4389745, + | "Inserts per Second": + | lambda s: s["Inserts"] / (time() - s["Start"]), + | C +---"Table Statistics": { + | o | "widgets": {-----------+ + N | l | "Rows": 1.3M, | Record + a | l | "Inserts": 400, | + m | e | },---------------------+ + e | c | "froobles": { + s | t | "Rows": 7845, + p | i | "Inserts": 0, + a | o | }, + c | n +---}, + e | "Slow Queries": + | [{"Query": "SELECT * FROM widgets;", + | "Processing Time": 47.840923343, + | }, + | ], + +----}, + } + +The `logging.statistics` dict has four levels. The topmost level is nothing +more than a set of names to introduce modularity, usually along the lines of +package names. If the SQLAlchemy project wanted to participate, for example, +it might populate the item `logging.statistics['SQLAlchemy']`, whose value +would be a second-layer dict we call a "namespace". Namespaces help multiple +packages to avoid collisions over key names, and make reports easier to read, +to boot. The maintainers of SQLAlchemy should feel free to use more than one +namespace if needed (such as 'SQLAlchemy ORM'). Note that there are no case +or other syntax constraints on the namespace names; they should be chosen +to be maximally readable by humans (neither too short nor too long). + +Each namespace, then, is a dict of named statistical values, such as +'Requests/sec' or 'Uptime'. You should choose names which will look +good on a report: spaces and capitalization are just fine. + +In addition to scalars, values in a namespace MAY be a (third-layer) +dict, or a list, called a "collection". For example, the CherryPy +:class:`StatsTool` keeps track of what each request is doing (or has most +recently done) in a 'Requests' collection, where each key is a thread ID; each +value in the subdict MUST be a fourth dict (whew!) of statistical data about +each thread. We call each subdict in the collection a "record". Similarly, +the :class:`StatsTool` also keeps a list of slow queries, where each record +contains data about each slow query, in order. + +Values in a namespace or record may also be functions, which brings us to: + +Extrapolation +------------- + +The collection of statistical data needs to be fast, as close to unnoticeable +as possible to the host program. That requires us to minimize I/O, for example, +but in Python it also means we need to minimize function calls. So when you +are designing your namespace and record values, try to insert the most basic +scalar values you already have on hand. + +When it comes time to report on the gathered data, however, we usually have +much more freedom in what we can calculate. Therefore, whenever reporting +tools (like the provided :class:`StatsPage` CherryPy class) fetch the contents +of `logging.statistics` for reporting, they first call +`extrapolate_statistics` (passing the whole `statistics` dict as the only +argument). This makes a deep copy of the statistics dict so that the +reporting tool can both iterate over it and even change it without harming +the original. But it also expands any functions in the dict by calling them. +For example, you might have a 'Current Time' entry in the namespace with the +value "lambda scope: time.time()". The "scope" parameter is the current +namespace dict (or record, if we're currently expanding one of those +instead), allowing you access to existing static entries. If you're truly +evil, you can even modify more than one entry at a time. + +However, don't try to calculate an entry and then use its value in further +extrapolations; the order in which the functions are called is not guaranteed. +This can lead to a certain amount of duplicated work (or a redesign of your +schema), but that's better than complicating the spec. + +After the whole thing has been extrapolated, it's time for: + +Reporting +--------- + +The :class:`StatsPage` class grabs the `logging.statistics` dict, extrapolates +it all, and then transforms it to HTML for easy viewing. Each namespace gets +its own header and attribute table, plus an extra table for each collection. +This is NOT part of the statistics specification; other tools can format how +they like. + +You can control which columns are output and how they are formatted by updating +StatsPage.formatting, which is a dict that mirrors the keys and nesting of +`logging.statistics`. The difference is that, instead of data values, it has +formatting values. Use None for a given key to indicate to the StatsPage that a +given column should not be output. Use a string with formatting +(such as '%.3f') to interpolate the value(s), or use a callable (such as +lambda v: v.isoformat()) for more advanced formatting. Any entry which is not +mentioned in the formatting dict is output unchanged. + +Monitoring +---------- + +Although the HTML output takes pains to assign unique id's to each with +statistical data, you're probably better off fetching /cpstats/data, which +outputs the whole (extrapolated) `logging.statistics` dict in JSON format. +That is probably easier to parse, and doesn't have any formatting controls, +so you get the "original" data in a consistently-serialized format. +Note: there's no treatment yet for datetime objects. Try time.time() instead +for now if you can. Nagios will probably thank you. + +Turning Collection Off +---------------------- + +It is recommended each namespace have an "Enabled" item which, if False, +stops collection (but not reporting) of statistical data. Applications +SHOULD provide controls to pause and resume collection by setting these +entries to False or True, if present. + + +Usage +===== + +To collect statistics on CherryPy applications:: + + from cherrypy.lib import cpstats + appconfig['/']['tools.cpstats.on'] = True + +To collect statistics on your own code:: + + import logging + # Initialize the repository + if not hasattr(logging, 'statistics'): logging.statistics = {} + # Initialize my namespace + mystats = logging.statistics.setdefault('My Stuff', {}) + # Initialize my namespace's scalars and collections + mystats.update({ + 'Enabled': True, + 'Start Time': time.time(), + 'Important Events': 0, + 'Events/Second': lambda s: ( + (s['Important Events'] / (time.time() - s['Start Time']))), + }) + ... + for event in events: + ... + # Collect stats + if mystats.get('Enabled', False): + mystats['Important Events'] += 1 + +To report statistics:: + + root.cpstats = cpstats.StatsPage() + +To format statistics reports:: + + See 'Reporting', above. + +""" + +# ------------------------------- Statistics -------------------------------- # + +import logging +if not hasattr(logging, 'statistics'): + logging.statistics = {} + + +def extrapolate_statistics(scope): + """Return an extrapolated copy of the given scope.""" + c = {} + for k, v in list(scope.items()): + if isinstance(v, dict): + v = extrapolate_statistics(v) + elif isinstance(v, (list, tuple)): + v = [extrapolate_statistics(record) for record in v] + elif hasattr(v, '__call__'): + v = v(scope) + c[k] = v + return c + + +# -------------------- CherryPy Applications Statistics --------------------- # + +import threading +import time + +import cherrypy + +appstats = logging.statistics.setdefault('CherryPy Applications', {}) +appstats.update({ + 'Enabled': True, + 'Bytes Read/Request': lambda s: ( + s['Total Requests'] and + (s['Total Bytes Read'] / float(s['Total Requests'])) or + 0.0 + ), + 'Bytes Read/Second': lambda s: s['Total Bytes Read'] / s['Uptime'](s), + 'Bytes Written/Request': lambda s: ( + s['Total Requests'] and + (s['Total Bytes Written'] / float(s['Total Requests'])) or + 0.0 + ), + 'Bytes Written/Second': lambda s: ( + s['Total Bytes Written'] / s['Uptime'](s) + ), + 'Current Time': lambda s: time.time(), + 'Current Requests': 0, + 'Requests/Second': lambda s: float(s['Total Requests']) / s['Uptime'](s), + 'Server Version': cherrypy.__version__, + 'Start Time': time.time(), + 'Total Bytes Read': 0, + 'Total Bytes Written': 0, + 'Total Requests': 0, + 'Total Time': 0, + 'Uptime': lambda s: time.time() - s['Start Time'], + 'Requests': {}, +}) + +proc_time = lambda s: time.time() - s['Start Time'] + + +class ByteCountWrapper(object): + + """Wraps a file-like object, counting the number of bytes read.""" + + def __init__(self, rfile): + self.rfile = rfile + self.bytes_read = 0 + + def read(self, size=-1): + data = self.rfile.read(size) + self.bytes_read += len(data) + return data + + def readline(self, size=-1): + data = self.rfile.readline(size) + self.bytes_read += len(data) + return data + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline() + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline() + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def next(self): + data = self.rfile.next() + self.bytes_read += len(data) + return data + + +average_uriset_time = lambda s: s['Count'] and (s['Sum'] / s['Count']) or 0 + + +class StatsTool(cherrypy.Tool): + + """Record various information about the current request.""" + + def __init__(self): + cherrypy.Tool.__init__(self, 'on_end_request', self.record_stop) + + def _setup(self): + """Hook this tool into cherrypy.request. + + The standard CherryPy request object will automatically call this + method when the tool is "turned on" in config. + """ + if appstats.get('Enabled', False): + cherrypy.Tool._setup(self) + self.record_start() + + def record_start(self): + """Record the beginning of a request.""" + request = cherrypy.serving.request + if not hasattr(request.rfile, 'bytes_read'): + request.rfile = ByteCountWrapper(request.rfile) + request.body.fp = request.rfile + + r = request.remote + + appstats['Current Requests'] += 1 + appstats['Total Requests'] += 1 + appstats['Requests'][threading._get_ident()] = { + 'Bytes Read': None, + 'Bytes Written': None, + # Use a lambda so the ip gets updated by tools.proxy later + 'Client': lambda s: '%s:%s' % (r.ip, r.port), + 'End Time': None, + 'Processing Time': proc_time, + 'Request-Line': request.request_line, + 'Response Status': None, + 'Start Time': time.time(), + } + + def record_stop( + self, uriset=None, slow_queries=1.0, slow_queries_count=100, + debug=False, **kwargs): + """Record the end of a request.""" + resp = cherrypy.serving.response + w = appstats['Requests'][threading._get_ident()] + + r = cherrypy.request.rfile.bytes_read + w['Bytes Read'] = r + appstats['Total Bytes Read'] += r + + if resp.stream: + w['Bytes Written'] = 'chunked' + else: + cl = int(resp.headers.get('Content-Length', 0)) + w['Bytes Written'] = cl + appstats['Total Bytes Written'] += cl + + w['Response Status'] = getattr( + resp, 'output_status', None) or resp.status + + w['End Time'] = time.time() + p = w['End Time'] - w['Start Time'] + w['Processing Time'] = p + appstats['Total Time'] += p + + appstats['Current Requests'] -= 1 + + if debug: + cherrypy.log('Stats recorded: %s' % repr(w), 'TOOLS.CPSTATS') + + if uriset: + rs = appstats.setdefault('URI Set Tracking', {}) + r = rs.setdefault(uriset, { + 'Min': None, 'Max': None, 'Count': 0, 'Sum': 0, + 'Avg': average_uriset_time}) + if r['Min'] is None or p < r['Min']: + r['Min'] = p + if r['Max'] is None or p > r['Max']: + r['Max'] = p + r['Count'] += 1 + r['Sum'] += p + + if slow_queries and p > slow_queries: + sq = appstats.setdefault('Slow Queries', []) + sq.append(w.copy()) + if len(sq) > slow_queries_count: + sq.pop(0) + + +import cherrypy +cherrypy.tools.cpstats = StatsTool() + + +# ---------------------- CherryPy Statistics Reporting ---------------------- # + +import os +thisdir = os.path.abspath(os.path.dirname(__file__)) + +try: + import json +except ImportError: + try: + import simplejson as json + except ImportError: + json = None + + +missing = object() + +locale_date = lambda v: time.strftime('%c', time.gmtime(v)) +iso_format = lambda v: time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(v)) + + +def pause_resume(ns): + def _pause_resume(enabled): + pause_disabled = '' + resume_disabled = '' + if enabled: + resume_disabled = 'disabled="disabled" ' + else: + pause_disabled = 'disabled="disabled" ' + return """ +
+ + +
+
+ + +
+ """ % (ns, pause_disabled, ns, resume_disabled) + return _pause_resume + + +class StatsPage(object): + + formatting = { + 'CherryPy Applications': { + 'Enabled': pause_resume('CherryPy Applications'), + 'Bytes Read/Request': '%.3f', + 'Bytes Read/Second': '%.3f', + 'Bytes Written/Request': '%.3f', + 'Bytes Written/Second': '%.3f', + 'Current Time': iso_format, + 'Requests/Second': '%.3f', + 'Start Time': iso_format, + 'Total Time': '%.3f', + 'Uptime': '%.3f', + 'Slow Queries': { + 'End Time': None, + 'Processing Time': '%.3f', + 'Start Time': iso_format, + }, + 'URI Set Tracking': { + 'Avg': '%.3f', + 'Max': '%.3f', + 'Min': '%.3f', + 'Sum': '%.3f', + }, + 'Requests': { + 'Bytes Read': '%s', + 'Bytes Written': '%s', + 'End Time': None, + 'Processing Time': '%.3f', + 'Start Time': None, + }, + }, + 'CherryPy WSGIServer': { + 'Enabled': pause_resume('CherryPy WSGIServer'), + 'Connections/second': '%.3f', + 'Start time': iso_format, + }, + } + + def index(self): + # Transform the raw data into pretty output for HTML + yield """ + + + Statistics + + + +""" + for title, scalars, collections in self.get_namespaces(): + yield """ +

%s

+ + + +""" % title + for i, (key, value) in enumerate(scalars): + colnum = i % 3 + if colnum == 0: + yield """ + """ + yield ( + """ + """ % + vars() + ) + if colnum == 2: + yield """ + """ + + if colnum == 0: + yield """ + + + """ + elif colnum == 1: + yield """ + + """ + yield """ + +
%(key)s%(value)s
""" + + for subtitle, headers, subrows in collections: + yield """ +

%s

+ + + """ % subtitle + for key in headers: + yield """ + """ % key + yield """ + + + """ + for subrow in subrows: + yield """ + """ + for value in subrow: + yield """ + """ % value + yield """ + """ + yield """ + +
%s
%s
""" + yield """ + + +""" + index.exposed = True + + def get_namespaces(self): + """Yield (title, scalars, collections) for each namespace.""" + s = extrapolate_statistics(logging.statistics) + for title, ns in sorted(s.items()): + scalars = [] + collections = [] + ns_fmt = self.formatting.get(title, {}) + for k, v in sorted(ns.items()): + fmt = ns_fmt.get(k, {}) + if isinstance(v, dict): + headers, subrows = self.get_dict_collection(v, fmt) + collections.append((k, ['ID'] + headers, subrows)) + elif isinstance(v, (list, tuple)): + headers, subrows = self.get_list_collection(v, fmt) + collections.append((k, headers, subrows)) + else: + format = ns_fmt.get(k, missing) + if format is None: + # Don't output this column. + continue + if hasattr(format, '__call__'): + v = format(v) + elif format is not missing: + v = format % v + scalars.append((k, v)) + yield title, scalars, collections + + def get_dict_collection(self, v, formatting): + """Return ([headers], [rows]) for the given collection.""" + # E.g., the 'Requests' dict. + headers = [] + for record in v.itervalues(): + for k3 in record: + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. + continue + if k3 not in headers: + headers.append(k3) + headers.sort() + + subrows = [] + for k2, record in sorted(v.items()): + subrow = [k2] + for k3 in headers: + v3 = record.get(k3, '') + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. + continue + if hasattr(format, '__call__'): + v3 = format(v3) + elif format is not missing: + v3 = format % v3 + subrow.append(v3) + subrows.append(subrow) + + return headers, subrows + + def get_list_collection(self, v, formatting): + """Return ([headers], [subrows]) for the given collection.""" + # E.g., the 'Slow Queries' list. + headers = [] + for record in v: + for k3 in record: + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. + continue + if k3 not in headers: + headers.append(k3) + headers.sort() + + subrows = [] + for record in v: + subrow = [] + for k3 in headers: + v3 = record.get(k3, '') + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. + continue + if hasattr(format, '__call__'): + v3 = format(v3) + elif format is not missing: + v3 = format % v3 + subrow.append(v3) + subrows.append(subrow) + + return headers, subrows + + if json is not None: + def data(self): + s = extrapolate_statistics(logging.statistics) + cherrypy.response.headers['Content-Type'] = 'application/json' + return json.dumps(s, sort_keys=True, indent=4) + data.exposed = True + + def pause(self, namespace): + logging.statistics.get(namespace, {})['Enabled'] = False + raise cherrypy.HTTPRedirect('./') + pause.exposed = True + pause.cp_config = {'tools.allow.on': True, + 'tools.allow.methods': ['POST']} + + def resume(self, namespace): + logging.statistics.get(namespace, {})['Enabled'] = True + raise cherrypy.HTTPRedirect('./') + resume.exposed = True + resume.cp_config = {'tools.allow.on': True, + 'tools.allow.methods': ['POST']} diff --git a/lib/cherrypy/lib/cptools.py b/lib/cherrypy/lib/cptools.py new file mode 100644 index 00000000..f376282c --- /dev/null +++ b/lib/cherrypy/lib/cptools.py @@ -0,0 +1,630 @@ +"""Functions for builtin CherryPy tools.""" + +import logging +import re + +import cherrypy +from cherrypy._cpcompat import basestring, md5, set, unicodestr +from cherrypy.lib import httputil as _httputil +from cherrypy.lib import is_iterator + + +# Conditional HTTP request support # + +def validate_etags(autotags=False, debug=False): + """Validate the current ETag against If-Match, If-None-Match headers. + + If autotags is True, an ETag response-header value will be provided + from an MD5 hash of the response body (unless some other code has + already provided an ETag header). If False (the default), the ETag + will not be automatic. + + WARNING: the autotags feature is not designed for URL's which allow + methods other than GET. For example, if a POST to the same URL returns + no content, the automatic ETag will be incorrect, breaking a fundamental + use for entity tags in a possibly destructive fashion. Likewise, if you + raise 304 Not Modified, the response body will be empty, the ETag hash + will be incorrect, and your application will break. + See :rfc:`2616` Section 14.24. + """ + response = cherrypy.serving.response + + # Guard against being run twice. + if hasattr(response, "ETag"): + return + + status, reason, msg = _httputil.valid_status(response.status) + + etag = response.headers.get('ETag') + + # Automatic ETag generation. See warning in docstring. + if etag: + if debug: + cherrypy.log('ETag already set: %s' % etag, 'TOOLS.ETAGS') + elif not autotags: + if debug: + cherrypy.log('Autotags off', 'TOOLS.ETAGS') + elif status != 200: + if debug: + cherrypy.log('Status not 200', 'TOOLS.ETAGS') + else: + etag = response.collapse_body() + etag = '"%s"' % md5(etag).hexdigest() + if debug: + cherrypy.log('Setting ETag: %s' % etag, 'TOOLS.ETAGS') + response.headers['ETag'] = etag + + response.ETag = etag + + # "If the request would, without the If-Match header field, result in + # anything other than a 2xx or 412 status, then the If-Match header + # MUST be ignored." + if debug: + cherrypy.log('Status: %s' % status, 'TOOLS.ETAGS') + if status >= 200 and status <= 299: + request = cherrypy.serving.request + + conditions = request.headers.elements('If-Match') or [] + conditions = [str(x) for x in conditions] + if debug: + cherrypy.log('If-Match conditions: %s' % repr(conditions), + 'TOOLS.ETAGS') + if conditions and not (conditions == ["*"] or etag in conditions): + raise cherrypy.HTTPError(412, "If-Match failed: ETag %r did " + "not match %r" % (etag, conditions)) + + conditions = request.headers.elements('If-None-Match') or [] + conditions = [str(x) for x in conditions] + if debug: + cherrypy.log('If-None-Match conditions: %s' % repr(conditions), + 'TOOLS.ETAGS') + if conditions == ["*"] or etag in conditions: + if debug: + cherrypy.log('request.method: %s' % + request.method, 'TOOLS.ETAGS') + if request.method in ("GET", "HEAD"): + raise cherrypy.HTTPRedirect([], 304) + else: + raise cherrypy.HTTPError(412, "If-None-Match failed: ETag %r " + "matched %r" % (etag, conditions)) + + +def validate_since(): + """Validate the current Last-Modified against If-Modified-Since headers. + + If no code has set the Last-Modified response header, then no validation + will be performed. + """ + response = cherrypy.serving.response + lastmod = response.headers.get('Last-Modified') + if lastmod: + status, reason, msg = _httputil.valid_status(response.status) + + request = cherrypy.serving.request + + since = request.headers.get('If-Unmodified-Since') + if since and since != lastmod: + if (status >= 200 and status <= 299) or status == 412: + raise cherrypy.HTTPError(412) + + since = request.headers.get('If-Modified-Since') + if since and since == lastmod: + if (status >= 200 and status <= 299) or status == 304: + if request.method in ("GET", "HEAD"): + raise cherrypy.HTTPRedirect([], 304) + else: + raise cherrypy.HTTPError(412) + + +# Tool code # + +def allow(methods=None, debug=False): + """Raise 405 if request.method not in methods (default ['GET', 'HEAD']). + + The given methods are case-insensitive, and may be in any order. + If only one method is allowed, you may supply a single string; + if more than one, supply a list of strings. + + Regardless of whether the current method is allowed or not, this + also emits an 'Allow' response header, containing the given methods. + """ + if not isinstance(methods, (tuple, list)): + methods = [methods] + methods = [m.upper() for m in methods if m] + if not methods: + methods = ['GET', 'HEAD'] + elif 'GET' in methods and 'HEAD' not in methods: + methods.append('HEAD') + + cherrypy.response.headers['Allow'] = ', '.join(methods) + if cherrypy.request.method not in methods: + if debug: + cherrypy.log('request.method %r not in methods %r' % + (cherrypy.request.method, methods), 'TOOLS.ALLOW') + raise cherrypy.HTTPError(405) + else: + if debug: + cherrypy.log('request.method %r in methods %r' % + (cherrypy.request.method, methods), 'TOOLS.ALLOW') + + +def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For', + scheme='X-Forwarded-Proto', debug=False): + """Change the base URL (scheme://host[:port][/path]). + + For running a CP server behind Apache, lighttpd, or other HTTP server. + + For Apache and lighttpd, you should leave the 'local' argument at the + default value of 'X-Forwarded-Host'. For Squid, you probably want to set + tools.proxy.local = 'Origin'. + + If you want the new request.base to include path info (not just the host), + you must explicitly set base to the full base path, and ALSO set 'local' + to '', so that the X-Forwarded-Host request header (which never includes + path info) does not override it. Regardless, the value for 'base' MUST + NOT end in a slash. + + cherrypy.request.remote.ip (the IP address of the client) will be + rewritten if the header specified by the 'remote' arg is valid. + By default, 'remote' is set to 'X-Forwarded-For'. If you do not + want to rewrite remote.ip, set the 'remote' arg to an empty string. + """ + + request = cherrypy.serving.request + + if scheme: + s = request.headers.get(scheme, None) + if debug: + cherrypy.log('Testing scheme %r:%r' % (scheme, s), 'TOOLS.PROXY') + if s == 'on' and 'ssl' in scheme.lower(): + # This handles e.g. webfaction's 'X-Forwarded-Ssl: on' header + scheme = 'https' + else: + # This is for lighttpd/pound/Mongrel's 'X-Forwarded-Proto: https' + scheme = s + if not scheme: + scheme = request.base[:request.base.find("://")] + + if local: + lbase = request.headers.get(local, None) + if debug: + cherrypy.log('Testing local %r:%r' % (local, lbase), 'TOOLS.PROXY') + if lbase is not None: + base = lbase.split(',')[0] + if not base: + port = request.local.port + if port == 80: + base = '127.0.0.1' + else: + base = '127.0.0.1:%s' % port + + if base.find("://") == -1: + # add http:// or https:// if needed + base = scheme + "://" + base + + request.base = base + + if remote: + xff = request.headers.get(remote) + if debug: + cherrypy.log('Testing remote %r:%r' % (remote, xff), 'TOOLS.PROXY') + if xff: + if remote == 'X-Forwarded-For': + #Bug #1268 + xff = xff.split(',')[0].strip() + request.remote.ip = xff + + +def ignore_headers(headers=('Range',), debug=False): + """Delete request headers whose field names are included in 'headers'. + + This is a useful tool for working behind certain HTTP servers; + for example, Apache duplicates the work that CP does for 'Range' + headers, and will doubly-truncate the response. + """ + request = cherrypy.serving.request + for name in headers: + if name in request.headers: + if debug: + cherrypy.log('Ignoring request header %r' % name, + 'TOOLS.IGNORE_HEADERS') + del request.headers[name] + + +def response_headers(headers=None, debug=False): + """Set headers on the response.""" + if debug: + cherrypy.log('Setting response headers: %s' % repr(headers), + 'TOOLS.RESPONSE_HEADERS') + for name, value in (headers or []): + cherrypy.serving.response.headers[name] = value +response_headers.failsafe = True + + +def referer(pattern, accept=True, accept_missing=False, error=403, + message='Forbidden Referer header.', debug=False): + """Raise HTTPError if Referer header does/does not match the given pattern. + + pattern + A regular expression pattern to test against the Referer. + + accept + If True, the Referer must match the pattern; if False, + the Referer must NOT match the pattern. + + accept_missing + If True, permit requests with no Referer header. + + error + The HTTP error code to return to the client on failure. + + message + A string to include in the response body on failure. + + """ + try: + ref = cherrypy.serving.request.headers['Referer'] + match = bool(re.match(pattern, ref)) + if debug: + cherrypy.log('Referer %r matches %r' % (ref, pattern), + 'TOOLS.REFERER') + if accept == match: + return + except KeyError: + if debug: + cherrypy.log('No Referer header', 'TOOLS.REFERER') + if accept_missing: + return + + raise cherrypy.HTTPError(error, message) + + +class SessionAuth(object): + + """Assert that the user is logged in.""" + + session_key = "username" + debug = False + + def check_username_and_password(self, username, password): + pass + + def anonymous(self): + """Provide a temporary user name for anonymous users.""" + pass + + def on_login(self, username): + pass + + def on_logout(self, username): + pass + + def on_check(self, username): + pass + + def login_screen(self, from_page='..', username='', error_msg='', + **kwargs): + return (unicodestr(""" +Message: %(error_msg)s +
+ Login: +
+ Password: +
+ +
+ +
+""") % vars()).encode("utf-8") + + def do_login(self, username, password, from_page='..', **kwargs): + """Login. May raise redirect, or return True if request handled.""" + response = cherrypy.serving.response + error_msg = self.check_username_and_password(username, password) + if error_msg: + body = self.login_screen(from_page, username, error_msg) + response.body = body + if "Content-Length" in response.headers: + # Delete Content-Length header so finalize() recalcs it. + del response.headers["Content-Length"] + return True + else: + cherrypy.serving.request.login = username + cherrypy.session[self.session_key] = username + self.on_login(username) + raise cherrypy.HTTPRedirect(from_page or "/") + + def do_logout(self, from_page='..', **kwargs): + """Logout. May raise redirect, or return True if request handled.""" + sess = cherrypy.session + username = sess.get(self.session_key) + sess[self.session_key] = None + if username: + cherrypy.serving.request.login = None + self.on_logout(username) + raise cherrypy.HTTPRedirect(from_page) + + def do_check(self): + """Assert username. Raise redirect, or return True if request handled. + """ + sess = cherrypy.session + request = cherrypy.serving.request + response = cherrypy.serving.response + + username = sess.get(self.session_key) + if not username: + sess[self.session_key] = username = self.anonymous() + self._debug_message('No session[username], trying anonymous') + if not username: + url = cherrypy.url(qs=request.query_string) + self._debug_message( + 'No username, routing to login_screen with from_page %(url)r', + locals(), + ) + response.body = self.login_screen(url) + if "Content-Length" in response.headers: + # Delete Content-Length header so finalize() recalcs it. + del response.headers["Content-Length"] + return True + self._debug_message('Setting request.login to %(username)r', locals()) + request.login = username + self.on_check(username) + + def _debug_message(self, template, context={}): + if not self.debug: + return + cherrypy.log(template % context, 'TOOLS.SESSAUTH') + + def run(self): + request = cherrypy.serving.request + response = cherrypy.serving.response + + path = request.path_info + if path.endswith('login_screen'): + self._debug_message('routing %(path)r to login_screen', locals()) + response.body = self.login_screen() + return True + elif path.endswith('do_login'): + if request.method != 'POST': + response.headers['Allow'] = "POST" + self._debug_message('do_login requires POST') + raise cherrypy.HTTPError(405) + self._debug_message('routing %(path)r to do_login', locals()) + return self.do_login(**request.params) + elif path.endswith('do_logout'): + if request.method != 'POST': + response.headers['Allow'] = "POST" + raise cherrypy.HTTPError(405) + self._debug_message('routing %(path)r to do_logout', locals()) + return self.do_logout(**request.params) + else: + self._debug_message('No special path, running do_check') + return self.do_check() + + +def session_auth(**kwargs): + sa = SessionAuth() + for k, v in kwargs.items(): + setattr(sa, k, v) + return sa.run() +session_auth.__doc__ = """Session authentication hook. + +Any attribute of the SessionAuth class may be overridden via a keyword arg +to this function: + +""" + "\n".join(["%s: %s" % (k, type(getattr(SessionAuth, k)).__name__) + for k in dir(SessionAuth) if not k.startswith("__")]) + + +def log_traceback(severity=logging.ERROR, debug=False): + """Write the last error's traceback to the cherrypy error log.""" + cherrypy.log("", "HTTP", severity=severity, traceback=True) + + +def log_request_headers(debug=False): + """Write request headers to the cherrypy error log.""" + h = [" %s: %s" % (k, v) for k, v in cherrypy.serving.request.header_list] + cherrypy.log('\nRequest Headers:\n' + '\n'.join(h), "HTTP") + + +def log_hooks(debug=False): + """Write request.hooks to the cherrypy error log.""" + request = cherrypy.serving.request + + msg = [] + # Sort by the standard points if possible. + from cherrypy import _cprequest + points = _cprequest.hookpoints + for k in request.hooks.keys(): + if k not in points: + points.append(k) + + for k in points: + msg.append(" %s:" % k) + v = request.hooks.get(k, []) + v.sort() + for h in v: + msg.append(" %r" % h) + cherrypy.log('\nRequest Hooks for ' + cherrypy.url() + + ':\n' + '\n'.join(msg), "HTTP") + + +def redirect(url='', internal=True, debug=False): + """Raise InternalRedirect or HTTPRedirect to the given url.""" + if debug: + cherrypy.log('Redirecting %sto: %s' % + ({True: 'internal ', False: ''}[internal], url), + 'TOOLS.REDIRECT') + if internal: + raise cherrypy.InternalRedirect(url) + else: + raise cherrypy.HTTPRedirect(url) + + +def trailing_slash(missing=True, extra=False, status=None, debug=False): + """Redirect if path_info has (missing|extra) trailing slash.""" + request = cherrypy.serving.request + pi = request.path_info + + if debug: + cherrypy.log('is_index: %r, missing: %r, extra: %r, path_info: %r' % + (request.is_index, missing, extra, pi), + 'TOOLS.TRAILING_SLASH') + if request.is_index is True: + if missing: + if not pi.endswith('/'): + new_url = cherrypy.url(pi + '/', request.query_string) + raise cherrypy.HTTPRedirect(new_url, status=status or 301) + elif request.is_index is False: + if extra: + # If pi == '/', don't redirect to ''! + if pi.endswith('/') and pi != '/': + new_url = cherrypy.url(pi[:-1], request.query_string) + raise cherrypy.HTTPRedirect(new_url, status=status or 301) + + +def flatten(debug=False): + """Wrap response.body in a generator that recursively iterates over body. + + This allows cherrypy.response.body to consist of 'nested generators'; + that is, a set of generators that yield generators. + """ + def flattener(input): + numchunks = 0 + for x in input: + if not is_iterator(x): + numchunks += 1 + yield x + else: + for y in flattener(x): + numchunks += 1 + yield y + if debug: + cherrypy.log('Flattened %d chunks' % numchunks, 'TOOLS.FLATTEN') + response = cherrypy.serving.response + response.body = flattener(response.body) + + +def accept(media=None, debug=False): + """Return the client's preferred media-type (from the given Content-Types). + + If 'media' is None (the default), no test will be performed. + + If 'media' is provided, it should be the Content-Type value (as a string) + or values (as a list or tuple of strings) which the current resource + can emit. The client's acceptable media ranges (as declared in the + Accept request header) will be matched in order to these Content-Type + values; the first such string is returned. That is, the return value + will always be one of the strings provided in the 'media' arg (or None + if 'media' is None). + + If no match is found, then HTTPError 406 (Not Acceptable) is raised. + Note that most web browsers send */* as a (low-quality) acceptable + media range, which should match any Content-Type. In addition, "...if + no Accept header field is present, then it is assumed that the client + accepts all media types." + + Matching types are checked in order of client preference first, + and then in the order of the given 'media' values. + + Note that this function does not honor accept-params (other than "q"). + """ + if not media: + return + if isinstance(media, basestring): + media = [media] + request = cherrypy.serving.request + + # Parse the Accept request header, and try to match one + # of the requested media-ranges (in order of preference). + ranges = request.headers.elements('Accept') + if not ranges: + # Any media type is acceptable. + if debug: + cherrypy.log('No Accept header elements', 'TOOLS.ACCEPT') + return media[0] + else: + # Note that 'ranges' is sorted in order of preference + for element in ranges: + if element.qvalue > 0: + if element.value == "*/*": + # Matches any type or subtype + if debug: + cherrypy.log('Match due to */*', 'TOOLS.ACCEPT') + return media[0] + elif element.value.endswith("/*"): + # Matches any subtype + mtype = element.value[:-1] # Keep the slash + for m in media: + if m.startswith(mtype): + if debug: + cherrypy.log('Match due to %s' % element.value, + 'TOOLS.ACCEPT') + return m + else: + # Matches exact value + if element.value in media: + if debug: + cherrypy.log('Match due to %s' % element.value, + 'TOOLS.ACCEPT') + return element.value + + # No suitable media-range found. + ah = request.headers.get('Accept') + if ah is None: + msg = "Your client did not send an Accept header." + else: + msg = "Your client sent this Accept header: %s." % ah + msg += (" But this resource only emits these media types: %s." % + ", ".join(media)) + raise cherrypy.HTTPError(406, msg) + + +class MonitoredHeaderMap(_httputil.HeaderMap): + + def __init__(self): + self.accessed_headers = set() + + def __getitem__(self, key): + self.accessed_headers.add(key) + return _httputil.HeaderMap.__getitem__(self, key) + + def __contains__(self, key): + self.accessed_headers.add(key) + return _httputil.HeaderMap.__contains__(self, key) + + def get(self, key, default=None): + self.accessed_headers.add(key) + return _httputil.HeaderMap.get(self, key, default=default) + + if hasattr({}, 'has_key'): + # Python 2 + def has_key(self, key): + self.accessed_headers.add(key) + return _httputil.HeaderMap.has_key(self, key) + + +def autovary(ignore=None, debug=False): + """Auto-populate the Vary response header based on request.header access. + """ + request = cherrypy.serving.request + + req_h = request.headers + request.headers = MonitoredHeaderMap() + request.headers.update(req_h) + if ignore is None: + ignore = set(['Content-Disposition', 'Content-Length', 'Content-Type']) + + def set_response_header(): + resp_h = cherrypy.serving.response.headers + v = set([e.value for e in resp_h.elements('Vary')]) + if debug: + cherrypy.log( + 'Accessed headers: %s' % request.headers.accessed_headers, + 'TOOLS.AUTOVARY') + v = v.union(request.headers.accessed_headers) + v = v.difference(ignore) + v = list(v) + v.sort() + resp_h['Vary'] = ', '.join(v) + request.hooks.attach('before_finalize', set_response_header, 95) diff --git a/lib/cherrypy/lib/encoding.py b/lib/cherrypy/lib/encoding.py new file mode 100644 index 00000000..a4c2cbd6 --- /dev/null +++ b/lib/cherrypy/lib/encoding.py @@ -0,0 +1,421 @@ +import struct +import time + +import cherrypy +from cherrypy._cpcompat import basestring, BytesIO, ntob, set, unicodestr +from cherrypy.lib import file_generator +from cherrypy.lib import is_closable_iterator +from cherrypy.lib import set_vary_header + + +def decode(encoding=None, default_encoding='utf-8'): + """Replace or extend the list of charsets used to decode a request entity. + + Either argument may be a single string or a list of strings. + + encoding + If not None, restricts the set of charsets attempted while decoding + a request entity to the given set (even if a different charset is + given in the Content-Type request header). + + default_encoding + Only in effect if the 'encoding' argument is not given. + If given, the set of charsets attempted while decoding a request + entity is *extended* with the given value(s). + + """ + body = cherrypy.request.body + if encoding is not None: + if not isinstance(encoding, list): + encoding = [encoding] + body.attempt_charsets = encoding + elif default_encoding: + if not isinstance(default_encoding, list): + default_encoding = [default_encoding] + body.attempt_charsets = body.attempt_charsets + default_encoding + +class UTF8StreamEncoder: + def __init__(self, iterator): + self._iterator = iterator + + def __iter__(self): + return self + + def next(self): + return self.__next__() + + def __next__(self): + res = next(self._iterator) + if isinstance(res, unicodestr): + res = res.encode('utf-8') + return res + + def close(self): + if is_closable_iterator(self._iterator): + self._iterator.close() + + def __getattr__(self, attr): + if attr.startswith('__'): + raise AttributeError(self, attr) + return getattr(self._iterator, attr) + + +class ResponseEncoder: + + default_encoding = 'utf-8' + failmsg = "Response body could not be encoded with %r." + encoding = None + errors = 'strict' + text_only = True + add_charset = True + debug = False + + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + self.attempted_charsets = set() + request = cherrypy.serving.request + if request.handler is not None: + # Replace request.handler with self + if self.debug: + cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE') + self.oldhandler = request.handler + request.handler = self + + def encode_stream(self, encoding): + """Encode a streaming response body. + + Use a generator wrapper, and just pray it works as the stream is + being written out. + """ + if encoding in self.attempted_charsets: + return False + self.attempted_charsets.add(encoding) + + def encoder(body): + for chunk in body: + if isinstance(chunk, unicodestr): + chunk = chunk.encode(encoding, self.errors) + yield chunk + self.body = encoder(self.body) + return True + + def encode_string(self, encoding): + """Encode a buffered response body.""" + if encoding in self.attempted_charsets: + return False + self.attempted_charsets.add(encoding) + body = [] + for chunk in self.body: + if isinstance(chunk, unicodestr): + try: + chunk = chunk.encode(encoding, self.errors) + except (LookupError, UnicodeError): + return False + body.append(chunk) + self.body = body + return True + + def find_acceptable_charset(self): + request = cherrypy.serving.request + response = cherrypy.serving.response + + if self.debug: + cherrypy.log('response.stream %r' % + response.stream, 'TOOLS.ENCODE') + if response.stream: + encoder = self.encode_stream + else: + encoder = self.encode_string + if "Content-Length" in response.headers: + # Delete Content-Length header so finalize() recalcs it. + # Encoded strings may be of different lengths from their + # unicode equivalents, and even from each other. For example: + # >>> t = u"\u7007\u3040" + # >>> len(t) + # 2 + # >>> len(t.encode("UTF-8")) + # 6 + # >>> len(t.encode("utf7")) + # 8 + del response.headers["Content-Length"] + + # Parse the Accept-Charset request header, and try to provide one + # of the requested charsets (in order of user preference). + encs = request.headers.elements('Accept-Charset') + charsets = [enc.value.lower() for enc in encs] + if self.debug: + cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE') + + if self.encoding is not None: + # If specified, force this encoding to be used, or fail. + encoding = self.encoding.lower() + if self.debug: + cherrypy.log('Specified encoding %r' % + encoding, 'TOOLS.ENCODE') + if (not charsets) or "*" in charsets or encoding in charsets: + if self.debug: + cherrypy.log('Attempting encoding %r' % + encoding, 'TOOLS.ENCODE') + if encoder(encoding): + return encoding + else: + if not encs: + if self.debug: + cherrypy.log('Attempting default encoding %r' % + self.default_encoding, 'TOOLS.ENCODE') + # Any character-set is acceptable. + if encoder(self.default_encoding): + return self.default_encoding + else: + raise cherrypy.HTTPError(500, self.failmsg % + self.default_encoding) + else: + for element in encs: + if element.qvalue > 0: + if element.value == "*": + # Matches any charset. Try our default. + if self.debug: + cherrypy.log('Attempting default encoding due ' + 'to %r' % element, 'TOOLS.ENCODE') + if encoder(self.default_encoding): + return self.default_encoding + else: + encoding = element.value + if self.debug: + cherrypy.log('Attempting encoding %s (qvalue >' + '0)' % element, 'TOOLS.ENCODE') + if encoder(encoding): + return encoding + + if "*" not in charsets: + # If no "*" is present in an Accept-Charset field, then all + # character sets not explicitly mentioned get a quality + # value of 0, except for ISO-8859-1, which gets a quality + # value of 1 if not explicitly mentioned. + iso = 'iso-8859-1' + if iso not in charsets: + if self.debug: + cherrypy.log('Attempting ISO-8859-1 encoding', + 'TOOLS.ENCODE') + if encoder(iso): + return iso + + # No suitable encoding found. + ac = request.headers.get('Accept-Charset') + if ac is None: + msg = "Your client did not send an Accept-Charset header." + else: + msg = "Your client sent this Accept-Charset header: %s." % ac + _charsets = ", ".join(sorted(self.attempted_charsets)) + msg += " We tried these charsets: %s." % (_charsets,) + raise cherrypy.HTTPError(406, msg) + + def __call__(self, *args, **kwargs): + response = cherrypy.serving.response + self.body = self.oldhandler(*args, **kwargs) + + if isinstance(self.body, basestring): + # strings get wrapped in a list because iterating over a single + # item list is much faster than iterating over every character + # in a long string. + if self.body: + self.body = [self.body] + else: + # [''] doesn't evaluate to False, so replace it with []. + self.body = [] + elif hasattr(self.body, 'read'): + self.body = file_generator(self.body) + elif self.body is None: + self.body = [] + + ct = response.headers.elements("Content-Type") + if self.debug: + cherrypy.log('Content-Type: %r' % [str(h) + for h in ct], 'TOOLS.ENCODE') + if ct and self.add_charset: + ct = ct[0] + if self.text_only: + if ct.value.lower().startswith("text/"): + if self.debug: + cherrypy.log( + 'Content-Type %s starts with "text/"' % ct, + 'TOOLS.ENCODE') + do_find = True + else: + if self.debug: + cherrypy.log('Not finding because Content-Type %s ' + 'does not start with "text/"' % ct, + 'TOOLS.ENCODE') + do_find = False + else: + if self.debug: + cherrypy.log('Finding because not text_only', + 'TOOLS.ENCODE') + do_find = True + + if do_find: + # Set "charset=..." param on response Content-Type header + ct.params['charset'] = self.find_acceptable_charset() + if self.debug: + cherrypy.log('Setting Content-Type %s' % ct, + 'TOOLS.ENCODE') + response.headers["Content-Type"] = str(ct) + + return self.body + +# GZIP + + +def compress(body, compress_level): + """Compress 'body' at the given compress_level.""" + import zlib + + # See http://www.gzip.org/zlib/rfc-gzip.html + yield ntob('\x1f\x8b') # ID1 and ID2: gzip marker + yield ntob('\x08') # CM: compression method + yield ntob('\x00') # FLG: none set + # MTIME: 4 bytes + yield struct.pack(" 0 is present + * The 'identity' value is given with a qvalue > 0. + + """ + request = cherrypy.serving.request + response = cherrypy.serving.response + + set_vary_header(response, "Accept-Encoding") + + if not response.body: + # Response body is empty (might be a 304 for instance) + if debug: + cherrypy.log('No response body', context='TOOLS.GZIP') + return + + # If returning cached content (which should already have been gzipped), + # don't re-zip. + if getattr(request, "cached", False): + if debug: + cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP') + return + + acceptable = request.headers.elements('Accept-Encoding') + if not acceptable: + # If no Accept-Encoding field is present in a request, + # the server MAY assume that the client will accept any + # content coding. In this case, if "identity" is one of + # the available content-codings, then the server SHOULD use + # the "identity" content-coding, unless it has additional + # information that a different content-coding is meaningful + # to the client. + if debug: + cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP') + return + + ct = response.headers.get('Content-Type', '').split(';')[0] + for coding in acceptable: + if coding.value == 'identity' and coding.qvalue != 0: + if debug: + cherrypy.log('Non-zero identity qvalue: %s' % coding, + context='TOOLS.GZIP') + return + if coding.value in ('gzip', 'x-gzip'): + if coding.qvalue == 0: + if debug: + cherrypy.log('Zero gzip qvalue: %s' % coding, + context='TOOLS.GZIP') + return + + if ct not in mime_types: + # If the list of provided mime-types contains tokens + # such as 'text/*' or 'application/*+xml', + # we go through them and find the most appropriate one + # based on the given content-type. + # The pattern matching is only caring about the most + # common cases, as stated above, and doesn't support + # for extra parameters. + found = False + if '/' in ct: + ct_media_type, ct_sub_type = ct.split('/') + for mime_type in mime_types: + if '/' in mime_type: + media_type, sub_type = mime_type.split('/') + if ct_media_type == media_type: + if sub_type == '*': + found = True + break + elif '+' in sub_type and '+' in ct_sub_type: + ct_left, ct_right = ct_sub_type.split('+') + left, right = sub_type.split('+') + if left == '*' and ct_right == right: + found = True + break + + if not found: + if debug: + cherrypy.log('Content-Type %s not in mime_types %r' % + (ct, mime_types), context='TOOLS.GZIP') + return + + if debug: + cherrypy.log('Gzipping', context='TOOLS.GZIP') + # Return a generator that compresses the page + response.headers['Content-Encoding'] = 'gzip' + response.body = compress(response.body, compress_level) + if "Content-Length" in response.headers: + # Delete Content-Length header so finalize() recalcs it. + del response.headers["Content-Length"] + + return + + if debug: + cherrypy.log('No acceptable encoding found.', context='GZIP') + cherrypy.HTTPError(406, "identity, gzip").set_response() diff --git a/lib/cherrypy/lib/gctools.py b/lib/cherrypy/lib/gctools.py new file mode 100644 index 00000000..4b616c59 --- /dev/null +++ b/lib/cherrypy/lib/gctools.py @@ -0,0 +1,217 @@ +import gc +import inspect +import os +import sys +import time + +try: + import objgraph +except ImportError: + objgraph = None + +import cherrypy +from cherrypy import _cprequest, _cpwsgi +from cherrypy.process.plugins import SimplePlugin + + +class ReferrerTree(object): + + """An object which gathers all referrers of an object to a given depth.""" + + peek_length = 40 + + def __init__(self, ignore=None, maxdepth=2, maxparents=10): + self.ignore = ignore or [] + self.ignore.append(inspect.currentframe().f_back) + self.maxdepth = maxdepth + self.maxparents = maxparents + + def ascend(self, obj, depth=1): + """Return a nested list containing referrers of the given object.""" + depth += 1 + parents = [] + + # Gather all referrers in one step to minimize + # cascading references due to repr() logic. + refs = gc.get_referrers(obj) + self.ignore.append(refs) + if len(refs) > self.maxparents: + return [("[%s referrers]" % len(refs), [])] + + try: + ascendcode = self.ascend.__code__ + except AttributeError: + ascendcode = self.ascend.im_func.func_code + for parent in refs: + if inspect.isframe(parent) and parent.f_code is ascendcode: + continue + if parent in self.ignore: + continue + if depth <= self.maxdepth: + parents.append((parent, self.ascend(parent, depth))) + else: + parents.append((parent, [])) + + return parents + + def peek(self, s): + """Return s, restricted to a sane length.""" + if len(s) > (self.peek_length + 3): + half = self.peek_length // 2 + return s[:half] + '...' + s[-half:] + else: + return s + + def _format(self, obj, descend=True): + """Return a string representation of a single object.""" + if inspect.isframe(obj): + filename, lineno, func, context, index = inspect.getframeinfo(obj) + return "" % func + + if not descend: + return self.peek(repr(obj)) + + if isinstance(obj, dict): + return "{" + ", ".join(["%s: %s" % (self._format(k, descend=False), + self._format(v, descend=False)) + for k, v in obj.items()]) + "}" + elif isinstance(obj, list): + return "[" + ", ".join([self._format(item, descend=False) + for item in obj]) + "]" + elif isinstance(obj, tuple): + return "(" + ", ".join([self._format(item, descend=False) + for item in obj]) + ")" + + r = self.peek(repr(obj)) + if isinstance(obj, (str, int, float)): + return r + return "%s: %s" % (type(obj), r) + + def format(self, tree): + """Return a list of string reprs from a nested list of referrers.""" + output = [] + + def ascend(branch, depth=1): + for parent, grandparents in branch: + output.append((" " * depth) + self._format(parent)) + if grandparents: + ascend(grandparents, depth + 1) + ascend(tree) + return output + + +def get_instances(cls): + return [x for x in gc.get_objects() if isinstance(x, cls)] + + +class RequestCounter(SimplePlugin): + + def start(self): + self.count = 0 + + def before_request(self): + self.count += 1 + + def after_request(self): + self.count -= 1 +request_counter = RequestCounter(cherrypy.engine) +request_counter.subscribe() + + +def get_context(obj): + if isinstance(obj, _cprequest.Request): + return "path=%s;stage=%s" % (obj.path_info, obj.stage) + elif isinstance(obj, _cprequest.Response): + return "status=%s" % obj.status + elif isinstance(obj, _cpwsgi.AppResponse): + return "PATH_INFO=%s" % obj.environ.get('PATH_INFO', '') + elif hasattr(obj, "tb_lineno"): + return "tb_lineno=%s" % obj.tb_lineno + return "" + + +class GCRoot(object): + + """A CherryPy page handler for testing reference leaks.""" + + classes = [ + (_cprequest.Request, 2, 2, + "Should be 1 in this request thread and 1 in the main thread."), + (_cprequest.Response, 2, 2, + "Should be 1 in this request thread and 1 in the main thread."), + (_cpwsgi.AppResponse, 1, 1, + "Should be 1 in this request thread only."), + ] + + def index(self): + return "Hello, world!" + index.exposed = True + + def stats(self): + output = ["Statistics:"] + + for trial in range(10): + if request_counter.count > 0: + break + time.sleep(0.5) + else: + output.append("\nNot all requests closed properly.") + + # gc_collect isn't perfectly synchronous, because it may + # break reference cycles that then take time to fully + # finalize. Call it thrice and hope for the best. + gc.collect() + gc.collect() + unreachable = gc.collect() + if unreachable: + if objgraph is not None: + final = objgraph.by_type('Nondestructible') + if final: + objgraph.show_backrefs(final, filename='finalizers.png') + + trash = {} + for x in gc.garbage: + trash[type(x)] = trash.get(type(x), 0) + 1 + if trash: + output.insert(0, "\n%s unreachable objects:" % unreachable) + trash = [(v, k) for k, v in trash.items()] + trash.sort() + for pair in trash: + output.append(" " + repr(pair)) + + # Check declared classes to verify uncollected instances. + # These don't have to be part of a cycle; they can be + # any objects that have unanticipated referrers that keep + # them from being collected. + allobjs = {} + for cls, minobj, maxobj, msg in self.classes: + allobjs[cls] = get_instances(cls) + + for cls, minobj, maxobj, msg in self.classes: + objs = allobjs[cls] + lenobj = len(objs) + if lenobj < minobj or lenobj > maxobj: + if minobj == maxobj: + output.append( + "\nExpected %s %r references, got %s." % + (minobj, cls, lenobj)) + else: + output.append( + "\nExpected %s to %s %r references, got %s." % + (minobj, maxobj, cls, lenobj)) + + for obj in objs: + if objgraph is not None: + ig = [id(objs), id(inspect.currentframe())] + fname = "graph_%s_%s.png" % (cls.__name__, id(obj)) + objgraph.show_backrefs( + obj, extra_ignore=ig, max_depth=4, too_many=20, + filename=fname, extra_info=get_context) + output.append("\nReferrers for %s (refcount=%s):" % + (repr(obj), sys.getrefcount(obj))) + t = ReferrerTree(ignore=[objs], maxdepth=3) + tree = t.ascend(obj) + output.extend(t.format(tree)) + + return "\n".join(output) + stats.exposed = True diff --git a/lib/cherrypy/lib/http.py b/lib/cherrypy/lib/http.py new file mode 100644 index 00000000..12043ad1 --- /dev/null +++ b/lib/cherrypy/lib/http.py @@ -0,0 +1,6 @@ +import warnings +warnings.warn('cherrypy.lib.http has been deprecated and will be removed ' + 'in CherryPy 3.3 use cherrypy.lib.httputil instead.', + DeprecationWarning) + +from cherrypy.lib.httputil import * diff --git a/lib/cherrypy/lib/httpauth.py b/lib/cherrypy/lib/httpauth.py new file mode 100644 index 00000000..0897ea2a --- /dev/null +++ b/lib/cherrypy/lib/httpauth.py @@ -0,0 +1,371 @@ +""" +This module defines functions to implement HTTP Digest Authentication +(:rfc:`2617`). +This has full compliance with 'Digest' and 'Basic' authentication methods. In +'Digest' it supports both MD5 and MD5-sess algorithms. + +Usage: + First use 'doAuth' to request the client authentication for a + certain resource. You should send an httplib.UNAUTHORIZED response to the + client so he knows he has to authenticate itself. + + Then use 'parseAuthorization' to retrieve the 'auth_map' used in + 'checkResponse'. + + To use 'checkResponse' you must have already verified the password + associated with the 'username' key in 'auth_map' dict. Then you use the + 'checkResponse' function to verify if the password matches the one sent + by the client. + +SUPPORTED_ALGORITHM - list of supported 'Digest' algorithms +SUPPORTED_QOP - list of supported 'Digest' 'qop'. +""" +__version__ = 1, 0, 1 +__author__ = "Tiago Cogumbreiro " +__credits__ = """ + Peter van Kampen for its recipe which implement most of Digest + authentication: + http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/302378 +""" + +__license__ = """ +Copyright (c) 2005, Tiago Cogumbreiro +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Sylvain Hellegouarch nor the names of his + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +__all__ = ("digestAuth", "basicAuth", "doAuth", "checkResponse", + "parseAuthorization", "SUPPORTED_ALGORITHM", "md5SessionKey", + "calculateNonce", "SUPPORTED_QOP") + +########################################################################## +import time +from cherrypy._cpcompat import base64_decode, ntob, md5 +from cherrypy._cpcompat import parse_http_list, parse_keqv_list + +MD5 = "MD5" +MD5_SESS = "MD5-sess" +AUTH = "auth" +AUTH_INT = "auth-int" + +SUPPORTED_ALGORITHM = (MD5, MD5_SESS) +SUPPORTED_QOP = (AUTH, AUTH_INT) + +########################################################################## +# doAuth +# +DIGEST_AUTH_ENCODERS = { + MD5: lambda val: md5(ntob(val)).hexdigest(), + MD5_SESS: lambda val: md5(ntob(val)).hexdigest(), + # SHA: lambda val: sha.new(ntob(val)).hexdigest (), +} + + +def calculateNonce(realm, algorithm=MD5): + """This is an auxaliary function that calculates 'nonce' value. It is used + to handle sessions.""" + + global SUPPORTED_ALGORITHM, DIGEST_AUTH_ENCODERS + assert algorithm in SUPPORTED_ALGORITHM + + try: + encoder = DIGEST_AUTH_ENCODERS[algorithm] + except KeyError: + raise NotImplementedError("The chosen algorithm (%s) does not have " + "an implementation yet" % algorithm) + + return encoder("%d:%s" % (time.time(), realm)) + + +def digestAuth(realm, algorithm=MD5, nonce=None, qop=AUTH): + """Challenges the client for a Digest authentication.""" + global SUPPORTED_ALGORITHM, DIGEST_AUTH_ENCODERS, SUPPORTED_QOP + assert algorithm in SUPPORTED_ALGORITHM + assert qop in SUPPORTED_QOP + + if nonce is None: + nonce = calculateNonce(realm, algorithm) + + return 'Digest realm="%s", nonce="%s", algorithm="%s", qop="%s"' % ( + realm, nonce, algorithm, qop + ) + + +def basicAuth(realm): + """Challengenes the client for a Basic authentication.""" + assert '"' not in realm, "Realms cannot contain the \" (quote) character." + + return 'Basic realm="%s"' % realm + + +def doAuth(realm): + """'doAuth' function returns the challenge string b giving priority over + Digest and fallback to Basic authentication when the browser doesn't + support the first one. + + This should be set in the HTTP header under the key 'WWW-Authenticate'.""" + + return digestAuth(realm) + " " + basicAuth(realm) + + +########################################################################## +# Parse authorization parameters +# +def _parseDigestAuthorization(auth_params): + # Convert the auth params to a dict + items = parse_http_list(auth_params) + params = parse_keqv_list(items) + + # Now validate the params + + # Check for required parameters + required = ["username", "realm", "nonce", "uri", "response"] + for k in required: + if k not in params: + return None + + # If qop is sent then cnonce and nc MUST be present + if "qop" in params and not ("cnonce" in params + and "nc" in params): + return None + + # If qop is not sent, neither cnonce nor nc can be present + if ("cnonce" in params or "nc" in params) and \ + "qop" not in params: + return None + + return params + + +def _parseBasicAuthorization(auth_params): + username, password = base64_decode(auth_params).split(":", 1) + return {"username": username, "password": password} + +AUTH_SCHEMES = { + "basic": _parseBasicAuthorization, + "digest": _parseDigestAuthorization, +} + + +def parseAuthorization(credentials): + """parseAuthorization will convert the value of the 'Authorization' key in + the HTTP header to a map itself. If the parsing fails 'None' is returned. + """ + + global AUTH_SCHEMES + + auth_scheme, auth_params = credentials.split(" ", 1) + auth_scheme = auth_scheme.lower() + + parser = AUTH_SCHEMES[auth_scheme] + params = parser(auth_params) + + if params is None: + return + + assert "auth_scheme" not in params + params["auth_scheme"] = auth_scheme + return params + + +########################################################################## +# Check provided response for a valid password +# +def md5SessionKey(params, password): + """ + If the "algorithm" directive's value is "MD5-sess", then A1 + [the session key] is calculated only once - on the first request by the + client following receipt of a WWW-Authenticate challenge from the server. + + This creates a 'session key' for the authentication of subsequent + requests and responses which is different for each "authentication + session", thus limiting the amount of material hashed with any one + key. + + Because the server need only use the hash of the user + credentials in order to create the A1 value, this construction could + be used in conjunction with a third party authentication service so + that the web server would not need the actual password value. The + specification of such a protocol is beyond the scope of this + specification. +""" + + keys = ("username", "realm", "nonce", "cnonce") + params_copy = {} + for key in keys: + params_copy[key] = params[key] + + params_copy["algorithm"] = MD5_SESS + return _A1(params_copy, password) + + +def _A1(params, password): + algorithm = params.get("algorithm", MD5) + H = DIGEST_AUTH_ENCODERS[algorithm] + + if algorithm == MD5: + # If the "algorithm" directive's value is "MD5" or is + # unspecified, then A1 is: + # A1 = unq(username-value) ":" unq(realm-value) ":" passwd + return "%s:%s:%s" % (params["username"], params["realm"], password) + + elif algorithm == MD5_SESS: + + # This is A1 if qop is set + # A1 = H( unq(username-value) ":" unq(realm-value) ":" passwd ) + # ":" unq(nonce-value) ":" unq(cnonce-value) + h_a1 = H("%s:%s:%s" % (params["username"], params["realm"], password)) + return "%s:%s:%s" % (h_a1, params["nonce"], params["cnonce"]) + + +def _A2(params, method, kwargs): + # If the "qop" directive's value is "auth" or is unspecified, then A2 is: + # A2 = Method ":" digest-uri-value + + qop = params.get("qop", "auth") + if qop == "auth": + return method + ":" + params["uri"] + elif qop == "auth-int": + # If the "qop" value is "auth-int", then A2 is: + # A2 = Method ":" digest-uri-value ":" H(entity-body) + entity_body = kwargs.get("entity_body", "") + H = kwargs["H"] + + return "%s:%s:%s" % ( + method, + params["uri"], + H(entity_body) + ) + + else: + raise NotImplementedError("The 'qop' method is unknown: %s" % qop) + + +def _computeDigestResponse(auth_map, password, method="GET", A1=None, + **kwargs): + """ + Generates a response respecting the algorithm defined in RFC 2617 + """ + params = auth_map + + algorithm = params.get("algorithm", MD5) + + H = DIGEST_AUTH_ENCODERS[algorithm] + KD = lambda secret, data: H(secret + ":" + data) + + qop = params.get("qop", None) + + H_A2 = H(_A2(params, method, kwargs)) + + if algorithm == MD5_SESS and A1 is not None: + H_A1 = H(A1) + else: + H_A1 = H(_A1(params, password)) + + if qop in ("auth", "auth-int"): + # If the "qop" value is "auth" or "auth-int": + # request-digest = <"> < KD ( H(A1), unq(nonce-value) + # ":" nc-value + # ":" unq(cnonce-value) + # ":" unq(qop-value) + # ":" H(A2) + # ) <"> + request = "%s:%s:%s:%s:%s" % ( + params["nonce"], + params["nc"], + params["cnonce"], + params["qop"], + H_A2, + ) + elif qop is None: + # If the "qop" directive is not present (this construction is + # for compatibility with RFC 2069): + # request-digest = + # <"> < KD ( H(A1), unq(nonce-value) ":" H(A2) ) > <"> + request = "%s:%s" % (params["nonce"], H_A2) + + return KD(H_A1, request) + + +def _checkDigestResponse(auth_map, password, method="GET", A1=None, **kwargs): + """This function is used to verify the response given by the client when + he tries to authenticate. + Optional arguments: + entity_body - when 'qop' is set to 'auth-int' you MUST provide the + raw data you are going to send to the client (usually the + HTML page. + request_uri - the uri from the request line compared with the 'uri' + directive of the authorization map. They must represent + the same resource (unused at this time). + """ + + if auth_map['realm'] != kwargs.get('realm', None): + return False + + response = _computeDigestResponse( + auth_map, password, method, A1, **kwargs) + + return response == auth_map["response"] + + +def _checkBasicResponse(auth_map, password, method='GET', encrypt=None, + **kwargs): + # Note that the Basic response doesn't provide the realm value so we cannot + # test it + pass_through = lambda password, username=None: password + encrypt = encrypt or pass_through + try: + candidate = encrypt(auth_map["password"], auth_map["username"]) + except TypeError: + # if encrypt only takes one parameter, it's the password + candidate = encrypt(auth_map["password"]) + return candidate == password + +AUTH_RESPONSES = { + "basic": _checkBasicResponse, + "digest": _checkDigestResponse, +} + + +def checkResponse(auth_map, password, method="GET", encrypt=None, **kwargs): + """'checkResponse' compares the auth_map with the password and optionally + other arguments that each implementation might need. + + If the response is of type 'Basic' then the function has the following + signature:: + + checkBasicResponse(auth_map, password) -> bool + + If the response is of type 'Digest' then the function has the following + signature:: + + checkDigestResponse(auth_map, password, method='GET', A1=None) -> bool + + The 'A1' argument is only used in MD5_SESS algorithm based responses. + Check md5SessionKey() for more info. + """ + checker = AUTH_RESPONSES[auth_map["auth_scheme"]] + return checker(auth_map, password, method=method, encrypt=encrypt, + **kwargs) diff --git a/lib/cherrypy/lib/httputil.py b/lib/cherrypy/lib/httputil.py new file mode 100644 index 00000000..69a18d45 --- /dev/null +++ b/lib/cherrypy/lib/httputil.py @@ -0,0 +1,536 @@ +"""HTTP library functions. + +This module contains functions for building an HTTP application +framework: any one, not just one whose name starts with "Ch". ;) If you +reference any modules from some popular framework inside *this* module, +FuManChu will personally hang you up by your thumbs and submit you +to a public caning. +""" + +from binascii import b2a_base64 +from cherrypy._cpcompat import BaseHTTPRequestHandler, HTTPDate, ntob, ntou +from cherrypy._cpcompat import basestring, bytestr, iteritems, nativestr +from cherrypy._cpcompat import reversed, sorted, unicodestr, unquote_qs +response_codes = BaseHTTPRequestHandler.responses.copy() + +# From https://bitbucket.org/cherrypy/cherrypy/issue/361 +response_codes[500] = ('Internal Server Error', + 'The server encountered an unexpected condition ' + 'which prevented it from fulfilling the request.') +response_codes[503] = ('Service Unavailable', + 'The server is currently unable to handle the ' + 'request due to a temporary overloading or ' + 'maintenance of the server.') + +import re +import urllib + + +def urljoin(*atoms): + """Return the given path \*atoms, joined into a single URL. + + This will correctly join a SCRIPT_NAME and PATH_INFO into the + original URL, even if either atom is blank. + """ + url = "/".join([x for x in atoms if x]) + while "//" in url: + url = url.replace("//", "/") + # Special-case the final url of "", and return "/" instead. + return url or "/" + + +def urljoin_bytes(*atoms): + """Return the given path *atoms, joined into a single URL. + + This will correctly join a SCRIPT_NAME and PATH_INFO into the + original URL, even if either atom is blank. + """ + url = ntob("/").join([x for x in atoms if x]) + while ntob("//") in url: + url = url.replace(ntob("//"), ntob("/")) + # Special-case the final url of "", and return "/" instead. + return url or ntob("/") + + +def protocol_from_http(protocol_str): + """Return a protocol tuple from the given 'HTTP/x.y' string.""" + return int(protocol_str[5]), int(protocol_str[7]) + + +def get_ranges(headervalue, content_length): + """Return a list of (start, stop) indices from a Range header, or None. + + Each (start, stop) tuple will be composed of two ints, which are suitable + for use in a slicing operation. That is, the header "Range: bytes=3-6", + if applied against a Python string, is requesting resource[3:7]. This + function will return the list [(3, 7)]. + + If this function returns an empty list, you should return HTTP 416. + """ + + if not headervalue: + return None + + result = [] + bytesunit, byteranges = headervalue.split("=", 1) + for brange in byteranges.split(","): + start, stop = [x.strip() for x in brange.split("-", 1)] + if start: + if not stop: + stop = content_length - 1 + start, stop = int(start), int(stop) + if start >= content_length: + # From rfc 2616 sec 14.16: + # "If the server receives a request (other than one + # including an If-Range request-header field) with an + # unsatisfiable Range request-header field (that is, + # all of whose byte-range-spec values have a first-byte-pos + # value greater than the current length of the selected + # resource), it SHOULD return a response code of 416 + # (Requested range not satisfiable)." + continue + if stop < start: + # From rfc 2616 sec 14.16: + # "If the server ignores a byte-range-spec because it + # is syntactically invalid, the server SHOULD treat + # the request as if the invalid Range header field + # did not exist. (Normally, this means return a 200 + # response containing the full entity)." + return None + result.append((start, stop + 1)) + else: + if not stop: + # See rfc quote above. + return None + # Negative subscript (last N bytes) + # + # RFC 2616 Section 14.35.1: + # If the entity is shorter than the specified suffix-length, + # the entire entity-body is used. + if int(stop) > content_length: + result.append((0, content_length)) + else: + result.append((content_length - int(stop), content_length)) + + return result + + +class HeaderElement(object): + + """An element (with parameters) from an HTTP header's element list.""" + + def __init__(self, value, params=None): + self.value = value + if params is None: + params = {} + self.params = params + + def __cmp__(self, other): + return cmp(self.value, other.value) + + def __lt__(self, other): + return self.value < other.value + + def __str__(self): + p = [";%s=%s" % (k, v) for k, v in iteritems(self.params)] + return str("%s%s" % (self.value, "".join(p))) + + def __bytes__(self): + return ntob(self.__str__()) + + def __unicode__(self): + return ntou(self.__str__()) + + def parse(elementstr): + """Transform 'token;key=val' to ('token', {'key': 'val'}).""" + # Split the element into a value and parameters. The 'value' may + # be of the form, "token=token", but we don't split that here. + atoms = [x.strip() for x in elementstr.split(";") if x.strip()] + if not atoms: + initial_value = '' + else: + initial_value = atoms.pop(0).strip() + params = {} + for atom in atoms: + atom = [x.strip() for x in atom.split("=", 1) if x.strip()] + key = atom.pop(0) + if atom: + val = atom[0] + else: + val = "" + params[key] = val + return initial_value, params + parse = staticmethod(parse) + + def from_str(cls, elementstr): + """Construct an instance from a string of the form 'token;key=val'.""" + ival, params = cls.parse(elementstr) + return cls(ival, params) + from_str = classmethod(from_str) + + +q_separator = re.compile(r'; *q *=') + + +class AcceptElement(HeaderElement): + + """An element (with parameters) from an Accept* header's element list. + + AcceptElement objects are comparable; the more-preferred object will be + "less than" the less-preferred object. They are also therefore sortable; + if you sort a list of AcceptElement objects, they will be listed in + priority order; the most preferred value will be first. Yes, it should + have been the other way around, but it's too late to fix now. + """ + + def from_str(cls, elementstr): + qvalue = None + # The first "q" parameter (if any) separates the initial + # media-range parameter(s) (if any) from the accept-params. + atoms = q_separator.split(elementstr, 1) + media_range = atoms.pop(0).strip() + if atoms: + # The qvalue for an Accept header can have extensions. The other + # headers cannot, but it's easier to parse them as if they did. + qvalue = HeaderElement.from_str(atoms[0].strip()) + + media_type, params = cls.parse(media_range) + if qvalue is not None: + params["q"] = qvalue + return cls(media_type, params) + from_str = classmethod(from_str) + + def qvalue(self): + val = self.params.get("q", "1") + if isinstance(val, HeaderElement): + val = val.value + return float(val) + qvalue = property(qvalue, doc="The qvalue, or priority, of this value.") + + def __cmp__(self, other): + diff = cmp(self.qvalue, other.qvalue) + if diff == 0: + diff = cmp(str(self), str(other)) + return diff + + def __lt__(self, other): + if self.qvalue == other.qvalue: + return str(self) < str(other) + else: + return self.qvalue < other.qvalue + +RE_HEADER_SPLIT = re.compile(',(?=(?:[^"]*"[^"]*")*[^"]*$)') +def header_elements(fieldname, fieldvalue): + """Return a sorted HeaderElement list from a comma-separated header string. + """ + if not fieldvalue: + return [] + + result = [] + for element in RE_HEADER_SPLIT.split(fieldvalue): + if fieldname.startswith("Accept") or fieldname == 'TE': + hv = AcceptElement.from_str(element) + else: + hv = HeaderElement.from_str(element) + result.append(hv) + + return list(reversed(sorted(result))) + + +def decode_TEXT(value): + r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> "f\xfcr").""" + try: + # Python 3 + from email.header import decode_header + except ImportError: + from email.Header import decode_header + atoms = decode_header(value) + decodedvalue = "" + for atom, charset in atoms: + if charset is not None: + atom = atom.decode(charset) + decodedvalue += atom + return decodedvalue + + +def valid_status(status): + """Return legal HTTP status Code, Reason-phrase and Message. + + The status arg must be an int, or a str that begins with an int. + + If status is an int, or a str and no reason-phrase is supplied, + a default reason-phrase will be provided. + """ + + if not status: + status = 200 + + status = str(status) + parts = status.split(" ", 1) + if len(parts) == 1: + # No reason supplied. + code, = parts + reason = None + else: + code, reason = parts + reason = reason.strip() + + try: + code = int(code) + except ValueError: + raise ValueError("Illegal response status from server " + "(%s is non-numeric)." % repr(code)) + + if code < 100 or code > 599: + raise ValueError("Illegal response status from server " + "(%s is out of range)." % repr(code)) + + if code not in response_codes: + # code is unknown but not illegal + default_reason, message = "", "" + else: + default_reason, message = response_codes[code] + + if reason is None: + reason = default_reason + + return code, reason, message + + +# NOTE: the parse_qs functions that follow are modified version of those +# in the python3.0 source - we need to pass through an encoding to the unquote +# method, but the default parse_qs function doesn't allow us to. These do. + +def _parse_qs(qs, keep_blank_values=0, strict_parsing=0, encoding='utf-8'): + """Parse a query given as a string argument. + + Arguments: + + qs: URL-encoded query string to be parsed + + keep_blank_values: flag indicating whether blank values in + URL encoded queries should be treated as blank strings. A + true value indicates that blanks should be retained as blank + strings. The default false value indicates that blank values + are to be ignored and treated as if they were not included. + + strict_parsing: flag indicating what to do with parsing errors. If + false (the default), errors are silently ignored. If true, + errors raise a ValueError exception. + + Returns a dict, as G-d intended. + """ + pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] + d = {} + for name_value in pairs: + if not name_value and not strict_parsing: + continue + nv = name_value.split('=', 1) + if len(nv) != 2: + if strict_parsing: + raise ValueError("bad query field: %r" % (name_value,)) + # Handle case of a control-name with no equal sign + if keep_blank_values: + nv.append('') + else: + continue + if len(nv[1]) or keep_blank_values: + name = unquote_qs(nv[0], encoding) + value = unquote_qs(nv[1], encoding) + if name in d: + if not isinstance(d[name], list): + d[name] = [d[name]] + d[name].append(value) + else: + d[name] = value + return d + + +image_map_pattern = re.compile(r"[0-9]+,[0-9]+") + + +def parse_query_string(query_string, keep_blank_values=True, encoding='utf-8'): + """Build a params dictionary from a query_string. + + Duplicate key/value pairs in the provided query_string will be + returned as {'key': [val1, val2, ...]}. Single key/values will + be returned as strings: {'key': 'value'}. + """ + if image_map_pattern.match(query_string): + # Server-side image map. Map the coords to 'x' and 'y' + # (like CGI::Request does). + pm = query_string.split(",") + pm = {'x': int(pm[0]), 'y': int(pm[1])} + else: + pm = _parse_qs(query_string, keep_blank_values, encoding=encoding) + return pm + + +class CaseInsensitiveDict(dict): + + """A case-insensitive dict subclass. + + Each key is changed on entry to str(key).title(). + """ + + def __getitem__(self, key): + return dict.__getitem__(self, str(key).title()) + + def __setitem__(self, key, value): + dict.__setitem__(self, str(key).title(), value) + + def __delitem__(self, key): + dict.__delitem__(self, str(key).title()) + + def __contains__(self, key): + return dict.__contains__(self, str(key).title()) + + def get(self, key, default=None): + return dict.get(self, str(key).title(), default) + + if hasattr({}, 'has_key'): + def has_key(self, key): + return str(key).title() in self + + def update(self, E): + for k in E.keys(): + self[str(k).title()] = E[k] + + def fromkeys(cls, seq, value=None): + newdict = cls() + for k in seq: + newdict[str(k).title()] = value + return newdict + fromkeys = classmethod(fromkeys) + + def setdefault(self, key, x=None): + key = str(key).title() + try: + return self[key] + except KeyError: + self[key] = x + return x + + def pop(self, key, default): + return dict.pop(self, str(key).title(), default) + + +# TEXT = +# +# A CRLF is allowed in the definition of TEXT only as part of a header +# field continuation. It is expected that the folding LWS will be +# replaced with a single SP before interpretation of the TEXT value." +if nativestr == bytestr: + header_translate_table = ''.join([chr(i) for i in xrange(256)]) + header_translate_deletechars = ''.join( + [chr(i) for i in xrange(32)]) + chr(127) +else: + header_translate_table = None + header_translate_deletechars = bytes(range(32)) + bytes([127]) + + +class HeaderMap(CaseInsensitiveDict): + + """A dict subclass for HTTP request and response headers. + + Each key is changed on entry to str(key).title(). This allows headers + to be case-insensitive and avoid duplicates. + + Values are header values (decoded according to :rfc:`2047` if necessary). + """ + + protocol = (1, 1) + encodings = ["ISO-8859-1"] + + # Someday, when http-bis is done, this will probably get dropped + # since few servers, clients, or intermediaries do it. But until then, + # we're going to obey the spec as is. + # "Words of *TEXT MAY contain characters from character sets other than + # ISO-8859-1 only when encoded according to the rules of RFC 2047." + use_rfc_2047 = True + + def elements(self, key): + """Return a sorted list of HeaderElements for the given header.""" + key = str(key).title() + value = self.get(key) + return header_elements(key, value) + + def values(self, key): + """Return a sorted list of HeaderElement.value for the given header.""" + return [e.value for e in self.elements(key)] + + def output(self): + """Transform self into a list of (name, value) tuples.""" + return list(self.encode_header_items(self.items())) + + def encode_header_items(cls, header_items): + """ + Prepare the sequence of name, value tuples into a form suitable for + transmitting on the wire for HTTP. + """ + for k, v in header_items: + if isinstance(k, unicodestr): + k = cls.encode(k) + + if not isinstance(v, basestring): + v = str(v) + + if isinstance(v, unicodestr): + v = cls.encode(v) + + # See header_translate_* constants above. + # Replace only if you really know what you're doing. + k = k.translate(header_translate_table, + header_translate_deletechars) + v = v.translate(header_translate_table, + header_translate_deletechars) + + yield (k, v) + encode_header_items = classmethod(encode_header_items) + + def encode(cls, v): + """Return the given header name or value, encoded for HTTP output.""" + for enc in cls.encodings: + try: + return v.encode(enc) + except UnicodeEncodeError: + continue + + if cls.protocol == (1, 1) and cls.use_rfc_2047: + # Encode RFC-2047 TEXT + # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?="). + # We do our own here instead of using the email module + # because we never want to fold lines--folding has + # been deprecated by the HTTP working group. + v = b2a_base64(v.encode('utf-8')) + return (ntob('=?utf-8?b?') + v.strip(ntob('\n')) + ntob('?=')) + + raise ValueError("Could not encode header part %r using " + "any of the encodings %r." % + (v, cls.encodings)) + encode = classmethod(encode) + + +class Host(object): + + """An internet address. + + name + Should be the client's host name. If not available (because no DNS + lookup is performed), the IP address should be used instead. + + """ + + ip = "0.0.0.0" + port = 80 + name = "unknown.tld" + + def __init__(self, ip, port, name=None): + self.ip = ip + self.port = port + if name is None: + name = ip + self.name = name + + def __repr__(self): + return "httputil.Host(%r, %r, %r)" % (self.ip, self.port, self.name) diff --git a/lib/cherrypy/lib/jsontools.py b/lib/cherrypy/lib/jsontools.py new file mode 100644 index 00000000..90b3ff8a --- /dev/null +++ b/lib/cherrypy/lib/jsontools.py @@ -0,0 +1,96 @@ +import cherrypy +from cherrypy._cpcompat import basestring, ntou, json_encode, json_decode + + +def json_processor(entity): + """Read application/json data into request.json.""" + if not entity.headers.get(ntou("Content-Length"), ntou("")): + raise cherrypy.HTTPError(411) + + body = entity.fp.read() + try: + cherrypy.serving.request.json = json_decode(body.decode('utf-8')) + except ValueError: + raise cherrypy.HTTPError(400, 'Invalid JSON document') + + +def json_in(content_type=[ntou('application/json'), ntou('text/javascript')], + force=True, debug=False, processor=json_processor): + """Add a processor to parse JSON request entities: + The default processor places the parsed data into request.json. + + Incoming request entities which match the given content_type(s) will + be deserialized from JSON to the Python equivalent, and the result + stored at cherrypy.request.json. The 'content_type' argument may + be a Content-Type string or a list of allowable Content-Type strings. + + If the 'force' argument is True (the default), then entities of other + content types will not be allowed; "415 Unsupported Media Type" is + raised instead. + + Supply your own processor to use a custom decoder, or to handle the parsed + data differently. The processor can be configured via + tools.json_in.processor or via the decorator method. + + Note that the deserializer requires the client send a Content-Length + request header, or it will raise "411 Length Required". If for any + other reason the request entity cannot be deserialized from JSON, + it will raise "400 Bad Request: Invalid JSON document". + + You must be using Python 2.6 or greater, or have the 'simplejson' + package importable; otherwise, ValueError is raised during processing. + """ + request = cherrypy.serving.request + if isinstance(content_type, basestring): + content_type = [content_type] + + if force: + if debug: + cherrypy.log('Removing body processors %s' % + repr(request.body.processors.keys()), 'TOOLS.JSON_IN') + request.body.processors.clear() + request.body.default_proc = cherrypy.HTTPError( + 415, 'Expected an entity of content type %s' % + ', '.join(content_type)) + + for ct in content_type: + if debug: + cherrypy.log('Adding body processor for %s' % ct, 'TOOLS.JSON_IN') + request.body.processors[ct] = processor + + +def json_handler(*args, **kwargs): + value = cherrypy.serving.request._json_inner_handler(*args, **kwargs) + return json_encode(value) + + +def json_out(content_type='application/json', debug=False, + handler=json_handler): + """Wrap request.handler to serialize its output to JSON. Sets Content-Type. + + If the given content_type is None, the Content-Type response header + is not set. + + Provide your own handler to use a custom encoder. For example + cherrypy.config['tools.json_out.handler'] = , or + @json_out(handler=function). + + You must be using Python 2.6 or greater, or have the 'simplejson' + package importable; otherwise, ValueError is raised during processing. + """ + request = cherrypy.serving.request + # request.handler may be set to None by e.g. the caching tool + # to signal to all components that a response body has already + # been attached, in which case we don't need to wrap anything. + if request.handler is None: + return + if debug: + cherrypy.log('Replacing %s with JSON handler' % request.handler, + 'TOOLS.JSON_OUT') + request._json_inner_handler = request.handler + request.handler = handler + if content_type is not None: + if debug: + cherrypy.log('Setting Content-Type to %s' % + content_type, 'TOOLS.JSON_OUT') + cherrypy.serving.response.headers['Content-Type'] = content_type diff --git a/lib/cherrypy/lib/lockfile.py b/lib/cherrypy/lib/lockfile.py new file mode 100644 index 00000000..4cf7b1b6 --- /dev/null +++ b/lib/cherrypy/lib/lockfile.py @@ -0,0 +1,147 @@ +""" +Platform-independent file locking. Inspired by and modeled after zc.lockfile. +""" + +import os + +try: + import msvcrt +except ImportError: + pass + +try: + import fcntl +except ImportError: + pass + + +class LockError(Exception): + + "Could not obtain a lock" + + msg = "Unable to lock %r" + + def __init__(self, path): + super(LockError, self).__init__(self.msg % path) + + +class UnlockError(LockError): + + "Could not release a lock" + + msg = "Unable to unlock %r" + + +# first, a default, naive locking implementation +class LockFile(object): + + """ + A default, naive locking implementation. Always fails if the file + already exists. + """ + + def __init__(self, path): + self.path = path + try: + fd = os.open(path, os.O_CREAT | os.O_WRONLY | os.O_EXCL) + except OSError: + raise LockError(self.path) + os.close(fd) + + def release(self): + os.remove(self.path) + + def remove(self): + pass + + +class SystemLockFile(object): + + """ + An abstract base class for platform-specific locking. + """ + + def __init__(self, path): + self.path = path + + try: + # Open lockfile for writing without truncation: + self.fp = open(path, 'r+') + except IOError: + # If the file doesn't exist, IOError is raised; Use a+ instead. + # Note that there may be a race here. Multiple processes + # could fail on the r+ open and open the file a+, but only + # one will get the the lock and write a pid. + self.fp = open(path, 'a+') + + try: + self._lock_file() + except: + self.fp.seek(1) + self.fp.close() + del self.fp + raise + + self.fp.write(" %s\n" % os.getpid()) + self.fp.truncate() + self.fp.flush() + + def release(self): + if not hasattr(self, 'fp'): + return + self._unlock_file() + self.fp.close() + del self.fp + + def remove(self): + """ + Attempt to remove the file + """ + try: + os.remove(self.path) + except: + pass + + #@abc.abstract_method + # def _lock_file(self): + # """Attempt to obtain the lock on self.fp. Raise LockError if not + # acquired.""" + + def _unlock_file(self): + """Attempt to obtain the lock on self.fp. Raise UnlockError if not + released.""" + + +class WindowsLockFile(SystemLockFile): + + def _lock_file(self): + # Lock just the first byte + try: + msvcrt.locking(self.fp.fileno(), msvcrt.LK_NBLCK, 1) + except IOError: + raise LockError(self.fp.name) + + def _unlock_file(self): + try: + self.fp.seek(0) + msvcrt.locking(self.fp.fileno(), msvcrt.LK_UNLCK, 1) + except IOError: + raise UnlockError(self.fp.name) + +if 'msvcrt' in globals(): + LockFile = WindowsLockFile + + +class UnixLockFile(SystemLockFile): + + def _lock_file(self): + flags = fcntl.LOCK_EX | fcntl.LOCK_NB + try: + fcntl.flock(self.fp.fileno(), flags) + except IOError: + raise LockError(self.fp.name) + + # no need to implement _unlock_file, it will be unlocked on close() + +if 'fcntl' in globals(): + LockFile = UnixLockFile diff --git a/lib/cherrypy/lib/locking.py b/lib/cherrypy/lib/locking.py new file mode 100644 index 00000000..72dda9b3 --- /dev/null +++ b/lib/cherrypy/lib/locking.py @@ -0,0 +1,47 @@ +import datetime + + +class NeverExpires(object): + def expired(self): + return False + + +class Timer(object): + """ + A simple timer that will indicate when an expiration time has passed. + """ + def __init__(self, expiration): + "Create a timer that expires at `expiration` (UTC datetime)" + self.expiration = expiration + + @classmethod + def after(cls, elapsed): + """ + Return a timer that will expire after `elapsed` passes. + """ + return cls(datetime.datetime.utcnow() + elapsed) + + def expired(self): + return datetime.datetime.utcnow() >= self.expiration + + +class LockTimeout(Exception): + "An exception when a lock could not be acquired before a timeout period" + + +class LockChecker(object): + """ + Keep track of the time and detect if a timeout has expired + """ + def __init__(self, session_id, timeout): + self.session_id = session_id + if timeout: + self.timer = Timer.after(timeout) + else: + self.timer = NeverExpires() + + def expired(self): + if self.timer.expired(): + raise LockTimeout( + "Timeout acquiring lock for %(session_id)s" % vars(self)) + return False diff --git a/lib/cherrypy/lib/profiler.py b/lib/cherrypy/lib/profiler.py new file mode 100644 index 00000000..5dac386e --- /dev/null +++ b/lib/cherrypy/lib/profiler.py @@ -0,0 +1,216 @@ +"""Profiler tools for CherryPy. + +CherryPy users +============== + +You can profile any of your pages as follows:: + + from cherrypy.lib import profiler + + class Root: + p = profile.Profiler("/path/to/profile/dir") + + def index(self): + self.p.run(self._index) + index.exposed = True + + def _index(self): + return "Hello, world!" + + cherrypy.tree.mount(Root()) + +You can also turn on profiling for all requests +using the ``make_app`` function as WSGI middleware. + +CherryPy developers +=================== + +This module can be used whenever you make changes to CherryPy, +to get a quick sanity-check on overall CP performance. Use the +``--profile`` flag when running the test suite. Then, use the ``serve()`` +function to browse the results in a web browser. If you run this +module from the command line, it will call ``serve()`` for you. + +""" + + +def new_func_strip_path(func_name): + """Make profiler output more readable by adding `__init__` modules' parents + """ + filename, line, name = func_name + if filename.endswith("__init__.py"): + return os.path.basename(filename[:-12]) + filename[-12:], line, name + return os.path.basename(filename), line, name + +try: + import profile + import pstats + pstats.func_strip_path = new_func_strip_path +except ImportError: + profile = None + pstats = None + +import os +import os.path +import sys +import warnings + +from cherrypy._cpcompat import StringIO + +_count = 0 + + +class Profiler(object): + + def __init__(self, path=None): + if not path: + path = os.path.join(os.path.dirname(__file__), "profile") + self.path = path + if not os.path.exists(path): + os.makedirs(path) + + def run(self, func, *args, **params): + """Dump profile data into self.path.""" + global _count + c = _count = _count + 1 + path = os.path.join(self.path, "cp_%04d.prof" % c) + prof = profile.Profile() + result = prof.runcall(func, *args, **params) + prof.dump_stats(path) + return result + + def statfiles(self): + """:rtype: list of available profiles. + """ + return [f for f in os.listdir(self.path) + if f.startswith("cp_") and f.endswith(".prof")] + + def stats(self, filename, sortby='cumulative'): + """:rtype stats(index): output of print_stats() for the given profile. + """ + sio = StringIO() + if sys.version_info >= (2, 5): + s = pstats.Stats(os.path.join(self.path, filename), stream=sio) + s.strip_dirs() + s.sort_stats(sortby) + s.print_stats() + else: + # pstats.Stats before Python 2.5 didn't take a 'stream' arg, + # but just printed to stdout. So re-route stdout. + s = pstats.Stats(os.path.join(self.path, filename)) + s.strip_dirs() + s.sort_stats(sortby) + oldout = sys.stdout + try: + sys.stdout = sio + s.print_stats() + finally: + sys.stdout = oldout + response = sio.getvalue() + sio.close() + return response + + def index(self): + return """ + CherryPy profile data + + + + + + """ + index.exposed = True + + def menu(self): + yield "

Profiling runs

" + yield "

Click on one of the runs below to see profiling data.

" + runs = self.statfiles() + runs.sort() + for i in runs: + yield "%s
" % ( + i, i) + menu.exposed = True + + def report(self, filename): + import cherrypy + cherrypy.response.headers['Content-Type'] = 'text/plain' + return self.stats(filename) + report.exposed = True + + +class ProfileAggregator(Profiler): + + def __init__(self, path=None): + Profiler.__init__(self, path) + global _count + self.count = _count = _count + 1 + self.profiler = profile.Profile() + + def run(self, func, *args, **params): + path = os.path.join(self.path, "cp_%04d.prof" % self.count) + result = self.profiler.runcall(func, *args, **params) + self.profiler.dump_stats(path) + return result + + +class make_app: + + def __init__(self, nextapp, path=None, aggregate=False): + """Make a WSGI middleware app which wraps 'nextapp' with profiling. + + nextapp + the WSGI application to wrap, usually an instance of + cherrypy.Application. + + path + where to dump the profiling output. + + aggregate + if True, profile data for all HTTP requests will go in + a single file. If False (the default), each HTTP request will + dump its profile data into a separate file. + + """ + if profile is None or pstats is None: + msg = ("Your installation of Python does not have a profile " + "module. If you're on Debian, try " + "`sudo apt-get install python-profiler`. " + "See http://www.cherrypy.org/wiki/ProfilingOnDebian " + "for details.") + warnings.warn(msg) + + self.nextapp = nextapp + self.aggregate = aggregate + if aggregate: + self.profiler = ProfileAggregator(path) + else: + self.profiler = Profiler(path) + + def __call__(self, environ, start_response): + def gather(): + result = [] + for line in self.nextapp(environ, start_response): + result.append(line) + return result + return self.profiler.run(gather) + + +def serve(path=None, port=8080): + if profile is None or pstats is None: + msg = ("Your installation of Python does not have a profile module. " + "If you're on Debian, try " + "`sudo apt-get install python-profiler`. " + "See http://www.cherrypy.org/wiki/ProfilingOnDebian " + "for details.") + warnings.warn(msg) + + import cherrypy + cherrypy.config.update({'server.socket_port': int(port), + 'server.thread_pool': 10, + 'environment': "production", + }) + cherrypy.quickstart(Profiler(path)) + + +if __name__ == "__main__": + serve(*tuple(sys.argv[1:])) diff --git a/lib/cherrypy/lib/reprconf.py b/lib/cherrypy/lib/reprconf.py new file mode 100644 index 00000000..6e70b5ec --- /dev/null +++ b/lib/cherrypy/lib/reprconf.py @@ -0,0 +1,503 @@ +"""Generic configuration system using unrepr. + +Configuration data may be supplied as a Python dictionary, as a filename, +or as an open file object. When you supply a filename or file, Python's +builtin ConfigParser is used (with some extensions). + +Namespaces +---------- + +Configuration keys are separated into namespaces by the first "." in the key. + +The only key that cannot exist in a namespace is the "environment" entry. +This special entry 'imports' other config entries from a template stored in +the Config.environments dict. + +You can define your own namespaces to be called when new config is merged +by adding a named handler to Config.namespaces. The name can be any string, +and the handler must be either a callable or a context manager. +""" + +try: + # Python 3.0+ + from configparser import ConfigParser +except ImportError: + from ConfigParser import ConfigParser + +try: + set +except NameError: + from sets import Set as set + +try: + basestring +except NameError: + basestring = str + +try: + # Python 3 + import builtins +except ImportError: + # Python 2 + import __builtin__ as builtins + +import operator as _operator +import sys + + +def as_dict(config): + """Return a dict from 'config' whether it is a dict, file, or filename.""" + if isinstance(config, basestring): + config = Parser().dict_from_file(config) + elif hasattr(config, 'read'): + config = Parser().dict_from_file(config) + return config + + +class NamespaceSet(dict): + + """A dict of config namespace names and handlers. + + Each config entry should begin with a namespace name; the corresponding + namespace handler will be called once for each config entry in that + namespace, and will be passed two arguments: the config key (with the + namespace removed) and the config value. + + Namespace handlers may be any Python callable; they may also be + Python 2.5-style 'context managers', in which case their __enter__ + method should return a callable to be used as the handler. + See cherrypy.tools (the Toolbox class) for an example. + """ + + def __call__(self, config): + """Iterate through config and pass it to each namespace handler. + + config + A flat dict, where keys use dots to separate + namespaces, and values are arbitrary. + + The first name in each config key is used to look up the corresponding + namespace handler. For example, a config entry of {'tools.gzip.on': v} + will call the 'tools' namespace handler with the args: ('gzip.on', v) + """ + # Separate the given config into namespaces + ns_confs = {} + for k in config: + if "." in k: + ns, name = k.split(".", 1) + bucket = ns_confs.setdefault(ns, {}) + bucket[name] = config[k] + + # I chose __enter__ and __exit__ so someday this could be + # rewritten using Python 2.5's 'with' statement: + # for ns, handler in self.iteritems(): + # with handler as callable: + # for k, v in ns_confs.get(ns, {}).iteritems(): + # callable(k, v) + for ns, handler in self.items(): + exit = getattr(handler, "__exit__", None) + if exit: + callable = handler.__enter__() + no_exc = True + try: + try: + for k, v in ns_confs.get(ns, {}).items(): + callable(k, v) + except: + # The exceptional case is handled here + no_exc = False + if exit is None: + raise + if not exit(*sys.exc_info()): + raise + # The exception is swallowed if exit() returns true + finally: + # The normal and non-local-goto cases are handled here + if no_exc and exit: + exit(None, None, None) + else: + for k, v in ns_confs.get(ns, {}).items(): + handler(k, v) + + def __repr__(self): + return "%s.%s(%s)" % (self.__module__, self.__class__.__name__, + dict.__repr__(self)) + + def __copy__(self): + newobj = self.__class__() + newobj.update(self) + return newobj + copy = __copy__ + + +class Config(dict): + + """A dict-like set of configuration data, with defaults and namespaces. + + May take a file, filename, or dict. + """ + + defaults = {} + environments = {} + namespaces = NamespaceSet() + + def __init__(self, file=None, **kwargs): + self.reset() + if file is not None: + self.update(file) + if kwargs: + self.update(kwargs) + + def reset(self): + """Reset self to default values.""" + self.clear() + dict.update(self, self.defaults) + + def update(self, config): + """Update self from a dict, file or filename.""" + if isinstance(config, basestring): + # Filename + config = Parser().dict_from_file(config) + elif hasattr(config, 'read'): + # Open file object + config = Parser().dict_from_file(config) + else: + config = config.copy() + self._apply(config) + + def _apply(self, config): + """Update self from a dict.""" + which_env = config.get('environment') + if which_env: + env = self.environments[which_env] + for k in env: + if k not in config: + config[k] = env[k] + + dict.update(self, config) + self.namespaces(config) + + def __setitem__(self, k, v): + dict.__setitem__(self, k, v) + self.namespaces({k: v}) + + +class Parser(ConfigParser): + + """Sub-class of ConfigParser that keeps the case of options and that + raises an exception if the file cannot be read. + """ + + def optionxform(self, optionstr): + return optionstr + + def read(self, filenames): + if isinstance(filenames, basestring): + filenames = [filenames] + for filename in filenames: + # try: + # fp = open(filename) + # except IOError: + # continue + fp = open(filename) + try: + self._read(fp, filename) + finally: + fp.close() + + def as_dict(self, raw=False, vars=None): + """Convert an INI file to a dictionary""" + # Load INI file into a dict + result = {} + for section in self.sections(): + if section not in result: + result[section] = {} + for option in self.options(section): + value = self.get(section, option, raw=raw, vars=vars) + try: + value = unrepr(value) + except Exception: + x = sys.exc_info()[1] + msg = ("Config error in section: %r, option: %r, " + "value: %r. Config values must be valid Python." % + (section, option, value)) + raise ValueError(msg, x.__class__.__name__, x.args) + result[section][option] = value + return result + + def dict_from_file(self, file): + if hasattr(file, 'read'): + self.readfp(file) + else: + self.read(file) + return self.as_dict() + + +# public domain "unrepr" implementation, found on the web and then improved. + + +class _Builder2: + + def build(self, o): + m = getattr(self, 'build_' + o.__class__.__name__, None) + if m is None: + raise TypeError("unrepr does not recognize %s" % + repr(o.__class__.__name__)) + return m(o) + + def astnode(self, s): + """Return a Python2 ast Node compiled from a string.""" + try: + import compiler + except ImportError: + # Fallback to eval when compiler package is not available, + # e.g. IronPython 1.0. + return eval(s) + + p = compiler.parse("__tempvalue__ = " + s) + return p.getChildren()[1].getChildren()[0].getChildren()[1] + + def build_Subscript(self, o): + expr, flags, subs = o.getChildren() + expr = self.build(expr) + subs = self.build(subs) + return expr[subs] + + def build_CallFunc(self, o): + children = o.getChildren() + # Build callee from first child + callee = self.build(children[0]) + # Build args and kwargs from remaining children + args = [] + kwargs = {} + for child in children[1:]: + class_name = child.__class__.__name__ + # None is ignored + if class_name == 'NoneType': + continue + # Keywords become kwargs + if class_name == 'Keyword': + kwargs.update(self.build(child)) + # Everything else becomes args + else : + args.append(self.build(child)) + return callee(*args, **kwargs) + + def build_Keyword(self, o): + key, value_obj = o.getChildren() + value = self.build(value_obj) + kw_dict = {key: value} + return kw_dict + + def build_List(self, o): + return map(self.build, o.getChildren()) + + def build_Const(self, o): + return o.value + + def build_Dict(self, o): + d = {} + i = iter(map(self.build, o.getChildren())) + for el in i: + d[el] = i.next() + return d + + def build_Tuple(self, o): + return tuple(self.build_List(o)) + + def build_Name(self, o): + name = o.name + if name == 'None': + return None + if name == 'True': + return True + if name == 'False': + return False + + # See if the Name is a package or module. If it is, import it. + try: + return modules(name) + except ImportError: + pass + + # See if the Name is in builtins. + try: + return getattr(builtins, name) + except AttributeError: + pass + + raise TypeError("unrepr could not resolve the name %s" % repr(name)) + + def build_Add(self, o): + left, right = map(self.build, o.getChildren()) + return left + right + + def build_Mul(self, o): + left, right = map(self.build, o.getChildren()) + return left * right + + def build_Getattr(self, o): + parent = self.build(o.expr) + return getattr(parent, o.attrname) + + def build_NoneType(self, o): + return None + + def build_UnarySub(self, o): + return -self.build(o.getChildren()[0]) + + def build_UnaryAdd(self, o): + return self.build(o.getChildren()[0]) + + +class _Builder3: + + def build(self, o): + m = getattr(self, 'build_' + o.__class__.__name__, None) + if m is None: + raise TypeError("unrepr does not recognize %s" % + repr(o.__class__.__name__)) + return m(o) + + def astnode(self, s): + """Return a Python3 ast Node compiled from a string.""" + try: + import ast + except ImportError: + # Fallback to eval when ast package is not available, + # e.g. IronPython 1.0. + return eval(s) + + p = ast.parse("__tempvalue__ = " + s) + return p.body[0].value + + def build_Subscript(self, o): + return self.build(o.value)[self.build(o.slice)] + + def build_Index(self, o): + return self.build(o.value) + + def build_Call(self, o): + callee = self.build(o.func) + + if o.args is None: + args = () + else: + args = tuple([self.build(a) for a in o.args]) + + if o.starargs is None: + starargs = () + else: + starargs = self.build(o.starargs) + + if o.kwargs is None: + kwargs = {} + else: + kwargs = self.build(o.kwargs) + + return callee(*(args + starargs), **kwargs) + + def build_List(self, o): + return list(map(self.build, o.elts)) + + def build_Str(self, o): + return o.s + + def build_Num(self, o): + return o.n + + def build_Dict(self, o): + return dict([(self.build(k), self.build(v)) + for k, v in zip(o.keys, o.values)]) + + def build_Tuple(self, o): + return tuple(self.build_List(o)) + + def build_Name(self, o): + name = o.id + if name == 'None': + return None + if name == 'True': + return True + if name == 'False': + return False + + # See if the Name is a package or module. If it is, import it. + try: + return modules(name) + except ImportError: + pass + + # See if the Name is in builtins. + try: + import builtins + return getattr(builtins, name) + except AttributeError: + pass + + raise TypeError("unrepr could not resolve the name %s" % repr(name)) + + def build_NameConstant(self, o): + return o.value + + def build_UnaryOp(self, o): + op, operand = map(self.build, [o.op, o.operand]) + return op(operand) + + def build_BinOp(self, o): + left, op, right = map(self.build, [o.left, o.op, o.right]) + return op(left, right) + + def build_Add(self, o): + return _operator.add + + def build_Mult(self, o): + return _operator.mul + + def build_USub(self, o): + return _operator.neg + + def build_Attribute(self, o): + parent = self.build(o.value) + return getattr(parent, o.attr) + + def build_NoneType(self, o): + return None + + +def unrepr(s): + """Return a Python object compiled from a string.""" + if not s: + return s + if sys.version_info < (3, 0): + b = _Builder2() + else: + b = _Builder3() + obj = b.astnode(s) + return b.build(obj) + + +def modules(modulePath): + """Load a module and retrieve a reference to that module.""" + __import__(modulePath) + return sys.modules[modulePath] + + +def attributes(full_attribute_name): + """Load a module and retrieve an attribute of that module.""" + + # Parse out the path, module, and attribute + last_dot = full_attribute_name.rfind(".") + attr_name = full_attribute_name[last_dot + 1:] + mod_path = full_attribute_name[:last_dot] + + mod = modules(mod_path) + # Let an AttributeError propagate outward. + try: + attr = getattr(mod, attr_name) + except AttributeError: + raise AttributeError("'%s' object has no attribute '%s'" + % (mod_path, attr_name)) + + # Return a reference to the attribute. + return attr diff --git a/lib/cherrypy/lib/sessions.py b/lib/cherrypy/lib/sessions.py new file mode 100644 index 00000000..37556363 --- /dev/null +++ b/lib/cherrypy/lib/sessions.py @@ -0,0 +1,968 @@ +"""Session implementation for CherryPy. + +You need to edit your config file to use sessions. Here's an example:: + + [/] + tools.sessions.on = True + tools.sessions.storage_type = "file" + tools.sessions.storage_path = "/home/site/sessions" + tools.sessions.timeout = 60 + +This sets the session to be stored in files in the directory +/home/site/sessions, and the session timeout to 60 minutes. If you omit +``storage_type`` the sessions will be saved in RAM. +``tools.sessions.on`` is the only required line for working sessions, +the rest are optional. + +By default, the session ID is passed in a cookie, so the client's browser must +have cookies enabled for your site. + +To set data for the current session, use +``cherrypy.session['fieldname'] = 'fieldvalue'``; +to get data use ``cherrypy.session.get('fieldname')``. + +================ +Locking sessions +================ + +By default, the ``'locking'`` mode of sessions is ``'implicit'``, which means +the session is locked early and unlocked late. Be mindful of this default mode +for any requests that take a long time to process (streaming responses, +expensive calculations, database lookups, API calls, etc), as other concurrent +requests that also utilize sessions will hang until the session is unlocked. + +If you want to control when the session data is locked and unlocked, +set ``tools.sessions.locking = 'explicit'``. Then call +``cherrypy.session.acquire_lock()`` and ``cherrypy.session.release_lock()``. +Regardless of which mode you use, the session is guaranteed to be unlocked when +the request is complete. + +================= +Expiring Sessions +================= + +You can force a session to expire with :func:`cherrypy.lib.sessions.expire`. +Simply call that function at the point you want the session to expire, and it +will cause the session cookie to expire client-side. + +=========================== +Session Fixation Protection +=========================== + +If CherryPy receives, via a request cookie, a session id that it does not +recognize, it will reject that id and create a new one to return in the +response cookie. This `helps prevent session fixation attacks +`_. +However, CherryPy "recognizes" a session id by looking up the saved session +data for that id. Therefore, if you never save any session data, +**you will get a new session id for every request**. + +================ +Sharing Sessions +================ + +If you run multiple instances of CherryPy (for example via mod_python behind +Apache prefork), you most likely cannot use the RAM session backend, since each +instance of CherryPy will have its own memory space. Use a different backend +instead, and verify that all instances are pointing at the same file or db +location. Alternately, you might try a load balancer which makes sessions +"sticky". Google is your friend, there. + +================ +Expiration Dates +================ + +The response cookie will possess an expiration date to inform the client at +which point to stop sending the cookie back in requests. If the server time +and client time differ, expect sessions to be unreliable. **Make sure the +system time of your server is accurate**. + +CherryPy defaults to a 60-minute session timeout, which also applies to the +cookie which is sent to the client. Unfortunately, some versions of Safari +("4 public beta" on Windows XP at least) appear to have a bug in their parsing +of the GMT expiration date--they appear to interpret the date as one hour in +the past. Sixty minutes minus one hour is pretty close to zero, so you may +experience this bug as a new session id for every request, unless the requests +are less than one second apart. To fix, try increasing the session.timeout. + +On the other extreme, some users report Firefox sending cookies after their +expiration date, although this was on a system with an inaccurate system time. +Maybe FF doesn't trust system time. +""" +import sys +import datetime +import os +import time +import threading +import types + +import cherrypy +from cherrypy._cpcompat import copyitems, pickle, random20, unicodestr +from cherrypy.lib import httputil +from cherrypy.lib import lockfile +from cherrypy.lib import locking +from cherrypy.lib import is_iterator + +missing = object() + + +class Session(object): + + """A CherryPy dict-like Session object (one per request).""" + + _id = None + + id_observers = None + "A list of callbacks to which to pass new id's." + + def _get_id(self): + return self._id + + def _set_id(self, value): + self._id = value + for o in self.id_observers: + o(value) + id = property(_get_id, _set_id, doc="The current session ID.") + + timeout = 60 + "Number of minutes after which to delete session data." + + locked = False + """ + If True, this session instance has exclusive read/write access + to session data.""" + + loaded = False + """ + If True, data has been retrieved from storage. This should happen + automatically on the first attempt to access session data.""" + + clean_thread = None + "Class-level Monitor which calls self.clean_up." + + clean_freq = 5 + "The poll rate for expired session cleanup in minutes." + + originalid = None + "The session id passed by the client. May be missing or unsafe." + + missing = False + "True if the session requested by the client did not exist." + + regenerated = False + """ + True if the application called session.regenerate(). This is not set by + internal calls to regenerate the session id.""" + + debug = False + "If True, log debug information." + + # --------------------- Session management methods --------------------- # + + def __init__(self, id=None, **kwargs): + self.id_observers = [] + self._data = {} + + for k, v in kwargs.items(): + setattr(self, k, v) + + self.originalid = id + self.missing = False + if id is None: + if self.debug: + cherrypy.log('No id given; making a new one', 'TOOLS.SESSIONS') + self._regenerate() + else: + self.id = id + if self._exists(): + if self.debug: + cherrypy.log('Set id to %s.' % id, 'TOOLS.SESSIONS') + else: + if self.debug: + cherrypy.log('Expired or malicious session %r; ' + 'making a new one' % id, 'TOOLS.SESSIONS') + # Expired or malicious session. Make a new one. + # See https://bitbucket.org/cherrypy/cherrypy/issue/709. + self.id = None + self.missing = True + self._regenerate() + + def now(self): + """Generate the session specific concept of 'now'. + + Other session providers can override this to use alternative, + possibly timezone aware, versions of 'now'. + """ + return datetime.datetime.now() + + def regenerate(self): + """Replace the current session (with a new id).""" + self.regenerated = True + self._regenerate() + + def _regenerate(self): + if self.id is not None: + if self.debug: + cherrypy.log( + 'Deleting the existing session %r before ' + 'regeneration.' % self.id, + 'TOOLS.SESSIONS') + self.delete() + + old_session_was_locked = self.locked + if old_session_was_locked: + self.release_lock() + if self.debug: + cherrypy.log('Old lock released.', 'TOOLS.SESSIONS') + + self.id = None + while self.id is None: + self.id = self.generate_id() + # Assert that the generated id is not already stored. + if self._exists(): + self.id = None + if self.debug: + cherrypy.log('Set id to generated %s.' % self.id, + 'TOOLS.SESSIONS') + + if old_session_was_locked: + self.acquire_lock() + if self.debug: + cherrypy.log('Regenerated lock acquired.', 'TOOLS.SESSIONS') + + def clean_up(self): + """Clean up expired sessions.""" + pass + + def generate_id(self): + """Return a new session id.""" + return random20() + + def save(self): + """Save session data.""" + try: + # If session data has never been loaded then it's never been + # accessed: no need to save it + if self.loaded: + t = datetime.timedelta(seconds=self.timeout * 60) + expiration_time = self.now() + t + if self.debug: + cherrypy.log('Saving session %r with expiry %s' % + (self.id, expiration_time), + 'TOOLS.SESSIONS') + self._save(expiration_time) + else: + if self.debug: + cherrypy.log( + 'Skipping save of session %r (no session loaded).' % + self.id, 'TOOLS.SESSIONS') + finally: + if self.locked: + # Always release the lock if the user didn't release it + self.release_lock() + if self.debug: + cherrypy.log('Lock released after save.', 'TOOLS.SESSIONS') + + def load(self): + """Copy stored session data into this session instance.""" + data = self._load() + # data is either None or a tuple (session_data, expiration_time) + if data is None or data[1] < self.now(): + if self.debug: + cherrypy.log('Expired session %r, flushing data.' % self.id, + 'TOOLS.SESSIONS') + self._data = {} + else: + if self.debug: + cherrypy.log('Data loaded for session %r.' % self.id, + 'TOOLS.SESSIONS') + self._data = data[0] + self.loaded = True + + # Stick the clean_thread in the class, not the instance. + # The instances are created and destroyed per-request. + cls = self.__class__ + if self.clean_freq and not cls.clean_thread: + # clean_up is an instancemethod and not a classmethod, + # so that tool config can be accessed inside the method. + t = cherrypy.process.plugins.Monitor( + cherrypy.engine, self.clean_up, self.clean_freq * 60, + name='Session cleanup') + t.subscribe() + cls.clean_thread = t + t.start() + if self.debug: + cherrypy.log('Started cleanup thread.', 'TOOLS.SESSIONS') + + def delete(self): + """Delete stored session data.""" + self._delete() + if self.debug: + cherrypy.log('Deleted session %s.' % self.id, + 'TOOLS.SESSIONS') + + # -------------------- Application accessor methods -------------------- # + + def __getitem__(self, key): + if not self.loaded: + self.load() + return self._data[key] + + def __setitem__(self, key, value): + if not self.loaded: + self.load() + self._data[key] = value + + def __delitem__(self, key): + if not self.loaded: + self.load() + del self._data[key] + + def pop(self, key, default=missing): + """Remove the specified key and return the corresponding value. + If key is not found, default is returned if given, + otherwise KeyError is raised. + """ + if not self.loaded: + self.load() + if default is missing: + return self._data.pop(key) + else: + return self._data.pop(key, default) + + def __contains__(self, key): + if not self.loaded: + self.load() + return key in self._data + + if hasattr({}, 'has_key'): + def has_key(self, key): + """D.has_key(k) -> True if D has a key k, else False.""" + if not self.loaded: + self.load() + return key in self._data + + def get(self, key, default=None): + """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.""" + if not self.loaded: + self.load() + return self._data.get(key, default) + + def update(self, d): + """D.update(E) -> None. Update D from E: for k in E: D[k] = E[k].""" + if not self.loaded: + self.load() + self._data.update(d) + + def setdefault(self, key, default=None): + """D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D.""" + if not self.loaded: + self.load() + return self._data.setdefault(key, default) + + def clear(self): + """D.clear() -> None. Remove all items from D.""" + if not self.loaded: + self.load() + self._data.clear() + + def keys(self): + """D.keys() -> list of D's keys.""" + if not self.loaded: + self.load() + return self._data.keys() + + def items(self): + """D.items() -> list of D's (key, value) pairs, as 2-tuples.""" + if not self.loaded: + self.load() + return self._data.items() + + def values(self): + """D.values() -> list of D's values.""" + if not self.loaded: + self.load() + return self._data.values() + + +class RamSession(Session): + + # Class-level objects. Don't rebind these! + cache = {} + locks = {} + + def clean_up(self): + """Clean up expired sessions.""" + + now = self.now() + for _id, (data, expiration_time) in copyitems(self.cache): + if expiration_time <= now: + try: + del self.cache[_id] + except KeyError: + pass + try: + if self.locks[_id].acquire(blocking=False): + lock = self.locks.pop(_id) + lock.release() + except KeyError: + pass + + # added to remove obsolete lock objects + for _id in list(self.locks): + if _id not in self.cache and self.locks[_id].acquire(blocking=False): + lock = self.locks.pop(_id) + lock.release() + + def _exists(self): + return self.id in self.cache + + def _load(self): + return self.cache.get(self.id) + + def _save(self, expiration_time): + self.cache[self.id] = (self._data, expiration_time) + + def _delete(self): + self.cache.pop(self.id, None) + + def acquire_lock(self): + """Acquire an exclusive lock on the currently-loaded session data.""" + self.locked = True + self.locks.setdefault(self.id, threading.RLock()).acquire() + + def release_lock(self): + """Release the lock on the currently-loaded session data.""" + self.locks[self.id].release() + self.locked = False + + def __len__(self): + """Return the number of active sessions.""" + return len(self.cache) + + +class FileSession(Session): + + """Implementation of the File backend for sessions + + storage_path + The folder where session data will be saved. Each session + will be saved as pickle.dump(data, expiration_time) in its own file; + the filename will be self.SESSION_PREFIX + self.id. + + lock_timeout + A timedelta or numeric seconds indicating how long + to block acquiring a lock. If None (default), acquiring a lock + will block indefinitely. + """ + + SESSION_PREFIX = 'session-' + LOCK_SUFFIX = '.lock' + pickle_protocol = pickle.HIGHEST_PROTOCOL + + def __init__(self, id=None, **kwargs): + # The 'storage_path' arg is required for file-based sessions. + kwargs['storage_path'] = os.path.abspath(kwargs['storage_path']) + kwargs.setdefault('lock_timeout', None) + + Session.__init__(self, id=id, **kwargs) + + # validate self.lock_timeout + if isinstance(self.lock_timeout, (int, float)): + self.lock_timeout = datetime.timedelta(seconds=self.lock_timeout) + if not isinstance(self.lock_timeout, (datetime.timedelta, type(None))): + raise ValueError("Lock timeout must be numeric seconds or " + "a timedelta instance.") + + def setup(cls, **kwargs): + """Set up the storage system for file-based sessions. + + This should only be called once per process; this will be done + automatically when using sessions.init (as the built-in Tool does). + """ + # The 'storage_path' arg is required for file-based sessions. + kwargs['storage_path'] = os.path.abspath(kwargs['storage_path']) + + for k, v in kwargs.items(): + setattr(cls, k, v) + setup = classmethod(setup) + + def _get_file_path(self): + f = os.path.join(self.storage_path, self.SESSION_PREFIX + self.id) + if not os.path.abspath(f).startswith(self.storage_path): + raise cherrypy.HTTPError(400, "Invalid session id in cookie.") + return f + + def _exists(self): + path = self._get_file_path() + return os.path.exists(path) + + def _load(self, path=None): + assert self.locked, ("The session load without being locked. " + "Check your tools' priority levels.") + if path is None: + path = self._get_file_path() + try: + f = open(path, "rb") + try: + return pickle.load(f) + finally: + f.close() + except (IOError, EOFError): + e = sys.exc_info()[1] + if self.debug: + cherrypy.log("Error loading the session pickle: %s" % + e, 'TOOLS.SESSIONS') + return None + + def _save(self, expiration_time): + assert self.locked, ("The session was saved without being locked. " + "Check your tools' priority levels.") + f = open(self._get_file_path(), "wb") + try: + pickle.dump((self._data, expiration_time), f, self.pickle_protocol) + finally: + f.close() + + def _delete(self): + assert self.locked, ("The session deletion without being locked. " + "Check your tools' priority levels.") + try: + os.unlink(self._get_file_path()) + except OSError: + pass + + def acquire_lock(self, path=None): + """Acquire an exclusive lock on the currently-loaded session data.""" + if path is None: + path = self._get_file_path() + path += self.LOCK_SUFFIX + checker = locking.LockChecker(self.id, self.lock_timeout) + while not checker.expired(): + try: + self.lock = lockfile.LockFile(path) + except lockfile.LockError: + time.sleep(0.1) + else: + break + self.locked = True + if self.debug: + cherrypy.log('Lock acquired.', 'TOOLS.SESSIONS') + + def release_lock(self, path=None): + """Release the lock on the currently-loaded session data.""" + self.lock.release() + self.lock.remove() + self.locked = False + + def clean_up(self): + """Clean up expired sessions.""" + now = self.now() + # Iterate over all session files in self.storage_path + for fname in os.listdir(self.storage_path): + if (fname.startswith(self.SESSION_PREFIX) + and not fname.endswith(self.LOCK_SUFFIX)): + # We have a session file: lock and load it and check + # if it's expired. If it fails, nevermind. + path = os.path.join(self.storage_path, fname) + self.acquire_lock(path) + if self.debug: + # This is a bit of a hack, since we're calling clean_up + # on the first instance rather than the entire class, + # so depending on whether you have "debug" set on the + # path of the first session called, this may not run. + cherrypy.log('Cleanup lock acquired.', 'TOOLS.SESSIONS') + + try: + contents = self._load(path) + # _load returns None on IOError + if contents is not None: + data, expiration_time = contents + if expiration_time < now: + # Session expired: deleting it + os.unlink(path) + finally: + self.release_lock(path) + + def __len__(self): + """Return the number of active sessions.""" + return len([fname for fname in os.listdir(self.storage_path) + if (fname.startswith(self.SESSION_PREFIX) + and not fname.endswith(self.LOCK_SUFFIX))]) + + +class PostgresqlSession(Session): + + """ Implementation of the PostgreSQL backend for sessions. It assumes + a table like this:: + + create table session ( + id varchar(40), + data text, + expiration_time timestamp + ) + + You must provide your own get_db function. + """ + + pickle_protocol = pickle.HIGHEST_PROTOCOL + + def __init__(self, id=None, **kwargs): + Session.__init__(self, id, **kwargs) + self.cursor = self.db.cursor() + + def setup(cls, **kwargs): + """Set up the storage system for Postgres-based sessions. + + This should only be called once per process; this will be done + automatically when using sessions.init (as the built-in Tool does). + """ + for k, v in kwargs.items(): + setattr(cls, k, v) + + self.db = self.get_db() + setup = classmethod(setup) + + def __del__(self): + if self.cursor: + self.cursor.close() + self.db.commit() + + def _exists(self): + # Select session data from table + self.cursor.execute('select data, expiration_time from session ' + 'where id=%s', (self.id,)) + rows = self.cursor.fetchall() + return bool(rows) + + def _load(self): + # Select session data from table + self.cursor.execute('select data, expiration_time from session ' + 'where id=%s', (self.id,)) + rows = self.cursor.fetchall() + if not rows: + return None + + pickled_data, expiration_time = rows[0] + data = pickle.loads(pickled_data) + return data, expiration_time + + def _save(self, expiration_time): + pickled_data = pickle.dumps(self._data, self.pickle_protocol) + self.cursor.execute('update session set data = %s, ' + 'expiration_time = %s where id = %s', + (pickled_data, expiration_time, self.id)) + + def _delete(self): + self.cursor.execute('delete from session where id=%s', (self.id,)) + + def acquire_lock(self): + """Acquire an exclusive lock on the currently-loaded session data.""" + # We use the "for update" clause to lock the row + self.locked = True + self.cursor.execute('select id from session where id=%s for update', + (self.id,)) + if self.debug: + cherrypy.log('Lock acquired.', 'TOOLS.SESSIONS') + + def release_lock(self): + """Release the lock on the currently-loaded session data.""" + # We just close the cursor and that will remove the lock + # introduced by the "for update" clause + self.cursor.close() + self.locked = False + + def clean_up(self): + """Clean up expired sessions.""" + self.cursor.execute('delete from session where expiration_time < %s', + (self.now(),)) + + +class MemcachedSession(Session): + + # The most popular memcached client for Python isn't thread-safe. + # Wrap all .get and .set operations in a single lock. + mc_lock = threading.RLock() + + # This is a seperate set of locks per session id. + locks = {} + + servers = ['127.0.0.1:11211'] + + def setup(cls, **kwargs): + """Set up the storage system for memcached-based sessions. + + This should only be called once per process; this will be done + automatically when using sessions.init (as the built-in Tool does). + """ + for k, v in kwargs.items(): + setattr(cls, k, v) + + import memcache + cls.cache = memcache.Client(cls.servers) + setup = classmethod(setup) + + def _get_id(self): + return self._id + + def _set_id(self, value): + # This encode() call is where we differ from the superclass. + # Memcache keys MUST be byte strings, not unicode. + if isinstance(value, unicodestr): + value = value.encode('utf-8') + + self._id = value + for o in self.id_observers: + o(value) + id = property(_get_id, _set_id, doc="The current session ID.") + + def _exists(self): + self.mc_lock.acquire() + try: + return bool(self.cache.get(self.id)) + finally: + self.mc_lock.release() + + def _load(self): + self.mc_lock.acquire() + try: + return self.cache.get(self.id) + finally: + self.mc_lock.release() + + def _save(self, expiration_time): + # Send the expiration time as "Unix time" (seconds since 1/1/1970) + td = int(time.mktime(expiration_time.timetuple())) + self.mc_lock.acquire() + try: + if not self.cache.set(self.id, (self._data, expiration_time), td): + raise AssertionError( + "Session data for id %r not set." % self.id) + finally: + self.mc_lock.release() + + def _delete(self): + self.cache.delete(self.id) + + def acquire_lock(self): + """Acquire an exclusive lock on the currently-loaded session data.""" + self.locked = True + self.locks.setdefault(self.id, threading.RLock()).acquire() + if self.debug: + cherrypy.log('Lock acquired.', 'TOOLS.SESSIONS') + + def release_lock(self): + """Release the lock on the currently-loaded session data.""" + self.locks[self.id].release() + self.locked = False + + def __len__(self): + """Return the number of active sessions.""" + raise NotImplementedError + + +# Hook functions (for CherryPy tools) + +def save(): + """Save any changed session data.""" + + if not hasattr(cherrypy.serving, "session"): + return + request = cherrypy.serving.request + response = cherrypy.serving.response + + # Guard against running twice + if hasattr(request, "_sessionsaved"): + return + request._sessionsaved = True + + if response.stream: + # If the body is being streamed, we have to save the data + # *after* the response has been written out + request.hooks.attach('on_end_request', cherrypy.session.save) + else: + # If the body is not being streamed, we save the data now + # (so we can release the lock). + if is_iterator(response.body): + response.collapse_body() + cherrypy.session.save() +save.failsafe = True + + +def close(): + """Close the session object for this request.""" + sess = getattr(cherrypy.serving, "session", None) + if getattr(sess, "locked", False): + # If the session is still locked we release the lock + sess.release_lock() + if sess.debug: + cherrypy.log('Lock released on close.', 'TOOLS.SESSIONS') +close.failsafe = True +close.priority = 90 + + +def init(storage_type='ram', path=None, path_header=None, name='session_id', + timeout=60, domain=None, secure=False, clean_freq=5, + persistent=True, httponly=False, debug=False, **kwargs): + """Initialize session object (using cookies). + + storage_type + One of 'ram', 'file', 'postgresql', 'memcached'. This will be + used to look up the corresponding class in cherrypy.lib.sessions + globals. For example, 'file' will use the FileSession class. + + path + The 'path' value to stick in the response cookie metadata. + + path_header + If 'path' is None (the default), then the response + cookie 'path' will be pulled from request.headers[path_header]. + + name + The name of the cookie. + + timeout + The expiration timeout (in minutes) for the stored session data. + If 'persistent' is True (the default), this is also the timeout + for the cookie. + + domain + The cookie domain. + + secure + If False (the default) the cookie 'secure' value will not + be set. If True, the cookie 'secure' value will be set (to 1). + + clean_freq (minutes) + The poll rate for expired session cleanup. + + persistent + If True (the default), the 'timeout' argument will be used + to expire the cookie. If False, the cookie will not have an expiry, + and the cookie will be a "session cookie" which expires when the + browser is closed. + + httponly + If False (the default) the cookie 'httponly' value will not be set. + If True, the cookie 'httponly' value will be set (to 1). + + Any additional kwargs will be bound to the new Session instance, + and may be specific to the storage type. See the subclass of Session + you're using for more information. + """ + + request = cherrypy.serving.request + + # Guard against running twice + if hasattr(request, "_session_init_flag"): + return + request._session_init_flag = True + + # Check if request came with a session ID + id = None + if name in request.cookie: + id = request.cookie[name].value + if debug: + cherrypy.log('ID obtained from request.cookie: %r' % id, + 'TOOLS.SESSIONS') + + # Find the storage class and call setup (first time only). + storage_class = storage_type.title() + 'Session' + storage_class = globals()[storage_class] + if not hasattr(cherrypy, "session"): + if hasattr(storage_class, "setup"): + storage_class.setup(**kwargs) + + # Create and attach a new Session instance to cherrypy.serving. + # It will possess a reference to (and lock, and lazily load) + # the requested session data. + kwargs['timeout'] = timeout + kwargs['clean_freq'] = clean_freq + cherrypy.serving.session = sess = storage_class(id, **kwargs) + sess.debug = debug + + def update_cookie(id): + """Update the cookie every time the session id changes.""" + cherrypy.serving.response.cookie[name] = id + sess.id_observers.append(update_cookie) + + # Create cherrypy.session which will proxy to cherrypy.serving.session + if not hasattr(cherrypy, "session"): + cherrypy.session = cherrypy._ThreadLocalProxy('session') + + if persistent: + cookie_timeout = timeout + else: + # See http://support.microsoft.com/kb/223799/EN-US/ + # and http://support.mozilla.com/en-US/kb/Cookies + cookie_timeout = None + set_response_cookie(path=path, path_header=path_header, name=name, + timeout=cookie_timeout, domain=domain, secure=secure, + httponly=httponly) + + +def set_response_cookie(path=None, path_header=None, name='session_id', + timeout=60, domain=None, secure=False, httponly=False): + """Set a response cookie for the client. + + path + the 'path' value to stick in the response cookie metadata. + + path_header + if 'path' is None (the default), then the response + cookie 'path' will be pulled from request.headers[path_header]. + + name + the name of the cookie. + + timeout + the expiration timeout for the cookie. If 0 or other boolean + False, no 'expires' param will be set, and the cookie will be a + "session cookie" which expires when the browser is closed. + + domain + the cookie domain. + + secure + if False (the default) the cookie 'secure' value will not + be set. If True, the cookie 'secure' value will be set (to 1). + + httponly + If False (the default) the cookie 'httponly' value will not be set. + If True, the cookie 'httponly' value will be set (to 1). + + """ + # Set response cookie + cookie = cherrypy.serving.response.cookie + cookie[name] = cherrypy.serving.session.id + cookie[name]['path'] = ( + path or + cherrypy.serving.request.headers.get(path_header) or + '/' + ) + + # We'd like to use the "max-age" param as indicated in + # http://www.faqs.org/rfcs/rfc2109.html but IE doesn't + # save it to disk and the session is lost if people close + # the browser. So we have to use the old "expires" ... sigh ... +## cookie[name]['max-age'] = timeout * 60 + if timeout: + e = time.time() + (timeout * 60) + cookie[name]['expires'] = httputil.HTTPDate(e) + if domain is not None: + cookie[name]['domain'] = domain + if secure: + cookie[name]['secure'] = 1 + if httponly: + if not cookie[name].isReservedKey('httponly'): + raise ValueError("The httponly cookie token is not supported.") + cookie[name]['httponly'] = 1 + + +def expire(): + """Expire the current session cookie.""" + name = cherrypy.serving.request.config.get( + 'tools.sessions.name', 'session_id') + one_year = 60 * 60 * 24 * 365 + e = time.time() - one_year + cherrypy.serving.response.cookie[name]['expires'] = httputil.HTTPDate(e) diff --git a/lib/cherrypy/lib/static.py b/lib/cherrypy/lib/static.py new file mode 100644 index 00000000..a630dae6 --- /dev/null +++ b/lib/cherrypy/lib/static.py @@ -0,0 +1,378 @@ +import os +import re +import stat +import mimetypes + +try: + from io import UnsupportedOperation +except ImportError: + UnsupportedOperation = object() + +import cherrypy +from cherrypy._cpcompat import ntob, unquote +from cherrypy.lib import cptools, httputil, file_generator_limited + + +mimetypes.init() +mimetypes.types_map['.dwg'] = 'image/x-dwg' +mimetypes.types_map['.ico'] = 'image/x-icon' +mimetypes.types_map['.bz2'] = 'application/x-bzip2' +mimetypes.types_map['.gz'] = 'application/x-gzip' + + +def serve_file(path, content_type=None, disposition=None, name=None, + debug=False): + """Set status, headers, and body in order to serve the given path. + + The Content-Type header will be set to the content_type arg, if provided. + If not provided, the Content-Type will be guessed by the file extension + of the 'path' argument. + + If disposition is not None, the Content-Disposition header will be set + to "; filename=". If name is None, it will be set + to the basename of path. If disposition is None, no Content-Disposition + header will be written. + """ + + response = cherrypy.serving.response + + # If path is relative, users should fix it by making path absolute. + # That is, CherryPy should not guess where the application root is. + # It certainly should *not* use cwd (since CP may be invoked from a + # variety of paths). If using tools.staticdir, you can make your relative + # paths become absolute by supplying a value for "tools.staticdir.root". + if not os.path.isabs(path): + msg = "'%s' is not an absolute path." % path + if debug: + cherrypy.log(msg, 'TOOLS.STATICFILE') + raise ValueError(msg) + + try: + st = os.stat(path) + except OSError: + if debug: + cherrypy.log('os.stat(%r) failed' % path, 'TOOLS.STATIC') + raise cherrypy.NotFound() + + # Check if path is a directory. + if stat.S_ISDIR(st.st_mode): + # Let the caller deal with it as they like. + if debug: + cherrypy.log('%r is a directory' % path, 'TOOLS.STATIC') + raise cherrypy.NotFound() + + # Set the Last-Modified response header, so that + # modified-since validation code can work. + response.headers['Last-Modified'] = httputil.HTTPDate(st.st_mtime) + cptools.validate_since() + + if content_type is None: + # Set content-type based on filename extension + ext = "" + i = path.rfind('.') + if i != -1: + ext = path[i:].lower() + content_type = mimetypes.types_map.get(ext, None) + if content_type is not None: + response.headers['Content-Type'] = content_type + if debug: + cherrypy.log('Content-Type: %r' % content_type, 'TOOLS.STATIC') + + cd = None + if disposition is not None: + if name is None: + name = os.path.basename(path) + cd = '%s; filename="%s"' % (disposition, name) + response.headers["Content-Disposition"] = cd + if debug: + cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC') + + # Set Content-Length and use an iterable (file object) + # this way CP won't load the whole file in memory + content_length = st.st_size + fileobj = open(path, 'rb') + return _serve_fileobj(fileobj, content_type, content_length, debug=debug) + + +def serve_fileobj(fileobj, content_type=None, disposition=None, name=None, + debug=False): + """Set status, headers, and body in order to serve the given file object. + + The Content-Type header will be set to the content_type arg, if provided. + + If disposition is not None, the Content-Disposition header will be set + to "; filename=". If name is None, 'filename' will + not be set. If disposition is None, no Content-Disposition header will + be written. + + CAUTION: If the request contains a 'Range' header, one or more seek()s will + be performed on the file object. This may cause undesired behavior if + the file object is not seekable. It could also produce undesired results + if the caller set the read position of the file object prior to calling + serve_fileobj(), expecting that the data would be served starting from that + position. + """ + + response = cherrypy.serving.response + + try: + st = os.fstat(fileobj.fileno()) + except AttributeError: + if debug: + cherrypy.log('os has no fstat attribute', 'TOOLS.STATIC') + content_length = None + except UnsupportedOperation: + content_length = None + else: + # Set the Last-Modified response header, so that + # modified-since validation code can work. + response.headers['Last-Modified'] = httputil.HTTPDate(st.st_mtime) + cptools.validate_since() + content_length = st.st_size + + if content_type is not None: + response.headers['Content-Type'] = content_type + if debug: + cherrypy.log('Content-Type: %r' % content_type, 'TOOLS.STATIC') + + cd = None + if disposition is not None: + if name is None: + cd = disposition + else: + cd = '%s; filename="%s"' % (disposition, name) + response.headers["Content-Disposition"] = cd + if debug: + cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC') + + return _serve_fileobj(fileobj, content_type, content_length, debug=debug) + + +def _serve_fileobj(fileobj, content_type, content_length, debug=False): + """Internal. Set response.body to the given file object, perhaps ranged.""" + response = cherrypy.serving.response + + # HTTP/1.0 didn't have Range/Accept-Ranges headers, or the 206 code + request = cherrypy.serving.request + if request.protocol >= (1, 1): + response.headers["Accept-Ranges"] = "bytes" + r = httputil.get_ranges(request.headers.get('Range'), content_length) + if r == []: + response.headers['Content-Range'] = "bytes */%s" % content_length + message = ("Invalid Range (first-byte-pos greater than " + "Content-Length)") + if debug: + cherrypy.log(message, 'TOOLS.STATIC') + raise cherrypy.HTTPError(416, message) + + if r: + if len(r) == 1: + # Return a single-part response. + start, stop = r[0] + if stop > content_length: + stop = content_length + r_len = stop - start + if debug: + cherrypy.log( + 'Single part; start: %r, stop: %r' % (start, stop), + 'TOOLS.STATIC') + response.status = "206 Partial Content" + response.headers['Content-Range'] = ( + "bytes %s-%s/%s" % (start, stop - 1, content_length)) + response.headers['Content-Length'] = r_len + fileobj.seek(start) + response.body = file_generator_limited(fileobj, r_len) + else: + # Return a multipart/byteranges response. + response.status = "206 Partial Content" + try: + # Python 3 + from email.generator import _make_boundary as make_boundary + except ImportError: + # Python 2 + from mimetools import choose_boundary as make_boundary + boundary = make_boundary() + ct = "multipart/byteranges; boundary=%s" % boundary + response.headers['Content-Type'] = ct + if "Content-Length" in response.headers: + # Delete Content-Length header so finalize() recalcs it. + del response.headers["Content-Length"] + + def file_ranges(): + # Apache compatibility: + yield ntob("\r\n") + + for start, stop in r: + if debug: + cherrypy.log( + 'Multipart; start: %r, stop: %r' % ( + start, stop), + 'TOOLS.STATIC') + yield ntob("--" + boundary, 'ascii') + yield ntob("\r\nContent-type: %s" % content_type, + 'ascii') + yield ntob( + "\r\nContent-range: bytes %s-%s/%s\r\n\r\n" % ( + start, stop - 1, content_length), + 'ascii') + fileobj.seek(start) + gen = file_generator_limited(fileobj, stop - start) + for chunk in gen: + yield chunk + yield ntob("\r\n") + # Final boundary + yield ntob("--" + boundary + "--", 'ascii') + + # Apache compatibility: + yield ntob("\r\n") + response.body = file_ranges() + return response.body + else: + if debug: + cherrypy.log('No byteranges requested', 'TOOLS.STATIC') + + # Set Content-Length and use an iterable (file object) + # this way CP won't load the whole file in memory + response.headers['Content-Length'] = content_length + response.body = fileobj + return response.body + + +def serve_download(path, name=None): + """Serve 'path' as an application/x-download attachment.""" + # This is such a common idiom I felt it deserved its own wrapper. + return serve_file(path, "application/x-download", "attachment", name) + + +def _attempt(filename, content_types, debug=False): + if debug: + cherrypy.log('Attempting %r (content_types %r)' % + (filename, content_types), 'TOOLS.STATICDIR') + try: + # you can set the content types for a + # complete directory per extension + content_type = None + if content_types: + r, ext = os.path.splitext(filename) + content_type = content_types.get(ext[1:], None) + serve_file(filename, content_type=content_type, debug=debug) + return True + except cherrypy.NotFound: + # If we didn't find the static file, continue handling the + # request. We might find a dynamic handler instead. + if debug: + cherrypy.log('NotFound', 'TOOLS.STATICFILE') + return False + + +def staticdir(section, dir, root="", match="", content_types=None, index="", + debug=False): + """Serve a static resource from the given (root +) dir. + + match + If given, request.path_info will be searched for the given + regular expression before attempting to serve static content. + + content_types + If given, it should be a Python dictionary of + {file-extension: content-type} pairs, where 'file-extension' is + a string (e.g. "gif") and 'content-type' is the value to write + out in the Content-Type response header (e.g. "image/gif"). + + index + If provided, it should be the (relative) name of a file to + serve for directory requests. For example, if the dir argument is + '/home/me', the Request-URI is 'myapp', and the index arg is + 'index.html', the file '/home/me/myapp/index.html' will be sought. + """ + request = cherrypy.serving.request + if request.method not in ('GET', 'HEAD'): + if debug: + cherrypy.log('request.method not GET or HEAD', 'TOOLS.STATICDIR') + return False + + if match and not re.search(match, request.path_info): + if debug: + cherrypy.log('request.path_info %r does not match pattern %r' % + (request.path_info, match), 'TOOLS.STATICDIR') + return False + + # Allow the use of '~' to refer to a user's home directory. + dir = os.path.expanduser(dir) + + # If dir is relative, make absolute using "root". + if not os.path.isabs(dir): + if not root: + msg = "Static dir requires an absolute dir (or root)." + if debug: + cherrypy.log(msg, 'TOOLS.STATICDIR') + raise ValueError(msg) + dir = os.path.join(root, dir) + + # Determine where we are in the object tree relative to 'section' + # (where the static tool was defined). + if section == 'global': + section = "/" + section = section.rstrip(r"\/") + branch = request.path_info[len(section) + 1:] + branch = unquote(branch.lstrip(r"\/")) + + # If branch is "", filename will end in a slash + filename = os.path.join(dir, branch) + if debug: + cherrypy.log('Checking file %r to fulfill %r' % + (filename, request.path_info), 'TOOLS.STATICDIR') + + # There's a chance that the branch pulled from the URL might + # have ".." or similar uplevel attacks in it. Check that the final + # filename is a child of dir. + if not os.path.normpath(filename).startswith(os.path.normpath(dir)): + raise cherrypy.HTTPError(403) # Forbidden + + handled = _attempt(filename, content_types) + if not handled: + # Check for an index file if a folder was requested. + if index: + handled = _attempt(os.path.join(filename, index), content_types) + if handled: + request.is_index = filename[-1] in (r"\/") + return handled + + +def staticfile(filename, root=None, match="", content_types=None, debug=False): + """Serve a static resource from the given (root +) filename. + + match + If given, request.path_info will be searched for the given + regular expression before attempting to serve static content. + + content_types + If given, it should be a Python dictionary of + {file-extension: content-type} pairs, where 'file-extension' is + a string (e.g. "gif") and 'content-type' is the value to write + out in the Content-Type response header (e.g. "image/gif"). + + """ + request = cherrypy.serving.request + if request.method not in ('GET', 'HEAD'): + if debug: + cherrypy.log('request.method not GET or HEAD', 'TOOLS.STATICFILE') + return False + + if match and not re.search(match, request.path_info): + if debug: + cherrypy.log('request.path_info %r does not match pattern %r' % + (request.path_info, match), 'TOOLS.STATICFILE') + return False + + # If filename is relative, make absolute using "root". + if not os.path.isabs(filename): + if not root: + msg = "Static tool requires an absolute filename (got '%s')." % ( + filename,) + if debug: + cherrypy.log(msg, 'TOOLS.STATICFILE') + raise ValueError(msg) + filename = os.path.join(root, filename) + + return _attempt(filename, content_types, debug=debug) diff --git a/lib/cherrypy/lib/xmlrpcutil.py b/lib/cherrypy/lib/xmlrpcutil.py new file mode 100644 index 00000000..9fc9564f --- /dev/null +++ b/lib/cherrypy/lib/xmlrpcutil.py @@ -0,0 +1,57 @@ +import sys + +import cherrypy +from cherrypy._cpcompat import ntob + + +def get_xmlrpclib(): + try: + import xmlrpc.client as x + except ImportError: + import xmlrpclib as x + return x + + +def process_body(): + """Return (params, method) from request body.""" + try: + return get_xmlrpclib().loads(cherrypy.request.body.read()) + except Exception: + return ('ERROR PARAMS', ), 'ERRORMETHOD' + + +def patched_path(path): + """Return 'path', doctored for RPC.""" + if not path.endswith('/'): + path += '/' + if path.startswith('/RPC2/'): + # strip the first /rpc2 + path = path[5:] + return path + + +def _set_response(body): + # The XML-RPC spec (http://www.xmlrpc.com/spec) says: + # "Unless there's a lower-level error, always return 200 OK." + # Since Python's xmlrpclib interprets a non-200 response + # as a "Protocol Error", we'll just return 200 every time. + response = cherrypy.response + response.status = '200 OK' + response.body = ntob(body, 'utf-8') + response.headers['Content-Type'] = 'text/xml' + response.headers['Content-Length'] = len(body) + + +def respond(body, encoding='utf-8', allow_none=0): + xmlrpclib = get_xmlrpclib() + if not isinstance(body, xmlrpclib.Fault): + body = (body,) + _set_response(xmlrpclib.dumps(body, methodresponse=1, + encoding=encoding, + allow_none=allow_none)) + + +def on_error(*args, **kwargs): + body = str(sys.exc_info()[1]) + xmlrpclib = get_xmlrpclib() + _set_response(xmlrpclib.dumps(xmlrpclib.Fault(1, body))) diff --git a/lib/cherrypy/process/__init__.py b/lib/cherrypy/process/__init__.py new file mode 100644 index 00000000..f15b1237 --- /dev/null +++ b/lib/cherrypy/process/__init__.py @@ -0,0 +1,14 @@ +"""Site container for an HTTP server. + +A Web Site Process Bus object is used to connect applications, servers, +and frameworks with site-wide services such as daemonization, process +reload, signal handling, drop privileges, PID file management, logging +for all of these, and many more. + +The 'plugins' module defines a few abstract and concrete services for +use with the bus. Some use tool-specific channels; see the documentation +for each class. +""" + +from cherrypy.process.wspbus import bus +from cherrypy.process import plugins, servers diff --git a/lib/cherrypy/process/plugins.py b/lib/cherrypy/process/plugins.py new file mode 100644 index 00000000..c787ba92 --- /dev/null +++ b/lib/cherrypy/process/plugins.py @@ -0,0 +1,717 @@ +"""Site services for use with a Web Site Process Bus.""" + +import os +import re +import signal as _signal +import sys +import time +import threading + +from cherrypy._cpcompat import basestring, get_daemon, get_thread_ident +from cherrypy._cpcompat import ntob, set, Timer, SetDaemonProperty + +# _module__file__base is used by Autoreload to make +# absolute any filenames retrieved from sys.modules which are not +# already absolute paths. This is to work around Python's quirk +# of importing the startup script and using a relative filename +# for it in sys.modules. +# +# Autoreload examines sys.modules afresh every time it runs. If an application +# changes the current directory by executing os.chdir(), then the next time +# Autoreload runs, it will not be able to find any filenames which are +# not absolute paths, because the current directory is not the same as when the +# module was first imported. Autoreload will then wrongly conclude the file +# has "changed", and initiate the shutdown/re-exec sequence. +# See ticket #917. +# For this workaround to have a decent probability of success, this module +# needs to be imported as early as possible, before the app has much chance +# to change the working directory. +_module__file__base = os.getcwd() + + +class SimplePlugin(object): + + """Plugin base class which auto-subscribes methods for known channels.""" + + bus = None + """A :class:`Bus `, usually cherrypy.engine. + """ + + def __init__(self, bus): + self.bus = bus + + def subscribe(self): + """Register this object as a (multi-channel) listener on the bus.""" + for channel in self.bus.listeners: + # Subscribe self.start, self.exit, etc. if present. + method = getattr(self, channel, None) + if method is not None: + self.bus.subscribe(channel, method) + + def unsubscribe(self): + """Unregister this object as a listener on the bus.""" + for channel in self.bus.listeners: + # Unsubscribe self.start, self.exit, etc. if present. + method = getattr(self, channel, None) + if method is not None: + self.bus.unsubscribe(channel, method) + + +class SignalHandler(object): + + """Register bus channels (and listeners) for system signals. + + You can modify what signals your application listens for, and what it does + when it receives signals, by modifying :attr:`SignalHandler.handlers`, + a dict of {signal name: callback} pairs. The default set is:: + + handlers = {'SIGTERM': self.bus.exit, + 'SIGHUP': self.handle_SIGHUP, + 'SIGUSR1': self.bus.graceful, + } + + The :func:`SignalHandler.handle_SIGHUP`` method calls + :func:`bus.restart()` + if the process is daemonized, but + :func:`bus.exit()` + if the process is attached to a TTY. This is because Unix window + managers tend to send SIGHUP to terminal windows when the user closes them. + + Feel free to add signals which are not available on every platform. + The :class:`SignalHandler` will ignore errors raised from attempting + to register handlers for unknown signals. + """ + + handlers = {} + """A map from signal names (e.g. 'SIGTERM') to handlers (e.g. bus.exit).""" + + signals = {} + """A map from signal numbers to names.""" + + for k, v in vars(_signal).items(): + if k.startswith('SIG') and not k.startswith('SIG_'): + signals[v] = k + del k, v + + def __init__(self, bus): + self.bus = bus + # Set default handlers + self.handlers = {'SIGTERM': self.bus.exit, + 'SIGHUP': self.handle_SIGHUP, + 'SIGUSR1': self.bus.graceful, + } + + if sys.platform[:4] == 'java': + del self.handlers['SIGUSR1'] + self.handlers['SIGUSR2'] = self.bus.graceful + self.bus.log("SIGUSR1 cannot be set on the JVM platform. " + "Using SIGUSR2 instead.") + self.handlers['SIGINT'] = self._jython_SIGINT_handler + + self._previous_handlers = {} + + def _jython_SIGINT_handler(self, signum=None, frame=None): + # See http://bugs.jython.org/issue1313 + self.bus.log('Keyboard Interrupt: shutting down bus') + self.bus.exit() + + def subscribe(self): + """Subscribe self.handlers to signals.""" + for sig, func in self.handlers.items(): + try: + self.set_handler(sig, func) + except ValueError: + pass + + def unsubscribe(self): + """Unsubscribe self.handlers from signals.""" + for signum, handler in self._previous_handlers.items(): + signame = self.signals[signum] + + if handler is None: + self.bus.log("Restoring %s handler to SIG_DFL." % signame) + handler = _signal.SIG_DFL + else: + self.bus.log("Restoring %s handler %r." % (signame, handler)) + + try: + our_handler = _signal.signal(signum, handler) + if our_handler is None: + self.bus.log("Restored old %s handler %r, but our " + "handler was not registered." % + (signame, handler), level=30) + except ValueError: + self.bus.log("Unable to restore %s handler %r." % + (signame, handler), level=40, traceback=True) + + def set_handler(self, signal, listener=None): + """Subscribe a handler for the given signal (number or name). + + If the optional 'listener' argument is provided, it will be + subscribed as a listener for the given signal's channel. + + If the given signal name or number is not available on the current + platform, ValueError is raised. + """ + if isinstance(signal, basestring): + signum = getattr(_signal, signal, None) + if signum is None: + raise ValueError("No such signal: %r" % signal) + signame = signal + else: + try: + signame = self.signals[signal] + except KeyError: + raise ValueError("No such signal: %r" % signal) + signum = signal + + prev = _signal.signal(signum, self._handle_signal) + self._previous_handlers[signum] = prev + + if listener is not None: + self.bus.log("Listening for %s." % signame) + self.bus.subscribe(signame, listener) + + def _handle_signal(self, signum=None, frame=None): + """Python signal handler (self.set_handler subscribes it for you).""" + signame = self.signals[signum] + self.bus.log("Caught signal %s." % signame) + self.bus.publish(signame) + + def handle_SIGHUP(self): + """Restart if daemonized, else exit.""" + if os.isatty(sys.stdin.fileno()): + # not daemonized (may be foreground or background) + self.bus.log("SIGHUP caught but not daemonized. Exiting.") + self.bus.exit() + else: + self.bus.log("SIGHUP caught while daemonized. Restarting.") + self.bus.restart() + + +try: + import pwd + import grp +except ImportError: + pwd, grp = None, None + + +class DropPrivileges(SimplePlugin): + + """Drop privileges. uid/gid arguments not available on Windows. + + Special thanks to `Gavin Baker `_ + """ + + def __init__(self, bus, umask=None, uid=None, gid=None): + SimplePlugin.__init__(self, bus) + self.finalized = False + self.uid = uid + self.gid = gid + self.umask = umask + + def _get_uid(self): + return self._uid + + def _set_uid(self, val): + if val is not None: + if pwd is None: + self.bus.log("pwd module not available; ignoring uid.", + level=30) + val = None + elif isinstance(val, basestring): + val = pwd.getpwnam(val)[2] + self._uid = val + uid = property(_get_uid, _set_uid, + doc="The uid under which to run. Availability: Unix.") + + def _get_gid(self): + return self._gid + + def _set_gid(self, val): + if val is not None: + if grp is None: + self.bus.log("grp module not available; ignoring gid.", + level=30) + val = None + elif isinstance(val, basestring): + val = grp.getgrnam(val)[2] + self._gid = val + gid = property(_get_gid, _set_gid, + doc="The gid under which to run. Availability: Unix.") + + def _get_umask(self): + return self._umask + + def _set_umask(self, val): + if val is not None: + try: + os.umask + except AttributeError: + self.bus.log("umask function not available; ignoring umask.", + level=30) + val = None + self._umask = val + umask = property( + _get_umask, + _set_umask, + doc="""The default permission mode for newly created files and + directories. + + Usually expressed in octal format, for example, ``0644``. + Availability: Unix, Windows. + """) + + def start(self): + # uid/gid + def current_ids(): + """Return the current (uid, gid) if available.""" + name, group = None, None + if pwd: + name = pwd.getpwuid(os.getuid())[0] + if grp: + group = grp.getgrgid(os.getgid())[0] + return name, group + + if self.finalized: + if not (self.uid is None and self.gid is None): + self.bus.log('Already running as uid: %r gid: %r' % + current_ids()) + else: + if self.uid is None and self.gid is None: + if pwd or grp: + self.bus.log('uid/gid not set', level=30) + else: + self.bus.log('Started as uid: %r gid: %r' % current_ids()) + if self.gid is not None: + os.setgid(self.gid) + os.setgroups([]) + if self.uid is not None: + os.setuid(self.uid) + self.bus.log('Running as uid: %r gid: %r' % current_ids()) + + # umask + if self.finalized: + if self.umask is not None: + self.bus.log('umask already set to: %03o' % self.umask) + else: + if self.umask is None: + self.bus.log('umask not set', level=30) + else: + old_umask = os.umask(self.umask) + self.bus.log('umask old: %03o, new: %03o' % + (old_umask, self.umask)) + + self.finalized = True + # This is slightly higher than the priority for server.start + # in order to facilitate the most common use: starting on a low + # port (which requires root) and then dropping to another user. + start.priority = 77 + + +class Daemonizer(SimplePlugin): + + """Daemonize the running script. + + Use this with a Web Site Process Bus via:: + + Daemonizer(bus).subscribe() + + When this component finishes, the process is completely decoupled from + the parent environment. Please note that when this component is used, + the return code from the parent process will still be 0 if a startup + error occurs in the forked children. Errors in the initial daemonizing + process still return proper exit codes. Therefore, if you use this + plugin to daemonize, don't use the return code as an accurate indicator + of whether the process fully started. In fact, that return code only + indicates if the process succesfully finished the first fork. + """ + + def __init__(self, bus, stdin='/dev/null', stdout='/dev/null', + stderr='/dev/null'): + SimplePlugin.__init__(self, bus) + self.stdin = stdin + self.stdout = stdout + self.stderr = stderr + self.finalized = False + + def start(self): + if self.finalized: + self.bus.log('Already deamonized.') + + # forking has issues with threads: + # http://www.opengroup.org/onlinepubs/000095399/functions/fork.html + # "The general problem with making fork() work in a multi-threaded + # world is what to do with all of the threads..." + # So we check for active threads: + if threading.activeCount() != 1: + self.bus.log('There are %r active threads. ' + 'Daemonizing now may cause strange failures.' % + threading.enumerate(), level=30) + + # See http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16 + # (or http://www.faqs.org/faqs/unix-faq/programmer/faq/ section 1.7) + # and http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66012 + + # Finish up with the current stdout/stderr + sys.stdout.flush() + sys.stderr.flush() + + # Do first fork. + try: + pid = os.fork() + if pid == 0: + # This is the child process. Continue. + pass + else: + # This is the first parent. Exit, now that we've forked. + self.bus.log('Forking once.') + os._exit(0) + except OSError: + # Python raises OSError rather than returning negative numbers. + exc = sys.exc_info()[1] + sys.exit("%s: fork #1 failed: (%d) %s\n" + % (sys.argv[0], exc.errno, exc.strerror)) + + os.setsid() + + # Do second fork + try: + pid = os.fork() + if pid > 0: + self.bus.log('Forking twice.') + os._exit(0) # Exit second parent + except OSError: + exc = sys.exc_info()[1] + sys.exit("%s: fork #2 failed: (%d) %s\n" + % (sys.argv[0], exc.errno, exc.strerror)) + + os.chdir("/") + os.umask(0) + + si = open(self.stdin, "r") + so = open(self.stdout, "a+") + se = open(self.stderr, "a+") + + # os.dup2(fd, fd2) will close fd2 if necessary, + # so we don't explicitly close stdin/out/err. + # See http://docs.python.org/lib/os-fd-ops.html + os.dup2(si.fileno(), sys.stdin.fileno()) + os.dup2(so.fileno(), sys.stdout.fileno()) + os.dup2(se.fileno(), sys.stderr.fileno()) + + self.bus.log('Daemonized to PID: %s' % os.getpid()) + self.finalized = True + start.priority = 65 + + +class PIDFile(SimplePlugin): + + """Maintain a PID file via a WSPBus.""" + + def __init__(self, bus, pidfile): + SimplePlugin.__init__(self, bus) + self.pidfile = pidfile + self.finalized = False + + def start(self): + pid = os.getpid() + if self.finalized: + self.bus.log('PID %r already written to %r.' % (pid, self.pidfile)) + else: + open(self.pidfile, "wb").write(ntob("%s\n" % pid, 'utf8')) + self.bus.log('PID %r written to %r.' % (pid, self.pidfile)) + self.finalized = True + start.priority = 70 + + def exit(self): + try: + os.remove(self.pidfile) + self.bus.log('PID file removed: %r.' % self.pidfile) + except (KeyboardInterrupt, SystemExit): + raise + except: + pass + + +class PerpetualTimer(Timer): + + """A responsive subclass of threading.Timer whose run() method repeats. + + Use this timer only when you really need a very interruptible timer; + this checks its 'finished' condition up to 20 times a second, which can + results in pretty high CPU usage + """ + + def __init__(self, *args, **kwargs): + "Override parent constructor to allow 'bus' to be provided." + self.bus = kwargs.pop('bus', None) + super(PerpetualTimer, self).__init__(*args, **kwargs) + + def run(self): + while True: + self.finished.wait(self.interval) + if self.finished.isSet(): + return + try: + self.function(*self.args, **self.kwargs) + except Exception: + if self.bus: + self.bus.log( + "Error in perpetual timer thread function %r." % + self.function, level=40, traceback=True) + # Quit on first error to avoid massive logs. + raise + + +class BackgroundTask(SetDaemonProperty, threading.Thread): + + """A subclass of threading.Thread whose run() method repeats. + + Use this class for most repeating tasks. It uses time.sleep() to wait + for each interval, which isn't very responsive; that is, even if you call + self.cancel(), you'll have to wait until the sleep() call finishes before + the thread stops. To compensate, it defaults to being daemonic, which means + it won't delay stopping the whole process. + """ + + def __init__(self, interval, function, args=[], kwargs={}, bus=None): + threading.Thread.__init__(self) + self.interval = interval + self.function = function + self.args = args + self.kwargs = kwargs + self.running = False + self.bus = bus + + # default to daemonic + self.daemon = True + + def cancel(self): + self.running = False + + def run(self): + self.running = True + while self.running: + time.sleep(self.interval) + if not self.running: + return + try: + self.function(*self.args, **self.kwargs) + except Exception: + if self.bus: + self.bus.log("Error in background task thread function %r." + % self.function, level=40, traceback=True) + # Quit on first error to avoid massive logs. + raise + + +class Monitor(SimplePlugin): + + """WSPBus listener to periodically run a callback in its own thread.""" + + callback = None + """The function to call at intervals.""" + + frequency = 60 + """The time in seconds between callback runs.""" + + thread = None + """A :class:`BackgroundTask` + thread. + """ + + def __init__(self, bus, callback, frequency=60, name=None): + SimplePlugin.__init__(self, bus) + self.callback = callback + self.frequency = frequency + self.thread = None + self.name = name + + def start(self): + """Start our callback in its own background thread.""" + if self.frequency > 0: + threadname = self.name or self.__class__.__name__ + if self.thread is None: + self.thread = BackgroundTask(self.frequency, self.callback, + bus=self.bus) + self.thread.setName(threadname) + self.thread.start() + self.bus.log("Started monitor thread %r." % threadname) + else: + self.bus.log("Monitor thread %r already started." % threadname) + start.priority = 70 + + def stop(self): + """Stop our callback's background task thread.""" + if self.thread is None: + self.bus.log("No thread running for %s." % + self.name or self.__class__.__name__) + else: + if self.thread is not threading.currentThread(): + name = self.thread.getName() + self.thread.cancel() + if not get_daemon(self.thread): + self.bus.log("Joining %r" % name) + self.thread.join() + self.bus.log("Stopped thread %r." % name) + self.thread = None + + def graceful(self): + """Stop the callback's background task thread and restart it.""" + self.stop() + self.start() + + +class Autoreloader(Monitor): + + """Monitor which re-executes the process when files change. + + This :ref:`plugin` restarts the process (via :func:`os.execv`) + if any of the files it monitors change (or is deleted). By default, the + autoreloader monitors all imported modules; you can add to the + set by adding to ``autoreload.files``:: + + cherrypy.engine.autoreload.files.add(myFile) + + If there are imported files you do *not* wish to monitor, you can + adjust the ``match`` attribute, a regular expression. For example, + to stop monitoring cherrypy itself:: + + cherrypy.engine.autoreload.match = r'^(?!cherrypy).+' + + Like all :class:`Monitor` plugins, + the autoreload plugin takes a ``frequency`` argument. The default is + 1 second; that is, the autoreloader will examine files once each second. + """ + + files = None + """The set of files to poll for modifications.""" + + frequency = 1 + """The interval in seconds at which to poll for modified files.""" + + match = '.*' + """A regular expression by which to match filenames.""" + + def __init__(self, bus, frequency=1, match='.*'): + self.mtimes = {} + self.files = set() + self.match = match + Monitor.__init__(self, bus, self.run, frequency) + + def start(self): + """Start our own background task thread for self.run.""" + if self.thread is None: + self.mtimes = {} + Monitor.start(self) + start.priority = 70 + + def sysfiles(self): + """Return a Set of sys.modules filenames to monitor.""" + files = set() + for k, m in list(sys.modules.items()): + if re.match(self.match, k): + if ( + hasattr(m, '__loader__') and + hasattr(m.__loader__, 'archive') + ): + f = m.__loader__.archive + else: + f = getattr(m, '__file__', None) + if f is not None and not os.path.isabs(f): + # ensure absolute paths so a os.chdir() in the app + # doesn't break me + f = os.path.normpath( + os.path.join(_module__file__base, f)) + files.add(f) + return files + + def run(self): + """Reload the process if registered files have been modified.""" + for filename in self.sysfiles() | self.files: + if filename: + if filename.endswith('.pyc'): + filename = filename[:-1] + + oldtime = self.mtimes.get(filename, 0) + if oldtime is None: + # Module with no .py file. Skip it. + continue + + try: + mtime = os.stat(filename).st_mtime + except OSError: + # Either a module with no .py file, or it's been deleted. + mtime = None + + if filename not in self.mtimes: + # If a module has no .py file, this will be None. + self.mtimes[filename] = mtime + else: + if mtime is None or mtime > oldtime: + # The file has been deleted or modified. + self.bus.log("Restarting because %s changed." % + filename) + self.thread.cancel() + self.bus.log("Stopped thread %r." % + self.thread.getName()) + self.bus.restart() + return + + +class ThreadManager(SimplePlugin): + + """Manager for HTTP request threads. + + If you have control over thread creation and destruction, publish to + the 'acquire_thread' and 'release_thread' channels (for each thread). + This will register/unregister the current thread and publish to + 'start_thread' and 'stop_thread' listeners in the bus as needed. + + If threads are created and destroyed by code you do not control + (e.g., Apache), then, at the beginning of every HTTP request, + publish to 'acquire_thread' only. You should not publish to + 'release_thread' in this case, since you do not know whether + the thread will be re-used or not. The bus will call + 'stop_thread' listeners for you when it stops. + """ + + threads = None + """A map of {thread ident: index number} pairs.""" + + def __init__(self, bus): + self.threads = {} + SimplePlugin.__init__(self, bus) + self.bus.listeners.setdefault('acquire_thread', set()) + self.bus.listeners.setdefault('start_thread', set()) + self.bus.listeners.setdefault('release_thread', set()) + self.bus.listeners.setdefault('stop_thread', set()) + + def acquire_thread(self): + """Run 'start_thread' listeners for the current thread. + + If the current thread has already been seen, any 'start_thread' + listeners will not be run again. + """ + thread_ident = get_thread_ident() + if thread_ident not in self.threads: + # We can't just use get_ident as the thread ID + # because some platforms reuse thread ID's. + i = len(self.threads) + 1 + self.threads[thread_ident] = i + self.bus.publish('start_thread', i) + + def release_thread(self): + """Release the current thread and run 'stop_thread' listeners.""" + thread_ident = get_thread_ident() + i = self.threads.pop(thread_ident, None) + if i is not None: + self.bus.publish('stop_thread', i) + + def stop(self): + """Release all threads and run all 'stop_thread' listeners.""" + for thread_ident, i in self.threads.items(): + self.bus.publish('stop_thread', i) + self.threads.clear() + graceful = stop diff --git a/lib/cherrypy/process/servers.py b/lib/cherrypy/process/servers.py new file mode 100644 index 00000000..fef37f77 --- /dev/null +++ b/lib/cherrypy/process/servers.py @@ -0,0 +1,465 @@ +""" +Starting in CherryPy 3.1, cherrypy.server is implemented as an +:ref:`Engine Plugin`. It's an instance of +:class:`cherrypy._cpserver.Server`, which is a subclass of +:class:`cherrypy.process.servers.ServerAdapter`. The ``ServerAdapter`` class +is designed to control other servers, as well. + +Multiple servers/ports +====================== + +If you need to start more than one HTTP server (to serve on multiple ports, or +protocols, etc.), you can manually register each one and then start them all +with engine.start:: + + s1 = ServerAdapter(cherrypy.engine, MyWSGIServer(host='0.0.0.0', port=80)) + s2 = ServerAdapter(cherrypy.engine, + another.HTTPServer(host='127.0.0.1', + SSL=True)) + s1.subscribe() + s2.subscribe() + cherrypy.engine.start() + +.. index:: SCGI + +FastCGI/SCGI +============ + +There are also Flup\ **F**\ CGIServer and Flup\ **S**\ CGIServer classes in +:mod:`cherrypy.process.servers`. To start an fcgi server, for example, +wrap an instance of it in a ServerAdapter:: + + addr = ('0.0.0.0', 4000) + f = servers.FlupFCGIServer(application=cherrypy.tree, bindAddress=addr) + s = servers.ServerAdapter(cherrypy.engine, httpserver=f, bind_addr=addr) + s.subscribe() + +The :doc:`cherryd` startup script will do the above for +you via its `-f` flag. +Note that you need to download and install `flup `_ +yourself, whether you use ``cherryd`` or not. + +.. _fastcgi: +.. index:: FastCGI + +FastCGI +------- + +A very simple setup lets your cherry run with FastCGI. +You just need the flup library, +plus a running Apache server (with ``mod_fastcgi``) or lighttpd server. + +CherryPy code +^^^^^^^^^^^^^ + +hello.py:: + + #!/usr/bin/python + import cherrypy + + class HelloWorld: + \"""Sample request handler class.\""" + def index(self): + return "Hello world!" + index.exposed = True + + cherrypy.tree.mount(HelloWorld()) + # CherryPy autoreload must be disabled for the flup server to work + cherrypy.config.update({'engine.autoreload.on':False}) + +Then run :doc:`/deployguide/cherryd` with the '-f' arg:: + + cherryd -c -d -f -i hello.py + +Apache +^^^^^^ + +At the top level in httpd.conf:: + + FastCgiIpcDir /tmp + FastCgiServer /path/to/cherry.fcgi -idle-timeout 120 -processes 4 + +And inside the relevant VirtualHost section:: + + # FastCGI config + AddHandler fastcgi-script .fcgi + ScriptAliasMatch (.*$) /path/to/cherry.fcgi$1 + +Lighttpd +^^^^^^^^ + +For `Lighttpd `_ you can follow these +instructions. Within ``lighttpd.conf`` make sure ``mod_fastcgi`` is +active within ``server.modules``. Then, within your ``$HTTP["host"]`` +directive, configure your fastcgi script like the following:: + + $HTTP["url"] =~ "" { + fastcgi.server = ( + "/" => ( + "script.fcgi" => ( + "bin-path" => "/path/to/your/script.fcgi", + "socket" => "/tmp/script.sock", + "check-local" => "disable", + "disable-time" => 1, + "min-procs" => 1, + "max-procs" => 1, # adjust as needed + ), + ), + ) + } # end of $HTTP["url"] =~ "^/" + +Please see `Lighttpd FastCGI Docs +`_ for +an explanation of the possible configuration options. +""" + +import sys +import time +import warnings + + +class ServerAdapter(object): + + """Adapter for an HTTP server. + + If you need to start more than one HTTP server (to serve on multiple + ports, or protocols, etc.), you can manually register each one and then + start them all with bus.start: + + s1 = ServerAdapter(bus, MyWSGIServer(host='0.0.0.0', port=80)) + s2 = ServerAdapter(bus, another.HTTPServer(host='127.0.0.1', SSL=True)) + s1.subscribe() + s2.subscribe() + bus.start() + """ + + def __init__(self, bus, httpserver=None, bind_addr=None): + self.bus = bus + self.httpserver = httpserver + self.bind_addr = bind_addr + self.interrupt = None + self.running = False + + def subscribe(self): + self.bus.subscribe('start', self.start) + self.bus.subscribe('stop', self.stop) + + def unsubscribe(self): + self.bus.unsubscribe('start', self.start) + self.bus.unsubscribe('stop', self.stop) + + def start(self): + """Start the HTTP server.""" + if self.bind_addr is None: + on_what = "unknown interface (dynamic?)" + elif isinstance(self.bind_addr, tuple): + on_what = self._get_base() + else: + on_what = "socket file: %s" % self.bind_addr + + if self.running: + self.bus.log("Already serving on %s" % on_what) + return + + self.interrupt = None + if not self.httpserver: + raise ValueError("No HTTP server has been created.") + + # Start the httpserver in a new thread. + if isinstance(self.bind_addr, tuple): + wait_for_free_port(*self.bind_addr) + + import threading + t = threading.Thread(target=self._start_http_thread) + t.setName("HTTPServer " + t.getName()) + t.start() + + self.wait() + self.running = True + self.bus.log("Serving on %s" % on_what) + start.priority = 75 + + def _get_base(self): + if not self.httpserver: + return '' + host, port = self.bind_addr + if getattr(self.httpserver, 'ssl_certificate', None): + scheme = "https" + if port != 443: + host += ":%s" % port + else: + scheme = "http" + if port != 80: + host += ":%s" % port + + return "%s://%s" % (scheme, host) + + def _start_http_thread(self): + """HTTP servers MUST be running in new threads, so that the + main thread persists to receive KeyboardInterrupt's. If an + exception is raised in the httpserver's thread then it's + trapped here, and the bus (and therefore our httpserver) + are shut down. + """ + try: + self.httpserver.start() + except KeyboardInterrupt: + self.bus.log(" hit: shutting down HTTP server") + self.interrupt = sys.exc_info()[1] + self.bus.exit() + except SystemExit: + self.bus.log("SystemExit raised: shutting down HTTP server") + self.interrupt = sys.exc_info()[1] + self.bus.exit() + raise + except: + self.interrupt = sys.exc_info()[1] + self.bus.log("Error in HTTP server: shutting down", + traceback=True, level=40) + self.bus.exit() + raise + + def wait(self): + """Wait until the HTTP server is ready to receive requests.""" + while not getattr(self.httpserver, "ready", False): + if self.interrupt: + raise self.interrupt + time.sleep(.1) + + # Wait for port to be occupied + if isinstance(self.bind_addr, tuple): + host, port = self.bind_addr + wait_for_occupied_port(host, port) + + def stop(self): + """Stop the HTTP server.""" + if self.running: + # stop() MUST block until the server is *truly* stopped. + self.httpserver.stop() + # Wait for the socket to be truly freed. + if isinstance(self.bind_addr, tuple): + wait_for_free_port(*self.bind_addr) + self.running = False + self.bus.log("HTTP Server %s shut down" % self.httpserver) + else: + self.bus.log("HTTP Server %s already shut down" % self.httpserver) + stop.priority = 25 + + def restart(self): + """Restart the HTTP server.""" + self.stop() + self.start() + + +class FlupCGIServer(object): + + """Adapter for a flup.server.cgi.WSGIServer.""" + + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + self.ready = False + + def start(self): + """Start the CGI server.""" + # We have to instantiate the server class here because its __init__ + # starts a threadpool. If we do it too early, daemonize won't work. + from flup.server.cgi import WSGIServer + + self.cgiserver = WSGIServer(*self.args, **self.kwargs) + self.ready = True + self.cgiserver.run() + + def stop(self): + """Stop the HTTP server.""" + self.ready = False + + +class FlupFCGIServer(object): + + """Adapter for a flup.server.fcgi.WSGIServer.""" + + def __init__(self, *args, **kwargs): + if kwargs.get('bindAddress', None) is None: + import socket + if not hasattr(socket, 'fromfd'): + raise ValueError( + 'Dynamic FCGI server not available on this platform. ' + 'You must use a static or external one by providing a ' + 'legal bindAddress.') + self.args = args + self.kwargs = kwargs + self.ready = False + + def start(self): + """Start the FCGI server.""" + # We have to instantiate the server class here because its __init__ + # starts a threadpool. If we do it too early, daemonize won't work. + from flup.server.fcgi import WSGIServer + self.fcgiserver = WSGIServer(*self.args, **self.kwargs) + # TODO: report this bug upstream to flup. + # If we don't set _oldSIGs on Windows, we get: + # File "C:\Python24\Lib\site-packages\flup\server\threadedserver.py", + # line 108, in run + # self._restoreSignalHandlers() + # File "C:\Python24\Lib\site-packages\flup\server\threadedserver.py", + # line 156, in _restoreSignalHandlers + # for signum,handler in self._oldSIGs: + # AttributeError: 'WSGIServer' object has no attribute '_oldSIGs' + self.fcgiserver._installSignalHandlers = lambda: None + self.fcgiserver._oldSIGs = [] + self.ready = True + self.fcgiserver.run() + + def stop(self): + """Stop the HTTP server.""" + # Forcibly stop the fcgi server main event loop. + self.fcgiserver._keepGoing = False + # Force all worker threads to die off. + self.fcgiserver._threadPool.maxSpare = ( + self.fcgiserver._threadPool._idleCount) + self.ready = False + + +class FlupSCGIServer(object): + + """Adapter for a flup.server.scgi.WSGIServer.""" + + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + self.ready = False + + def start(self): + """Start the SCGI server.""" + # We have to instantiate the server class here because its __init__ + # starts a threadpool. If we do it too early, daemonize won't work. + from flup.server.scgi import WSGIServer + self.scgiserver = WSGIServer(*self.args, **self.kwargs) + # TODO: report this bug upstream to flup. + # If we don't set _oldSIGs on Windows, we get: + # File "C:\Python24\Lib\site-packages\flup\server\threadedserver.py", + # line 108, in run + # self._restoreSignalHandlers() + # File "C:\Python24\Lib\site-packages\flup\server\threadedserver.py", + # line 156, in _restoreSignalHandlers + # for signum,handler in self._oldSIGs: + # AttributeError: 'WSGIServer' object has no attribute '_oldSIGs' + self.scgiserver._installSignalHandlers = lambda: None + self.scgiserver._oldSIGs = [] + self.ready = True + self.scgiserver.run() + + def stop(self): + """Stop the HTTP server.""" + self.ready = False + # Forcibly stop the scgi server main event loop. + self.scgiserver._keepGoing = False + # Force all worker threads to die off. + self.scgiserver._threadPool.maxSpare = 0 + + +def client_host(server_host): + """Return the host on which a client can connect to the given listener.""" + if server_host == '0.0.0.0': + # 0.0.0.0 is INADDR_ANY, which should answer on localhost. + return '127.0.0.1' + if server_host in ('::', '::0', '::0.0.0.0'): + # :: is IN6ADDR_ANY, which should answer on localhost. + # ::0 and ::0.0.0.0 are non-canonical but common + # ways to write IN6ADDR_ANY. + return '::1' + return server_host + + +def check_port(host, port, timeout=1.0): + """Raise an error if the given port is not free on the given host.""" + if not host: + raise ValueError("Host values of '' or None are not allowed.") + host = client_host(host) + port = int(port) + + import socket + + # AF_INET or AF_INET6 socket + # Get the correct address family for our host (allows IPv6 addresses) + try: + info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM) + except socket.gaierror: + if ':' in host: + info = [( + socket.AF_INET6, socket.SOCK_STREAM, 0, "", (host, port, 0, 0) + )] + else: + info = [(socket.AF_INET, socket.SOCK_STREAM, 0, "", (host, port))] + + for res in info: + af, socktype, proto, canonname, sa = res + s = None + try: + s = socket.socket(af, socktype, proto) + # See http://groups.google.com/group/cherrypy-users/ + # browse_frm/thread/bbfe5eb39c904fe0 + s.settimeout(timeout) + s.connect((host, port)) + s.close() + except socket.error: + if s: + s.close() + else: + raise IOError("Port %s is in use on %s; perhaps the previous " + "httpserver did not shut down properly." % + (repr(port), repr(host))) + + +# Feel free to increase these defaults on slow systems: +free_port_timeout = 0.1 +occupied_port_timeout = 1.0 + + +def wait_for_free_port(host, port, timeout=None): + """Wait for the specified port to become free (drop requests).""" + if not host: + raise ValueError("Host values of '' or None are not allowed.") + if timeout is None: + timeout = free_port_timeout + + for trial in range(50): + try: + # we are expecting a free port, so reduce the timeout + check_port(host, port, timeout=timeout) + except IOError: + # Give the old server thread time to free the port. + time.sleep(timeout) + else: + return + + raise IOError("Port %r not free on %r" % (port, host)) + + +def wait_for_occupied_port(host, port, timeout=None): + """Wait for the specified port to become active (receive requests).""" + if not host: + raise ValueError("Host values of '' or None are not allowed.") + if timeout is None: + timeout = occupied_port_timeout + + for trial in range(50): + try: + check_port(host, port, timeout=timeout) + except IOError: + # port is occupied + return + else: + time.sleep(timeout) + + if host == client_host(host): + raise IOError("Port %r not bound on %r" % (port, host)) + + # On systems where a loopback interface is not available and the + # server is bound to all interfaces, it's difficult to determine + # whether the server is in fact occupying the port. In this case, + # just issue a warning and move on. See issue #1100. + msg = "Unable to verify that the server is bound on %r" % port + warnings.warn(msg) diff --git a/lib/cherrypy/process/win32.py b/lib/cherrypy/process/win32.py new file mode 100644 index 00000000..4afd3f14 --- /dev/null +++ b/lib/cherrypy/process/win32.py @@ -0,0 +1,180 @@ +"""Windows service. Requires pywin32.""" + +import os +import win32api +import win32con +import win32event +import win32service +import win32serviceutil + +from cherrypy.process import wspbus, plugins + + +class ConsoleCtrlHandler(plugins.SimplePlugin): + + """A WSPBus plugin for handling Win32 console events (like Ctrl-C).""" + + def __init__(self, bus): + self.is_set = False + plugins.SimplePlugin.__init__(self, bus) + + def start(self): + if self.is_set: + self.bus.log('Handler for console events already set.', level=40) + return + + result = win32api.SetConsoleCtrlHandler(self.handle, 1) + if result == 0: + self.bus.log('Could not SetConsoleCtrlHandler (error %r)' % + win32api.GetLastError(), level=40) + else: + self.bus.log('Set handler for console events.', level=40) + self.is_set = True + + def stop(self): + if not self.is_set: + self.bus.log('Handler for console events already off.', level=40) + return + + try: + result = win32api.SetConsoleCtrlHandler(self.handle, 0) + except ValueError: + # "ValueError: The object has not been registered" + result = 1 + + if result == 0: + self.bus.log('Could not remove SetConsoleCtrlHandler (error %r)' % + win32api.GetLastError(), level=40) + else: + self.bus.log('Removed handler for console events.', level=40) + self.is_set = False + + def handle(self, event): + """Handle console control events (like Ctrl-C).""" + if event in (win32con.CTRL_C_EVENT, win32con.CTRL_LOGOFF_EVENT, + win32con.CTRL_BREAK_EVENT, win32con.CTRL_SHUTDOWN_EVENT, + win32con.CTRL_CLOSE_EVENT): + self.bus.log('Console event %s: shutting down bus' % event) + + # Remove self immediately so repeated Ctrl-C doesn't re-call it. + try: + self.stop() + except ValueError: + pass + + self.bus.exit() + # 'First to return True stops the calls' + return 1 + return 0 + + +class Win32Bus(wspbus.Bus): + + """A Web Site Process Bus implementation for Win32. + + Instead of time.sleep, this bus blocks using native win32event objects. + """ + + def __init__(self): + self.events = {} + wspbus.Bus.__init__(self) + + def _get_state_event(self, state): + """Return a win32event for the given state (creating it if needed).""" + try: + return self.events[state] + except KeyError: + event = win32event.CreateEvent(None, 0, 0, + "WSPBus %s Event (pid=%r)" % + (state.name, os.getpid())) + self.events[state] = event + return event + + def _get_state(self): + return self._state + + def _set_state(self, value): + self._state = value + event = self._get_state_event(value) + win32event.PulseEvent(event) + state = property(_get_state, _set_state) + + def wait(self, state, interval=0.1, channel=None): + """Wait for the given state(s), KeyboardInterrupt or SystemExit. + + Since this class uses native win32event objects, the interval + argument is ignored. + """ + if isinstance(state, (tuple, list)): + # Don't wait for an event that beat us to the punch ;) + if self.state not in state: + events = tuple([self._get_state_event(s) for s in state]) + win32event.WaitForMultipleObjects( + events, 0, win32event.INFINITE) + else: + # Don't wait for an event that beat us to the punch ;) + if self.state != state: + event = self._get_state_event(state) + win32event.WaitForSingleObject(event, win32event.INFINITE) + + +class _ControlCodes(dict): + + """Control codes used to "signal" a service via ControlService. + + User-defined control codes are in the range 128-255. We generally use + the standard Python value for the Linux signal and add 128. Example: + + >>> signal.SIGUSR1 + 10 + control_codes['graceful'] = 128 + 10 + """ + + def key_for(self, obj): + """For the given value, return its corresponding key.""" + for key, val in self.items(): + if val is obj: + return key + raise ValueError("The given object could not be found: %r" % obj) + +control_codes = _ControlCodes({'graceful': 138}) + + +def signal_child(service, command): + if command == 'stop': + win32serviceutil.StopService(service) + elif command == 'restart': + win32serviceutil.RestartService(service) + else: + win32serviceutil.ControlService(service, control_codes[command]) + + +class PyWebService(win32serviceutil.ServiceFramework): + + """Python Web Service.""" + + _svc_name_ = "Python Web Service" + _svc_display_name_ = "Python Web Service" + _svc_deps_ = None # sequence of service names on which this depends + _exe_name_ = "pywebsvc" + _exe_args_ = None # Default to no arguments + + # Only exists on Windows 2000 or later, ignored on windows NT + _svc_description_ = "Python Web Service" + + def SvcDoRun(self): + from cherrypy import process + process.bus.start() + process.bus.block() + + def SvcStop(self): + from cherrypy import process + self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING) + process.bus.exit() + + def SvcOther(self, control): + process.bus.publish(control_codes.key_for(control)) + + +if __name__ == '__main__': + win32serviceutil.HandleCommandLine(PyWebService) diff --git a/lib/cherrypy/process/wspbus.py b/lib/cherrypy/process/wspbus.py new file mode 100644 index 00000000..5409d038 --- /dev/null +++ b/lib/cherrypy/process/wspbus.py @@ -0,0 +1,448 @@ +"""An implementation of the Web Site Process Bus. + +This module is completely standalone, depending only on the stdlib. + +Web Site Process Bus +-------------------- + +A Bus object is used to contain and manage site-wide behavior: +daemonization, HTTP server start/stop, process reload, signal handling, +drop privileges, PID file management, logging for all of these, +and many more. + +In addition, a Bus object provides a place for each web framework +to register code that runs in response to site-wide events (like +process start and stop), or which controls or otherwise interacts with +the site-wide components mentioned above. For example, a framework which +uses file-based templates would add known template filenames to an +autoreload component. + +Ideally, a Bus object will be flexible enough to be useful in a variety +of invocation scenarios: + + 1. The deployer starts a site from the command line via a + framework-neutral deployment script; applications from multiple frameworks + are mixed in a single site. Command-line arguments and configuration + files are used to define site-wide components such as the HTTP server, + WSGI component graph, autoreload behavior, signal handling, etc. + 2. The deployer starts a site via some other process, such as Apache; + applications from multiple frameworks are mixed in a single site. + Autoreload and signal handling (from Python at least) are disabled. + 3. The deployer starts a site via a framework-specific mechanism; + for example, when running tests, exploring tutorials, or deploying + single applications from a single framework. The framework controls + which site-wide components are enabled as it sees fit. + +The Bus object in this package uses topic-based publish-subscribe +messaging to accomplish all this. A few topic channels are built in +('start', 'stop', 'exit', 'graceful', 'log', and 'main'). Frameworks and +site containers are free to define their own. If a message is sent to a +channel that has not been defined or has no listeners, there is no effect. + +In general, there should only ever be a single Bus object per process. +Frameworks and site containers share a single Bus object by publishing +messages and subscribing listeners. + +The Bus object works as a finite state machine which models the current +state of the process. Bus methods move it from one state to another; +those methods then publish to subscribed listeners on the channel for +the new state.:: + + O + | + V + STOPPING --> STOPPED --> EXITING -> X + A A | + | \___ | + | \ | + | V V + STARTED <-- STARTING + +""" + +import atexit +import os +import sys +import threading +import time +import traceback as _traceback +import warnings + +from cherrypy._cpcompat import set + +# Here I save the value of os.getcwd(), which, if I am imported early enough, +# will be the directory from which the startup script was run. This is needed +# by _do_execv(), to change back to the original directory before execv()ing a +# new process. This is a defense against the application having changed the +# current working directory (which could make sys.executable "not found" if +# sys.executable is a relative-path, and/or cause other problems). +_startup_cwd = os.getcwd() + + +class ChannelFailures(Exception): + + """Exception raised when errors occur in a listener during Bus.publish(). + """ + delimiter = '\n' + + def __init__(self, *args, **kwargs): + # Don't use 'super' here; Exceptions are old-style in Py2.4 + # See https://bitbucket.org/cherrypy/cherrypy/issue/959 + Exception.__init__(self, *args, **kwargs) + self._exceptions = list() + + def handle_exception(self): + """Append the current exception to self.""" + self._exceptions.append(sys.exc_info()[1]) + + def get_instances(self): + """Return a list of seen exception instances.""" + return self._exceptions[:] + + def __str__(self): + exception_strings = map(repr, self.get_instances()) + return self.delimiter.join(exception_strings) + + __repr__ = __str__ + + def __bool__(self): + return bool(self._exceptions) + __nonzero__ = __bool__ + +# Use a flag to indicate the state of the bus. + + +class _StateEnum(object): + + class State(object): + name = None + + def __repr__(self): + return "states.%s" % self.name + + def __setattr__(self, key, value): + if isinstance(value, self.State): + value.name = key + object.__setattr__(self, key, value) +states = _StateEnum() +states.STOPPED = states.State() +states.STARTING = states.State() +states.STARTED = states.State() +states.STOPPING = states.State() +states.EXITING = states.State() + + +try: + import fcntl +except ImportError: + max_files = 0 +else: + try: + max_files = os.sysconf('SC_OPEN_MAX') + except AttributeError: + max_files = 1024 + + +class Bus(object): + + """Process state-machine and messenger for HTTP site deployment. + + All listeners for a given channel are guaranteed to be called even + if others at the same channel fail. Each failure is logged, but + execution proceeds on to the next listener. The only way to stop all + processing from inside a listener is to raise SystemExit and stop the + whole server. + """ + + states = states + state = states.STOPPED + execv = False + max_cloexec_files = max_files + + def __init__(self): + self.execv = False + self.state = states.STOPPED + self.listeners = dict( + [(channel, set()) for channel + in ('start', 'stop', 'exit', 'graceful', 'log', 'main')]) + self._priorities = {} + + def subscribe(self, channel, callback, priority=None): + """Add the given callback at the given channel (if not present).""" + if channel not in self.listeners: + self.listeners[channel] = set() + self.listeners[channel].add(callback) + + if priority is None: + priority = getattr(callback, 'priority', 50) + self._priorities[(channel, callback)] = priority + + def unsubscribe(self, channel, callback): + """Discard the given callback (if present).""" + listeners = self.listeners.get(channel) + if listeners and callback in listeners: + listeners.discard(callback) + del self._priorities[(channel, callback)] + + def publish(self, channel, *args, **kwargs): + """Return output of all subscribers for the given channel.""" + if channel not in self.listeners: + return [] + + exc = ChannelFailures() + output = [] + + items = [(self._priorities[(channel, listener)], listener) + for listener in self.listeners[channel]] + try: + items.sort(key=lambda item: item[0]) + except TypeError: + # Python 2.3 had no 'key' arg, but that doesn't matter + # since it could sort dissimilar types just fine. + items.sort() + for priority, listener in items: + try: + output.append(listener(*args, **kwargs)) + except KeyboardInterrupt: + raise + except SystemExit: + e = sys.exc_info()[1] + # If we have previous errors ensure the exit code is non-zero + if exc and e.code == 0: + e.code = 1 + raise + except: + exc.handle_exception() + if channel == 'log': + # Assume any further messages to 'log' will fail. + pass + else: + self.log("Error in %r listener %r" % (channel, listener), + level=40, traceback=True) + if exc: + raise exc + return output + + def _clean_exit(self): + """An atexit handler which asserts the Bus is not running.""" + if self.state != states.EXITING: + warnings.warn( + "The main thread is exiting, but the Bus is in the %r state; " + "shutting it down automatically now. You must either call " + "bus.block() after start(), or call bus.exit() before the " + "main thread exits." % self.state, RuntimeWarning) + self.exit() + + def start(self): + """Start all services.""" + atexit.register(self._clean_exit) + + self.state = states.STARTING + self.log('Bus STARTING') + try: + self.publish('start') + self.state = states.STARTED + self.log('Bus STARTED') + except (KeyboardInterrupt, SystemExit): + raise + except: + self.log("Shutting down due to error in start listener:", + level=40, traceback=True) + e_info = sys.exc_info()[1] + try: + self.exit() + except: + # Any stop/exit errors will be logged inside publish(). + pass + # Re-raise the original error + raise e_info + + def exit(self): + """Stop all services and prepare to exit the process.""" + exitstate = self.state + try: + self.stop() + + self.state = states.EXITING + self.log('Bus EXITING') + self.publish('exit') + # This isn't strictly necessary, but it's better than seeing + # "Waiting for child threads to terminate..." and then nothing. + self.log('Bus EXITED') + except: + # This method is often called asynchronously (whether thread, + # signal handler, console handler, or atexit handler), so we + # can't just let exceptions propagate out unhandled. + # Assume it's been logged and just die. + os._exit(70) # EX_SOFTWARE + + if exitstate == states.STARTING: + # exit() was called before start() finished, possibly due to + # Ctrl-C because a start listener got stuck. In this case, + # we could get stuck in a loop where Ctrl-C never exits the + # process, so we just call os.exit here. + os._exit(70) # EX_SOFTWARE + + def restart(self): + """Restart the process (may close connections). + + This method does not restart the process from the calling thread; + instead, it stops the bus and asks the main thread to call execv. + """ + self.execv = True + self.exit() + + def graceful(self): + """Advise all services to reload.""" + self.log('Bus graceful') + self.publish('graceful') + + def block(self, interval=0.1): + """Wait for the EXITING state, KeyboardInterrupt or SystemExit. + + This function is intended to be called only by the main thread. + After waiting for the EXITING state, it also waits for all threads + to terminate, and then calls os.execv if self.execv is True. This + design allows another thread to call bus.restart, yet have the main + thread perform the actual execv call (required on some platforms). + """ + try: + self.wait(states.EXITING, interval=interval, channel='main') + except (KeyboardInterrupt, IOError): + # The time.sleep call might raise + # "IOError: [Errno 4] Interrupted function call" on KBInt. + self.log('Keyboard Interrupt: shutting down bus') + self.exit() + except SystemExit: + self.log('SystemExit raised: shutting down bus') + self.exit() + raise + + # Waiting for ALL child threads to finish is necessary on OS X. + # See https://bitbucket.org/cherrypy/cherrypy/issue/581. + # It's also good to let them all shut down before allowing + # the main thread to call atexit handlers. + # See https://bitbucket.org/cherrypy/cherrypy/issue/751. + self.log("Waiting for child threads to terminate...") + for t in threading.enumerate(): + # Validate the we're not trying to join the MainThread + # that will cause a deadlock and the case exist when + # implemented as a windows service and in any other case + # that another thread executes cherrypy.engine.exit() + if ( + t != threading.currentThread() and + t.isAlive() and + not isinstance(t, threading._MainThread) + ): + # Note that any dummy (external) threads are always daemonic. + if hasattr(threading.Thread, "daemon"): + # Python 2.6+ + d = t.daemon + else: + d = t.isDaemon() + if not d: + self.log("Waiting for thread %s." % t.getName()) + t.join() + + if self.execv: + self._do_execv() + + def wait(self, state, interval=0.1, channel=None): + """Poll for the given state(s) at intervals; publish to channel.""" + if isinstance(state, (tuple, list)): + states = state + else: + states = [state] + + def _wait(): + while self.state not in states: + time.sleep(interval) + self.publish(channel) + + # From http://psyco.sourceforge.net/psycoguide/bugs.html: + # "The compiled machine code does not include the regular polling + # done by Python, meaning that a KeyboardInterrupt will not be + # detected before execution comes back to the regular Python + # interpreter. Your program cannot be interrupted if caught + # into an infinite Psyco-compiled loop." + try: + sys.modules['psyco'].cannotcompile(_wait) + except (KeyError, AttributeError): + pass + + _wait() + + def _do_execv(self): + """Re-execute the current process. + + This must be called from the main thread, because certain platforms + (OS X) don't allow execv to be called in a child thread very well. + """ + args = sys.argv[:] + self.log('Re-spawning %s' % ' '.join(args)) + + if sys.platform[:4] == 'java': + from _systemrestart import SystemRestart + raise SystemRestart + else: + args.insert(0, sys.executable) + if sys.platform == 'win32': + args = ['"%s"' % arg for arg in args] + + os.chdir(_startup_cwd) + if self.max_cloexec_files: + self._set_cloexec() + os.execv(sys.executable, args) + + def _set_cloexec(self): + """Set the CLOEXEC flag on all open files (except stdin/out/err). + + If self.max_cloexec_files is an integer (the default), then on + platforms which support it, it represents the max open files setting + for the operating system. This function will be called just before + the process is restarted via os.execv() to prevent open files + from persisting into the new process. + + Set self.max_cloexec_files to 0 to disable this behavior. + """ + for fd in range(3, self.max_cloexec_files): # skip stdin/out/err + try: + flags = fcntl.fcntl(fd, fcntl.F_GETFD) + except IOError: + continue + fcntl.fcntl(fd, fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC) + + def stop(self): + """Stop all services.""" + self.state = states.STOPPING + self.log('Bus STOPPING') + self.publish('stop') + self.state = states.STOPPED + self.log('Bus STOPPED') + + def start_with_callback(self, func, args=None, kwargs=None): + """Start 'func' in a new thread T, then start self (and return T).""" + if args is None: + args = () + if kwargs is None: + kwargs = {} + args = (func,) + args + + def _callback(func, *a, **kw): + self.wait(states.STARTED) + func(*a, **kw) + t = threading.Thread(target=_callback, args=args, kwargs=kwargs) + t.setName('Bus Callback ' + t.getName()) + t.start() + + self.start() + + return t + + def log(self, msg="", level=20, traceback=False): + """Log the given message. Append the last traceback if requested.""" + if traceback: + msg += "\n" + "".join(_traceback.format_exception(*sys.exc_info())) + self.publish('log', msg, level) + +bus = Bus() diff --git a/lib/cherrypy/scaffold/__init__.py b/lib/cherrypy/scaffold/__init__.py new file mode 100644 index 00000000..50de34bb --- /dev/null +++ b/lib/cherrypy/scaffold/__init__.py @@ -0,0 +1,61 @@ +""", a CherryPy application. + +Use this as a base for creating new CherryPy applications. When you want +to make a new app, copy and paste this folder to some other location +(maybe site-packages) and rename it to the name of your project, +then tweak as desired. + +Even before any tweaking, this should serve a few demonstration pages. +Change to this directory and run: + + ../cherryd -c site.conf + +""" + +import cherrypy +from cherrypy import tools, url + +import os +local_dir = os.path.join(os.getcwd(), os.path.dirname(__file__)) + + +class Root: + + _cp_config = {'tools.log_tracebacks.on': True, + } + + def index(self): + return """ +Try some other path, +or a default path.
+Or, just look at the pretty picture:
+ +""" % (url("other"), url("else"), + url("files/made_with_cherrypy_small.png")) + index.exposed = True + + def default(self, *args, **kwargs): + return "args: %s kwargs: %s" % (args, kwargs) + default.exposed = True + + def other(self, a=2, b='bananas', c=None): + cherrypy.response.headers['Content-Type'] = 'text/plain' + if c is None: + return "Have %d %s." % (int(a), b) + else: + return "Have %d %s, %s." % (int(a), b, c) + other.exposed = True + + files = cherrypy.tools.staticdir.handler( + section="/files", + dir=os.path.join(local_dir, "static"), + # Ignore .php files, etc. + match=r'\.(css|gif|html?|ico|jpe?g|js|png|swf|xml)$', + ) + + +root = Root() + +# Uncomment the following to use your own favicon instead of CP's default. +#favicon_path = os.path.join(local_dir, "favicon.ico") +#root.favicon_ico = tools.staticfile.handler(filename=favicon_path) diff --git a/lib/cherrypy/scaffold/apache-fcgi.conf b/lib/cherrypy/scaffold/apache-fcgi.conf new file mode 100644 index 00000000..922398ea --- /dev/null +++ b/lib/cherrypy/scaffold/apache-fcgi.conf @@ -0,0 +1,22 @@ +# Apache2 server conf file for using CherryPy with mod_fcgid. + +# This doesn't have to be "C:/", but it has to be a directory somewhere, and +# MUST match the directory used in the FastCgiExternalServer directive, below. +DocumentRoot "C:/" + +ServerName 127.0.0.1 +Listen 80 +LoadModule fastcgi_module modules/mod_fastcgi.dll +LoadModule rewrite_module modules/mod_rewrite.so + +Options ExecCGI +SetHandler fastcgi-script +RewriteEngine On +# Send requests for any URI to our fastcgi handler. +RewriteRule ^(.*)$ /fastcgi.pyc [L] + +# The FastCgiExternalServer directive defines filename as an external FastCGI application. +# If filename does not begin with a slash (/) then it is assumed to be relative to the ServerRoot. +# The filename does not have to exist in the local filesystem. URIs that Apache resolves to this +# filename will be handled by this external FastCGI application. +FastCgiExternalServer "C:/fastcgi.pyc" -host 127.0.0.1:8088 \ No newline at end of file diff --git a/lib/cherrypy/scaffold/example.conf b/lib/cherrypy/scaffold/example.conf new file mode 100644 index 00000000..93a6e53c --- /dev/null +++ b/lib/cherrypy/scaffold/example.conf @@ -0,0 +1,3 @@ +[/] +log.error_file: "error.log" +log.access_file: "access.log" \ No newline at end of file diff --git a/lib/cherrypy/scaffold/site.conf b/lib/cherrypy/scaffold/site.conf new file mode 100644 index 00000000..6ed38983 --- /dev/null +++ b/lib/cherrypy/scaffold/site.conf @@ -0,0 +1,14 @@ +[global] +# Uncomment this when you're done developing +#environment: "production" + +server.socket_host: "0.0.0.0" +server.socket_port: 8088 + +# Uncomment the following lines to run on HTTPS at the same time +#server.2.socket_host: "0.0.0.0" +#server.2.socket_port: 8433 +#server.2.ssl_certificate: '../test/test.pem' +#server.2.ssl_private_key: '../test/test.pem' + +tree.myapp: cherrypy.Application(scaffold.root, "/", "example.conf") diff --git a/lib/cherrypy/scaffold/static/made_with_cherrypy_small.png b/lib/cherrypy/scaffold/static/made_with_cherrypy_small.png new file mode 100644 index 0000000000000000000000000000000000000000..c3aafeed952190f5da9982bb359aa75b107ff079 GIT binary patch literal 7455 zcmV+)9pK`LP)7N6iYPr5@U(R*fj>!*b#f9NC#;mT|kg1D2jq&Lj>tf5D-D6cTjpq zM2*V(&+dUE$T@H@a&NdlKF>Vo`?k!^&c5I5?CvbS4*K`nzw94S`&vnU?rYUm<*)VZ zKlrsb-hAs{CSiv-Eoy)P>)-P4@xvMfTz0~N?aQ%e@i^>WG#2rZLH`!v(s_J^DycTIW{%{Z8$3ex2 zJwN{Ye*4vGhvaBGVAYbvdG-)hRoQS0(8QrrbKv5!75RlXDZQFg?b9mUNpTedsvcPE z_x}BCVY&M9FZ=uK??QBtq&g@;?Xw5}_|tgBz^VnF-bd}Q_mf%$u73`!+D8PcbeaJ}k)Q2^zs@ef5tl{CNA!}w`+bR9j(v1($D9<&6fHUBMQR|~V-NCgX(7O9Ij&o-L`i}^O5*hpVzB~2X3{8((H8QI zbKQnO0*w+O1=c7^$*0W=Wfpxy5a z>?c5&jOoMQ5B|^-Y)UIj(nWEcHmqiRk5jA0JZ?7ur09lAJ&>Q>> z+?D3QdBbGvdhcxl)@Gg`=eh+tgT5v}F2)tb*}QJ1zd|EfZ#t}lTK7-L3LW6-F-{w? z?TyzF>mny;gVL2g|B%49hx0f%suzJulgq#D_1B?FQ=?k9t|+23FRl5|=>0j&Cicfk zl2@zm7rZa`NzRt^Q=F_v{&Hm71MAttR6L^9!Ox5ULvXRt~1X1Y=?`_ zHi;-c(ON7oaczi8ugSH={Y?5QTR~9{YoJbpS(&EG>tzM(#g4b$K>kft{$8H6AA3LQ zgcSGfP56ddN<)9>w>&-OWwrC{I zZT_hdPuAAH&p$@e<*hv3QD!XcmyW`zKgaX;go|5V zU5}@EY0iVqFGF})_MQ{0h#hagiGnD#7WGp>JhJXsw=uBDBtEH%L~xioU;Rbh(kCEz z5?vEX!I3&R%S&7v;?f@#o+70&#Y#@`=QnXHRbQtxUK2ateG8=tn!+?@ z1yZhVa==1&Tg6lCWrZ?{>bp%jeTEyM9#T3Tx6%vG`oP;3AK{u`RtkH z34o<8^NKnifrk-N>p<8?#`5>m z3eWh*-ZXvf;br~w=EI2msS|&U7S;_hUka0Pz4?22iYAzh370kr^RKEft2$ixUKk?) zcSHG_lOu&boYJC}w;o>FV&xUaz>=oG#CQ_|z@=vCMwA{LJ!D%&X#~cW__i+pBC2yt zB?1*wZ1@prX!ib0SU!Um9n29fx~*I{Xc~M#lHBAt?ftkDO)(?juxunb!#$u?SGP1# z4JKoIL!;NH)1J!Loe^?q8RwJYu?1>0{V|`+e(6ZcABBD-o>q~j zM%GG}R)qWbdxqaO1ews$mGc_1YWt9Qd84pyd5S8c99AI2d@-_vcF?MC8wzg8H{u<2 zd?jsHETF0F58B3HgWOa`CR0wx&PI7@B}=PYl@@ivcqWJKAzP8``Qf(O;TcZifo`#KT;ti6;eC!m)T=ovB4x0T%g2v~V8 zuJG?agahhD3K}sMF=~o9t~njip5(|TIG{T9I35+8p#gRlNuD7JIC^EZ#AHvvtITt3 z#H*&@G@?UH;p(J^1G;;#Rc&_>OlcWTepf1e@$HP#!gs^pY%hEbcfiMRC%kmF;+pmrTvL;SxAtZPm~BDC`OPSb zC*STB_ANP-78);OvGOlmhF|aRM1rawWB~)DaDD6)M9y7=m=$YrM|LaXckDohi3%!i z+LBdsYDE5FDmZd!rNngHeH|VaJm--^kr79&m9hNyMfm2MZ}0~B1PnO!G;78f0uWg) zYPTrW4&M#v!ShZZQ)nW~i?Z=|0@=fSdpsluzr9dqj0pmV{|Min197Q)Kb-DH8@J!~ zh70|A^yp89VJJezPKKY{LKG)kzOsI0#u%ZbfTA8+_}9pJe~^}efI-8X1(jc~?+{E@ zE8>@Vh+Qj-BqcRu7@Koq&v#@uquAGkEXXM#Mc>ww7*q^^u0C6ZbyIJdW532u^ytwY zojSb>nLp;k-c}vH?q;}p-T>x?+u-<@hOm5T=246sKClj0dmG8oP^R|&d^g?-0i(y_ zQn$VcKW%`lTPIMM?2N3N4!C(r2X2dI;Ti#qjaQhWG<~QiUx_ZVVl7ZonA{Ss8VCB_ zUL)HqsF=m_?>7XY_k(mZ#lbvl{nPj*t~N&M7zD61Ep) zBqr}*d0ww5)dsqnJMh_OpP_%hUWf^E;FheO@AF>@@2G35BKs)Dj2_H^H~PYKbbd|H9Ns!#S`npV9!(y z)#TYDKI|aQI%+~sdpi{Otc9NDMw~yXgyJ-tMs*63tr6m>L+y~WoNb5XNE4htr3ow3 zy|6zlk1((Oyw3G&`{9089jtujZ8KavtqE&Wc^tErhu>ul+zCC1a9>?S2N~c&E=|WT z^fNEX3K4#Kh$LeDWBGYWmhhi41(&<^Lsqbn$iBp#RpN`Uv)%imEX58f5hjQt9=8MO zIOnc_PeWp|-tyN+w4W|2GDPbq>ox;cy5zvhv9oUxRNP{bQy)5E8gBi*5=n}xDE9KD zAj2o9eewjw-oBKkhG@!*S-BK7d4CBhC`?HrP%-eOCLBP1q6Jn+&%^7qcci>`755Pu z@)P%5nQe<1KaJqPGSgp6)97|gnm8VvJ9oz0Z@-QA-g^(-x_yhq^T*RbA8N$&L9QJ< z&#Oaj<0>lC4R5{m7We1a@8_R?jsX_X z18_Iqyh%S@$nyR??I#SA8jP|``cO%)xd|5VpZXKr2v}*p1|oG6>!FF@;bU;R-#}Dm zI^dSoZroU~6oEg_LQarwlRCF9Ya;5;b?}%r69v%}?AKuY>Yp=Wj9X{1l2kP%0+wIz zK?oT&fjjPZWVazzTc1ydjM(#?P1|_h^)ZuZ)pb}9iAYqKZVpS=Ww1E6q**#dQ4*zD zmh>g7&;azun>4WT5tc$e_58&BR#KaDW{&2-8YVS_2J~CVuKyi}jCVp`YXe4&7{K!w zV2$=KZ^SpmQxnUUF2K9*z6)iA4Y(a1 zf|=n~1YX%kykF;Kj+m`!(pPn*74%WO#X-m{Dr>tS*#f>(CgTbNmXBVOI!s1?TLlCT z83pG#)A=-bFY*Ac^&12qG7#ZfE1T2_Cg25pHw>qzOrlxFQKZh(qYYaJtoWVs5&?@% zmw|&v@aZsW(Q?GDm!lP-JSX-XwU{>JCQs*ODq@Yvv{M8kBZ@?H57^csupF+eLiIgs z*qY1nDe$W=yP`VR{yCpIUiCZ~Uru%HR1hfZfwgqebQGnZqC8t+net3)3>(s&58|8| zQj}-I^Gedpp}6}`4k`m}IsUaaGWhewH-?R}1{E zs}t#588mucNhZ8Qy&JO$-{?jej|k z#;jLlnhSwtXU@WZo5s2;?);TopOM3Qp@zb4S()peDi|?bid&%4OajLX^)jRN@$ttW z@&OLKqQvvd(#Z3*gwZ zKfFzqNPl+1ddY5z@a{DL9^LyR-BVeZo?ym^%XL6va|joGQ7G zm8`K}Vqh`EGvEXa9LDGHkkJ!4v1evVS)O0tL8#6$ZxUM_Kt~cL6c8wm!XsFqzXjA3 z7I9$p>GcH-hU78p6`7_DfVEC!Z1-WKmE6Y}HM~#VKwIFLr5yLehD-G!M_n0b?3LhX zCl3dkz4*&&4-T8|#K;j+eDE)x+`;))W}1;dH;kv9wA~~&j{J`$6L=XDy_I6i6eb&? zcMrkFuCD5QVL2<~P2{hA1T{rgmwhCHxc%Z3V|e{Mm5-SZjf2mSA8>WRP&gCN?7sV+ zLTOeaEws*>H;DCTGJDUSC^${ou|UW#4JN=d}9YVJPSUGmbS_Q27 zl_Ccb`CXjoxjy~Ij0tq;Yu1D+vrJ%rSw<|dvJ-W99euTh99ZwsLGQgd1ChK$@h{IX zZUC&cBJ0_it)@M(PM9!uAWaVzyo{FeGVY`^U#4r$9w z?#}YOPGzPE=FJ(y!Qf~sD^|w!tEO?E&O}$H*?wp$&gHy@522tv(o<2gA!L@%#{2Jg zLVAp4qq@0qdhq>u8a&DJTveCFb$b;=xa>!2=pj_)oZui7r5~0Sb3~T=SyzS)p|j6} zv;s7Nx8i!7BMX<}Z`7=e=~3D!YYniU9lo)A&l&C{Uqm@#2oDC|ntazRBjDuMC+js%6Y&M9MQFiqV z0kCwM??+0620r?*6E7R?C67=Kc?=si6f(chpuuM0)z4%0@5a&$Gj%GT(&8`elUP5D zMarbo%>^IP$!GkiVf1IH4kameZ(sP7dsLv z3Z{s}?1N${l1|wVz5py2Puf_mt5Za=7RQPrsu-IT7gpu|_4y>LO$6k`cE*FTGy*{rz-GCMV0{_X;X1$m?hWJx@ z3NK@(B}9UBL2Dx%1bgN_pw;X);RFZZKOu8olRk`1}&0L z;O0ga>v|}QAAerYOm~rRYhd0lQn*K#0b1f)nyibd6NmD8R)#Vn%OBKU&iAw?jG++T zEK(%38SS?hohXox)zgswg(3u4OnCEsC`;ADte*wII&pZN$nupL29RDdNdT<FKwtAXK9T!ME$zNHmmfG}C`!ZGg@cyk%d0Zp&%b+=}T^N0_%3fr|c=LTe78 zS=m$_gO>)DU!U)A*O~6iWirJ}PNAXOVB@-k?!DPG`nbRktG?jr-7x@%tKCZ5sL~I!5Lp&i-g+vf_-t|Y*tMgR zwi^gAgZQeg9%O9Q$OeFs+`8Eum_>;iw7cGirnPrqpuxg_o653j^%PnazJoEN2Jkz1 z4a*eKSE;&pKEC>*t8f~wZ{zjXX~ibRrcWbYC-06bZMOF2ZP}UqDYC7SWKEiy2lFzz zs`T+lpc_YPX}(cAYx)mOG0Q*Ukex223t9@{8Ea#5P88OV?S+vnvmJo zz_}B9csmzPDd1)R-6za!T&E<}l=G^fZ4Pn$DQ!WX7wesOSc^|S=?WvAm121^xl8G^ zpfufz+Lk<)zhPT3?i5b_<3xDMON-V+-NloVBz1&bkwaE$s6^IEq{V7j^I>it*#1`B z)?Kt2D%AvJ?ARt*LkD-G&mk>bo=(tN+%*gN?Vy*{L3ye=4Cx$??VUvjwyhq04{wIY z&>s-v%5D&eZmT#!71t>yJ|^5*VoB4IL>Yx^R zS@frg=zifnx(L87542Ux_5U*8GSZuy(`L+?ISo=Y#a8lZy5)->tu4c1;)5=1Lx}7K zoE$2JGTJ14kw+E6dP)j;s@#_~Hx5HR}E9U+>i=Q~?Ypf*Q?S19?3v}59&qEX|Cc6pV6szUBO9q)y z3cQ_+$UR^&?Ki#TaLsugLcGmTQI^{(OI2U^l>)1tDck3`ml=-47+1vHIDu%2{Olm{ zI*1H9im6k^afh8POlHlTfw)_j+eBwq|CAwzT?%d#Cx3MMO!}Wc+=T7Kgq=WaBwe2) zU+Q5^I0?`Ps8)FgG)WiZcB= (3, 0): + def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + return wsgiserver.CP_makefile(sock, mode, bufsize) + else: + def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + return wsgiserver.CP_fileobject(sock, mode, bufsize) diff --git a/lib/cherrypy/wsgiserver/ssl_pyopenssl.py b/lib/cherrypy/wsgiserver/ssl_pyopenssl.py new file mode 100644 index 00000000..f8f2dafe --- /dev/null +++ b/lib/cherrypy/wsgiserver/ssl_pyopenssl.py @@ -0,0 +1,253 @@ +"""A library for integrating pyOpenSSL with CherryPy. + +The OpenSSL module must be importable for SSL functionality. +You can obtain it from `here `_. + +To use this module, set CherryPyWSGIServer.ssl_adapter to an instance of +SSLAdapter. There are two ways to use SSL: + +Method One +---------- + + * ``ssl_adapter.context``: an instance of SSL.Context. + +If this is not None, it is assumed to be an SSL.Context instance, +and will be passed to SSL.Connection on bind(). The developer is +responsible for forming a valid Context object. This approach is +to be preferred for more flexibility, e.g. if the cert and key are +streams instead of files, or need decryption, or SSL.SSLv3_METHOD +is desired instead of the default SSL.SSLv23_METHOD, etc. Consult +the pyOpenSSL documentation for complete options. + +Method Two (shortcut) +--------------------- + + * ``ssl_adapter.certificate``: the filename of the server SSL certificate. + * ``ssl_adapter.private_key``: the filename of the server's private key file. + +Both are None by default. If ssl_adapter.context is None, but .private_key +and .certificate are both given and valid, they will be read, and the +context will be automatically created from them. +""" + +import socket +import threading +import time + +from cherrypy import wsgiserver + +try: + from OpenSSL import SSL + from OpenSSL import crypto +except ImportError: + SSL = None + + +class SSL_fileobject(wsgiserver.CP_fileobject): + + """SSL file object attached to a socket object.""" + + ssl_timeout = 3 + ssl_retry = .01 + + def _safe_call(self, is_reader, call, *args, **kwargs): + """Wrap the given call with SSL error-trapping. + + is_reader: if False EOF errors will be raised. If True, EOF errors + will return "" (to emulate normal sockets). + """ + start = time.time() + while True: + try: + return call(*args, **kwargs) + except SSL.WantReadError: + # Sleep and try again. This is dangerous, because it means + # the rest of the stack has no way of differentiating + # between a "new handshake" error and "client dropped". + # Note this isn't an endless loop: there's a timeout below. + time.sleep(self.ssl_retry) + except SSL.WantWriteError: + time.sleep(self.ssl_retry) + except SSL.SysCallError, e: + if is_reader and e.args == (-1, 'Unexpected EOF'): + return "" + + errnum = e.args[0] + if is_reader and errnum in wsgiserver.socket_errors_to_ignore: + return "" + raise socket.error(errnum) + except SSL.Error, e: + if is_reader and e.args == (-1, 'Unexpected EOF'): + return "" + + thirdarg = None + try: + thirdarg = e.args[0][0][2] + except IndexError: + pass + + if thirdarg == 'http request': + # The client is talking HTTP to an HTTPS server. + raise wsgiserver.NoSSLError() + + raise wsgiserver.FatalSSLAlert(*e.args) + except: + raise + + if time.time() - start > self.ssl_timeout: + raise socket.timeout("timed out") + + def recv(self, size): + return self._safe_call(True, super(SSL_fileobject, self).recv, size) + + def sendall(self, *args, **kwargs): + return self._safe_call(False, super(SSL_fileobject, self).sendall, + *args, **kwargs) + + def send(self, *args, **kwargs): + return self._safe_call(False, super(SSL_fileobject, self).send, + *args, **kwargs) + + +class SSLConnection: + + """A thread-safe wrapper for an SSL.Connection. + + ``*args``: the arguments to create the wrapped ``SSL.Connection(*args)``. + """ + + def __init__(self, *args): + self._ssl_conn = SSL.Connection(*args) + self._lock = threading.RLock() + + for f in ('get_context', 'pending', 'send', 'write', 'recv', 'read', + 'renegotiate', 'bind', 'listen', 'connect', 'accept', + 'setblocking', 'fileno', 'close', 'get_cipher_list', + 'getpeername', 'getsockname', 'getsockopt', 'setsockopt', + 'makefile', 'get_app_data', 'set_app_data', 'state_string', + 'sock_shutdown', 'get_peer_certificate', 'want_read', + 'want_write', 'set_connect_state', 'set_accept_state', + 'connect_ex', 'sendall', 'settimeout', 'gettimeout'): + exec("""def %s(self, *args): + self._lock.acquire() + try: + return self._ssl_conn.%s(*args) + finally: + self._lock.release() +""" % (f, f)) + + def shutdown(self, *args): + self._lock.acquire() + try: + # pyOpenSSL.socket.shutdown takes no args + return self._ssl_conn.shutdown() + finally: + self._lock.release() + + +class pyOpenSSLAdapter(wsgiserver.SSLAdapter): + + """A wrapper for integrating pyOpenSSL with CherryPy.""" + + context = None + """An instance of SSL.Context.""" + + certificate = None + """The filename of the server SSL certificate.""" + + private_key = None + """The filename of the server's private key file.""" + + certificate_chain = None + """Optional. The filename of CA's intermediate certificate bundle. + + This is needed for cheaper "chained root" SSL certificates, and should be + left as None if not required.""" + + def __init__(self, certificate, private_key, certificate_chain=None): + if SSL is None: + raise ImportError("You must install pyOpenSSL to use HTTPS.") + + self.context = None + self.certificate = certificate + self.private_key = private_key + self.certificate_chain = certificate_chain + self._environ = None + + def bind(self, sock): + """Wrap and return the given socket.""" + if self.context is None: + self.context = self.get_context() + conn = SSLConnection(self.context, sock) + self._environ = self.get_environ() + return conn + + def wrap(self, sock): + """Wrap and return the given socket, plus WSGI environ entries.""" + return sock, self._environ.copy() + + def get_context(self): + """Return an SSL.Context from self attributes.""" + # See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/442473 + c = SSL.Context(SSL.SSLv23_METHOD) + c.use_privatekey_file(self.private_key) + if self.certificate_chain: + c.load_verify_locations(self.certificate_chain) + c.use_certificate_file(self.certificate) + return c + + def get_environ(self): + """Return WSGI environ entries to be merged into each request.""" + ssl_environ = { + "HTTPS": "on", + # pyOpenSSL doesn't provide access to any of these AFAICT + # 'SSL_PROTOCOL': 'SSLv2', + # SSL_CIPHER string The cipher specification name + # SSL_VERSION_INTERFACE string The mod_ssl program version + # SSL_VERSION_LIBRARY string The OpenSSL program version + } + + if self.certificate: + # Server certificate attributes + cert = open(self.certificate, 'rb').read() + cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert) + ssl_environ.update({ + 'SSL_SERVER_M_VERSION': cert.get_version(), + 'SSL_SERVER_M_SERIAL': cert.get_serial_number(), + # 'SSL_SERVER_V_START': + # Validity of server's certificate (start time), + # 'SSL_SERVER_V_END': + # Validity of server's certificate (end time), + }) + + for prefix, dn in [("I", cert.get_issuer()), + ("S", cert.get_subject())]: + # X509Name objects don't seem to have a way to get the + # complete DN string. Use str() and slice it instead, + # because str(dn) == "" + dnstr = str(dn)[18:-2] + + wsgikey = 'SSL_SERVER_%s_DN' % prefix + ssl_environ[wsgikey] = dnstr + + # The DN should be of the form: /k1=v1/k2=v2, but we must allow + # for any value to contain slashes itself (in a URL). + while dnstr: + pos = dnstr.rfind("=") + dnstr, value = dnstr[:pos], dnstr[pos + 1:] + pos = dnstr.rfind("/") + dnstr, key = dnstr[:pos], dnstr[pos + 1:] + if key and value: + wsgikey = 'SSL_SERVER_%s_DN_%s' % (prefix, key) + ssl_environ[wsgikey] = value + + return ssl_environ + + def makefile(self, sock, mode='r', bufsize=-1): + if SSL and isinstance(sock, SSL.ConnectionType): + timeout = sock.gettimeout() + f = SSL_fileobject(sock, mode, bufsize) + f.ssl_timeout = timeout + return f + else: + return wsgiserver.CP_fileobject(sock, mode, bufsize) diff --git a/lib/cherrypy/wsgiserver/wsgiserver2.py b/lib/cherrypy/wsgiserver/wsgiserver2.py new file mode 100644 index 00000000..b4919cf5 --- /dev/null +++ b/lib/cherrypy/wsgiserver/wsgiserver2.py @@ -0,0 +1,2481 @@ +"""A high-speed, production ready, thread pooled, generic HTTP server. + +Simplest example on how to use this module directly +(without using CherryPy's application machinery):: + + from cherrypy import wsgiserver + + def my_crazy_app(environ, start_response): + status = '200 OK' + response_headers = [('Content-type','text/plain')] + start_response(status, response_headers) + return ['Hello world!'] + + server = wsgiserver.CherryPyWSGIServer( + ('0.0.0.0', 8070), my_crazy_app, + server_name='www.cherrypy.example') + server.start() + +The CherryPy WSGI server can serve as many WSGI applications +as you want in one instance by using a WSGIPathInfoDispatcher:: + + d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app}) + server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d) + +Want SSL support? Just set server.ssl_adapter to an SSLAdapter instance. + +This won't call the CherryPy engine (application side) at all, only the +HTTP server, which is independent from the rest of CherryPy. Don't +let the name "CherryPyWSGIServer" throw you; the name merely reflects +its origin, not its coupling. + +For those of you wanting to understand internals of this module, here's the +basic call flow. The server's listening thread runs a very tight loop, +sticking incoming connections onto a Queue:: + + server = CherryPyWSGIServer(...) + server.start() + while True: + tick() + # This blocks until a request comes in: + child = socket.accept() + conn = HTTPConnection(child, ...) + server.requests.put(conn) + +Worker threads are kept in a pool and poll the Queue, popping off and then +handling each connection in turn. Each connection can consist of an arbitrary +number of requests and their responses, so we run a nested loop:: + + while True: + conn = server.requests.get() + conn.communicate() + -> while True: + req = HTTPRequest(...) + req.parse_request() + -> # Read the Request-Line, e.g. "GET /page HTTP/1.1" + req.rfile.readline() + read_headers(req.rfile, req.inheaders) + req.respond() + -> response = app(...) + try: + for chunk in response: + if chunk: + req.write(chunk) + finally: + if hasattr(response, "close"): + response.close() + if req.close_connection: + return +""" + +__all__ = ['HTTPRequest', 'HTTPConnection', 'HTTPServer', + 'SizeCheckWrapper', 'KnownLengthRFile', 'ChunkedRFile', + 'CP_fileobject', + 'MaxSizeExceeded', 'NoSSLError', 'FatalSSLAlert', + 'WorkerThread', 'ThreadPool', 'SSLAdapter', + 'CherryPyWSGIServer', + 'Gateway', 'WSGIGateway', 'WSGIGateway_10', 'WSGIGateway_u0', + 'WSGIPathInfoDispatcher', 'get_ssl_adapter_class'] + +import os +try: + import queue +except: + import Queue as queue +import re +import rfc822 +import socket +import sys +if 'win' in sys.platform and hasattr(socket, "AF_INET6"): + if not hasattr(socket, 'IPPROTO_IPV6'): + socket.IPPROTO_IPV6 = 41 + if not hasattr(socket, 'IPV6_V6ONLY'): + socket.IPV6_V6ONLY = 27 +try: + import cStringIO as StringIO +except ImportError: + import StringIO +DEFAULT_BUFFER_SIZE = -1 + + +class FauxSocket(object): + + """Faux socket with the minimal interface required by pypy""" + + def _reuse(self): + pass + +_fileobject_uses_str_type = isinstance( + socket._fileobject(FauxSocket())._rbuf, basestring) +del FauxSocket # this class is not longer required for anything. + +import threading +import time +import traceback + + +def format_exc(limit=None): + """Like print_exc() but return a string. Backport for Python 2.3.""" + try: + etype, value, tb = sys.exc_info() + return ''.join(traceback.format_exception(etype, value, tb, limit)) + finally: + etype = value = tb = None + +import operator + +from urllib import unquote +import warnings + +if sys.version_info >= (3, 0): + bytestr = bytes + unicodestr = str + basestring = (bytes, str) + + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given + encoding. + """ + # In Python 3, the native string type is unicode + return n.encode(encoding) +else: + bytestr = str + unicodestr = unicode + basestring = basestring + + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given + encoding. + """ + # In Python 2, the native string type is bytes. Assume it's already + # in the given encoding, which for ISO-8859-1 is almost always what + # was intended. + return n + +LF = ntob('\n') +CRLF = ntob('\r\n') +TAB = ntob('\t') +SPACE = ntob(' ') +COLON = ntob(':') +SEMICOLON = ntob(';') +EMPTY = ntob('') +NUMBER_SIGN = ntob('#') +QUESTION_MARK = ntob('?') +ASTERISK = ntob('*') +FORWARD_SLASH = ntob('/') +quoted_slash = re.compile(ntob("(?i)%2F")) + +import errno + + +def plat_specific_errors(*errnames): + """Return error numbers for all errors in errnames on this platform. + + The 'errno' module contains different global constants depending on + the specific platform (OS). This function will return the list of + numeric values for a given list of potential names. + """ + errno_names = dir(errno) + nums = [getattr(errno, k) for k in errnames if k in errno_names] + # de-dupe the list + return list(dict.fromkeys(nums).keys()) + +socket_error_eintr = plat_specific_errors("EINTR", "WSAEINTR") + +socket_errors_to_ignore = plat_specific_errors( + "EPIPE", + "EBADF", "WSAEBADF", + "ENOTSOCK", "WSAENOTSOCK", + "ETIMEDOUT", "WSAETIMEDOUT", + "ECONNREFUSED", "WSAECONNREFUSED", + "ECONNRESET", "WSAECONNRESET", + "ECONNABORTED", "WSAECONNABORTED", + "ENETRESET", "WSAENETRESET", + "EHOSTDOWN", "EHOSTUNREACH", +) +socket_errors_to_ignore.append("timed out") +socket_errors_to_ignore.append("The read operation timed out") + +socket_errors_nonblocking = plat_specific_errors( + 'EAGAIN', 'EWOULDBLOCK', 'WSAEWOULDBLOCK') + +comma_separated_headers = [ + ntob(h) for h in + ['Accept', 'Accept-Charset', 'Accept-Encoding', + 'Accept-Language', 'Accept-Ranges', 'Allow', 'Cache-Control', + 'Connection', 'Content-Encoding', 'Content-Language', 'Expect', + 'If-Match', 'If-None-Match', 'Pragma', 'Proxy-Authenticate', 'TE', + 'Trailer', 'Transfer-Encoding', 'Upgrade', 'Vary', 'Via', 'Warning', + 'WWW-Authenticate'] +] + + +import logging +if not hasattr(logging, 'statistics'): + logging.statistics = {} + + +def read_headers(rfile, hdict=None): + """Read headers from the given stream into the given header dict. + + If hdict is None, a new header dict is created. Returns the populated + header dict. + + Headers which are repeated are folded together using a comma if their + specification so dictates. + + This function raises ValueError when the read bytes violate the HTTP spec. + You should probably return "400 Bad Request" if this happens. + """ + if hdict is None: + hdict = {} + + while True: + line = rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + if line == CRLF: + # Normal end of headers + break + if not line.endswith(CRLF): + raise ValueError("HTTP requires CRLF terminators") + + if line[0] in (SPACE, TAB): + # It's a continuation line. + v = line.strip() + else: + try: + k, v = line.split(COLON, 1) + except ValueError: + raise ValueError("Illegal header line.") + # TODO: what about TE and WWW-Authenticate? + k = k.strip().title() + v = v.strip() + hname = k + + if k in comma_separated_headers: + existing = hdict.get(hname) + if existing: + v = ", ".join((existing, v)) + hdict[hname] = v + + return hdict + + +class MaxSizeExceeded(Exception): + pass + + +class SizeCheckWrapper(object): + + """Wraps a file-like object, raising MaxSizeExceeded if too large.""" + + def __init__(self, rfile, maxlen): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + + def _check_length(self): + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded() + + def read(self, size=None): + data = self.rfile.read(size) + self.bytes_read += len(data) + self._check_length() + return data + + def readline(self, size=None): + if size is not None: + data = self.rfile.readline(size) + self.bytes_read += len(data) + self._check_length() + return data + + # User didn't specify a size ... + # We read the line in chunks to make sure it's not a 100MB line ! + res = [] + while True: + data = self.rfile.readline(256) + self.bytes_read += len(data) + self._check_length() + res.append(data) + # See https://bitbucket.org/cherrypy/cherrypy/issue/421 + if len(data) < 256 or data[-1:] == LF: + return EMPTY.join(res) + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline() + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline() + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.bytes_read += len(data) + self._check_length() + return data + + def next(self): + data = self.rfile.next() + self.bytes_read += len(data) + self._check_length() + return data + + +class KnownLengthRFile(object): + + """Wraps a file-like object, returning an empty string when exhausted.""" + + def __init__(self, rfile, content_length): + self.rfile = rfile + self.remaining = content_length + + def read(self, size=None): + if self.remaining == 0: + return '' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.read(size) + self.remaining -= len(data) + return data + + def readline(self, size=None): + if self.remaining == 0: + return '' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.readline(size) + self.remaining -= len(data) + return data + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline(sizehint) + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.remaining -= len(data) + return data + + +class ChunkedRFile(object): + + """Wraps a file-like object, returning an empty string when exhausted. + + This class is intended to provide a conforming wsgi.input value for + request entities that have been encoded with the 'chunked' transfer + encoding. + """ + + def __init__(self, rfile, maxlen, bufsize=8192): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + self.buffer = EMPTY + self.bufsize = bufsize + self.closed = False + + def _fetch(self): + if self.closed: + return + + line = self.rfile.readline() + self.bytes_read += len(line) + + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded("Request Entity Too Large", self.maxlen) + + line = line.strip().split(SEMICOLON, 1) + + try: + chunk_size = line.pop(0) + chunk_size = int(chunk_size, 16) + except ValueError: + raise ValueError("Bad chunked transfer size: " + repr(chunk_size)) + + if chunk_size <= 0: + self.closed = True + return + +## if line: chunk_extension = line[0] + + if self.maxlen and self.bytes_read + chunk_size > self.maxlen: + raise IOError("Request Entity Too Large") + + chunk = self.rfile.read(chunk_size) + self.bytes_read += len(chunk) + self.buffer += chunk + + crlf = self.rfile.read(2) + if crlf != CRLF: + raise ValueError( + "Bad chunked transfer coding (expected '\\r\\n', " + "got " + repr(crlf) + ")") + + def read(self, size=None): + data = EMPTY + while True: + if size and len(data) >= size: + return data + + if not self.buffer: + self._fetch() + if not self.buffer: + # EOF + return data + + if size: + remaining = size - len(data) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + data += self.buffer + + def readline(self, size=None): + data = EMPTY + while True: + if size and len(data) >= size: + return data + + if not self.buffer: + self._fetch() + if not self.buffer: + # EOF + return data + + newline_pos = self.buffer.find(LF) + if size: + if newline_pos == -1: + remaining = size - len(data) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + remaining = min(size - len(data), newline_pos) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + if newline_pos == -1: + data += self.buffer + else: + data += self.buffer[:newline_pos] + self.buffer = self.buffer[newline_pos:] + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline(sizehint) + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + return lines + + def read_trailer_lines(self): + if not self.closed: + raise ValueError( + "Cannot read trailers until the request body has been read.") + + while True: + line = self.rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + self.bytes_read += len(line) + if self.maxlen and self.bytes_read > self.maxlen: + raise IOError("Request Entity Too Large") + + if line == CRLF: + # Normal end of headers + break + if not line.endswith(CRLF): + raise ValueError("HTTP requires CRLF terminators") + + yield line + + def close(self): + self.rfile.close() + + def __iter__(self): + # Shamelessly stolen from StringIO + total = 0 + line = self.readline(sizehint) + while line: + yield line + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + + +class HTTPRequest(object): + + """An HTTP Request (and response). + + A single HTTP connection may consist of multiple request/response pairs. + """ + + server = None + """The HTTPServer object which is receiving this request.""" + + conn = None + """The HTTPConnection object on which this request connected.""" + + inheaders = {} + """A dict of request headers.""" + + outheaders = [] + """A list of header tuples to write in the response.""" + + ready = False + """When True, the request has been parsed and is ready to begin generating + the response. When False, signals the calling Connection that the response + should not be generated and the connection should close.""" + + close_connection = False + """Signals the calling Connection that the request should close. This does + not imply an error! The client and/or server may each request that the + connection be closed.""" + + chunked_write = False + """If True, output will be encoded with the "chunked" transfer-coding. + + This value is set automatically inside send_headers.""" + + def __init__(self, server, conn): + self.server = server + self.conn = conn + + self.ready = False + self.started_request = False + self.scheme = ntob("http") + if self.server.ssl_adapter is not None: + self.scheme = ntob("https") + # Use the lowest-common protocol in case read_request_line errors. + self.response_protocol = 'HTTP/1.0' + self.inheaders = {} + + self.status = "" + self.outheaders = [] + self.sent_headers = False + self.close_connection = self.__class__.close_connection + self.chunked_read = False + self.chunked_write = self.__class__.chunked_write + + def parse_request(self): + """Parse the next HTTP request start-line and message-headers.""" + self.rfile = SizeCheckWrapper(self.conn.rfile, + self.server.max_request_header_size) + try: + success = self.read_request_line() + except MaxSizeExceeded: + self.simple_response( + "414 Request-URI Too Long", + "The Request-URI sent with the request exceeds the maximum " + "allowed bytes.") + return + else: + if not success: + return + + try: + success = self.read_request_headers() + except MaxSizeExceeded: + self.simple_response( + "413 Request Entity Too Large", + "The headers sent with the request exceed the maximum " + "allowed bytes.") + return + else: + if not success: + return + + self.ready = True + + def read_request_line(self): + # HTTP/1.1 connections are persistent by default. If a client + # requests a page, then idles (leaves the connection open), + # then rfile.readline() will raise socket.error("timed out"). + # Note that it does this based on the value given to settimeout(), + # and doesn't need the client to request or acknowledge the close + # (although your TCP stack might suffer for it: cf Apache's history + # with FIN_WAIT_2). + request_line = self.rfile.readline() + + # Set started_request to True so communicate() knows to send 408 + # from here on out. + self.started_request = True + if not request_line: + return False + + if request_line == CRLF: + # RFC 2616 sec 4.1: "...if the server is reading the protocol + # stream at the beginning of a message and receives a CRLF + # first, it should ignore the CRLF." + # But only ignore one leading line! else we enable a DoS. + request_line = self.rfile.readline() + if not request_line: + return False + + if not request_line.endswith(CRLF): + self.simple_response( + "400 Bad Request", "HTTP requires CRLF terminators") + return False + + try: + method, uri, req_protocol = request_line.strip().split(SPACE, 2) + rp = int(req_protocol[5]), int(req_protocol[7]) + except (ValueError, IndexError): + self.simple_response("400 Bad Request", "Malformed Request-Line") + return False + + self.uri = uri + self.method = method + + # uri may be an abs_path (including "http://host.domain.tld"); + scheme, authority, path = self.parse_request_uri(uri) + if NUMBER_SIGN in path: + self.simple_response("400 Bad Request", + "Illegal #fragment in Request-URI.") + return False + + if scheme: + self.scheme = scheme + + qs = EMPTY + if QUESTION_MARK in path: + path, qs = path.split(QUESTION_MARK, 1) + + # Unquote the path+params (e.g. "/this%20path" -> "/this path"). + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 + # + # But note that "...a URI must be separated into its components + # before the escaped characters within those components can be + # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2 + # Therefore, "/this%2Fpath" becomes "/this%2Fpath", not "/this/path". + try: + atoms = [unquote(x) for x in quoted_slash.split(path)] + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + path = "%2F".join(atoms) + self.path = path + + # Note that, like wsgiref and most other HTTP servers, + # we "% HEX HEX"-unquote the path but not the query string. + self.qs = qs + + # Compare request and server HTTP protocol versions, in case our + # server does not support the requested protocol. Limit our output + # to min(req, server). We want the following output: + # request server actual written supported response + # protocol protocol response protocol feature set + # a 1.0 1.0 1.0 1.0 + # b 1.0 1.1 1.1 1.0 + # c 1.1 1.0 1.0 1.0 + # d 1.1 1.1 1.1 1.1 + # Notice that, in (b), the response will be "HTTP/1.1" even though + # the client only understands 1.0. RFC 2616 10.5.6 says we should + # only return 505 if the _major_ version is different. + sp = int(self.server.protocol[5]), int(self.server.protocol[7]) + + if sp[0] != rp[0]: + self.simple_response("505 HTTP Version Not Supported") + return False + + self.request_protocol = req_protocol + self.response_protocol = "HTTP/%s.%s" % min(rp, sp) + + return True + + def read_request_headers(self): + """Read self.rfile into self.inheaders. Return success.""" + + # then all the http headers + try: + read_headers(self.rfile, self.inheaders) + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + + mrbs = self.server.max_request_body_size + if mrbs and int(self.inheaders.get("Content-Length", 0)) > mrbs: + self.simple_response( + "413 Request Entity Too Large", + "The entity sent with the request exceeds the maximum " + "allowed bytes.") + return False + + # Persistent connection support + if self.response_protocol == "HTTP/1.1": + # Both server and client are HTTP/1.1 + if self.inheaders.get("Connection", "") == "close": + self.close_connection = True + else: + # Either the server or client (or both) are HTTP/1.0 + if self.inheaders.get("Connection", "") != "Keep-Alive": + self.close_connection = True + + # Transfer-Encoding support + te = None + if self.response_protocol == "HTTP/1.1": + te = self.inheaders.get("Transfer-Encoding") + if te: + te = [x.strip().lower() for x in te.split(",") if x.strip()] + + self.chunked_read = False + + if te: + for enc in te: + if enc == "chunked": + self.chunked_read = True + else: + # Note that, even if we see "chunked", we must reject + # if there is an extension we don't recognize. + self.simple_response("501 Unimplemented") + self.close_connection = True + return False + + # From PEP 333: + # "Servers and gateways that implement HTTP 1.1 must provide + # transparent support for HTTP 1.1's "expect/continue" mechanism. + # This may be done in any of several ways: + # 1. Respond to requests containing an Expect: 100-continue request + # with an immediate "100 Continue" response, and proceed normally. + # 2. Proceed with the request normally, but provide the application + # with a wsgi.input stream that will send the "100 Continue" + # response if/when the application first attempts to read from + # the input stream. The read request must then remain blocked + # until the client responds. + # 3. Wait until the client decides that the server does not support + # expect/continue, and sends the request body on its own. + # (This is suboptimal, and is not recommended.) + # + # We used to do 3, but are now doing 1. Maybe we'll do 2 someday, + # but it seems like it would be a big slowdown for such a rare case. + if self.inheaders.get("Expect", "") == "100-continue": + # Don't use simple_response here, because it emits headers + # we don't want. See + # https://bitbucket.org/cherrypy/cherrypy/issue/951 + msg = self.server.protocol + " 100 Continue\r\n\r\n" + try: + self.conn.wfile.sendall(msg) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return True + + def parse_request_uri(self, uri): + """Parse a Request-URI into (scheme, authority, path). + + Note that Request-URI's must be one of:: + + Request-URI = "*" | absoluteURI | abs_path | authority + + Therefore, a Request-URI which starts with a double forward-slash + cannot be a "net_path":: + + net_path = "//" authority [ abs_path ] + + Instead, it must be interpreted as an "abs_path" with an empty first + path segment:: + + abs_path = "/" path_segments + path_segments = segment *( "/" segment ) + segment = *pchar *( ";" param ) + param = *pchar + """ + if uri == ASTERISK: + return None, None, uri + + i = uri.find('://') + if i > 0 and QUESTION_MARK not in uri[:i]: + # An absoluteURI. + # If there's a scheme (and it must be http or https), then: + # http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query + # ]] + scheme, remainder = uri[:i].lower(), uri[i + 3:] + authority, path = remainder.split(FORWARD_SLASH, 1) + path = FORWARD_SLASH + path + return scheme, authority, path + + if uri.startswith(FORWARD_SLASH): + # An abs_path. + return None, None, uri + else: + # An authority. + return None, uri, None + + def respond(self): + """Call the gateway and write its iterable output.""" + mrbs = self.server.max_request_body_size + if self.chunked_read: + self.rfile = ChunkedRFile(self.conn.rfile, mrbs) + else: + cl = int(self.inheaders.get("Content-Length", 0)) + if mrbs and mrbs < cl: + if not self.sent_headers: + self.simple_response( + "413 Request Entity Too Large", + "The entity sent with the request exceeds the maximum " + "allowed bytes.") + return + self.rfile = KnownLengthRFile(self.conn.rfile, cl) + + self.server.gateway(self).respond() + + if (self.ready and not self.sent_headers): + self.sent_headers = True + self.send_headers() + if self.chunked_write: + self.conn.wfile.sendall("0\r\n\r\n") + + def simple_response(self, status, msg=""): + """Write a simple response back to the client.""" + status = str(status) + buf = [self.server.protocol + SPACE + + status + CRLF, + "Content-Length: %s\r\n" % len(msg), + "Content-Type: text/plain\r\n"] + + if status[:3] in ("413", "414"): + # Request Entity Too Large / Request-URI Too Long + self.close_connection = True + if self.response_protocol == 'HTTP/1.1': + # This will not be true for 414, since read_request_line + # usually raises 414 before reading the whole line, and we + # therefore cannot know the proper response_protocol. + buf.append("Connection: close\r\n") + else: + # HTTP/1.0 had no 413/414 status nor Connection header. + # Emit 400 instead and trust the message body is enough. + status = "400 Bad Request" + + buf.append(CRLF) + if msg: + if isinstance(msg, unicodestr): + msg = msg.encode("ISO-8859-1") + buf.append(msg) + + try: + self.conn.wfile.sendall("".join(buf)) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + + def write(self, chunk): + """Write unbuffered data to the client.""" + if self.chunked_write and chunk: + buf = [hex(len(chunk))[2:], CRLF, chunk, CRLF] + self.conn.wfile.sendall(EMPTY.join(buf)) + else: + self.conn.wfile.sendall(chunk) + + def send_headers(self): + """Assert, process, and send the HTTP response message-headers. + + You must set self.status, and self.outheaders before calling this. + """ + hkeys = [key.lower() for key, value in self.outheaders] + status = int(self.status[:3]) + + if status == 413: + # Request Entity Too Large. Close conn to avoid garbage. + self.close_connection = True + elif "content-length" not in hkeys: + # "All 1xx (informational), 204 (no content), + # and 304 (not modified) responses MUST NOT + # include a message-body." So no point chunking. + if status < 200 or status in (204, 205, 304): + pass + else: + if (self.response_protocol == 'HTTP/1.1' + and self.method != 'HEAD'): + # Use the chunked transfer-coding + self.chunked_write = True + self.outheaders.append(("Transfer-Encoding", "chunked")) + else: + # Closing the conn is the only way to determine len. + self.close_connection = True + + if "connection" not in hkeys: + if self.response_protocol == 'HTTP/1.1': + # Both server and client are HTTP/1.1 or better + if self.close_connection: + self.outheaders.append(("Connection", "close")) + else: + # Server and/or client are HTTP/1.0 + if not self.close_connection: + self.outheaders.append(("Connection", "Keep-Alive")) + + if (not self.close_connection) and (not self.chunked_read): + # Read any remaining request body data on the socket. + # "If an origin server receives a request that does not include an + # Expect request-header field with the "100-continue" expectation, + # the request includes a request body, and the server responds + # with a final status code before reading the entire request body + # from the transport connection, then the server SHOULD NOT close + # the transport connection until it has read the entire request, + # or until the client closes the connection. Otherwise, the client + # might not reliably receive the response message. However, this + # requirement is not be construed as preventing a server from + # defending itself against denial-of-service attacks, or from + # badly broken client implementations." + remaining = getattr(self.rfile, 'remaining', 0) + if remaining > 0: + self.rfile.read(remaining) + + if "date" not in hkeys: + self.outheaders.append(("Date", rfc822.formatdate())) + + if "server" not in hkeys: + self.outheaders.append(("Server", self.server.server_name)) + + buf = [self.server.protocol + SPACE + self.status + CRLF] + for k, v in self.outheaders: + buf.append(k + COLON + SPACE + v + CRLF) + buf.append(CRLF) + self.conn.wfile.sendall(EMPTY.join(buf)) + + +class NoSSLError(Exception): + + """Exception raised when a client speaks HTTP to an HTTPS socket.""" + pass + + +class FatalSSLAlert(Exception): + + """Exception raised when the SSL implementation signals a fatal alert.""" + pass + + +class CP_fileobject(socket._fileobject): + + """Faux file object attached to a socket object.""" + + def __init__(self, *args, **kwargs): + self.bytes_read = 0 + self.bytes_written = 0 + socket._fileobject.__init__(self, *args, **kwargs) + + def sendall(self, data): + """Sendall for non-blocking sockets.""" + while data: + try: + bytes_sent = self.send(data) + data = data[bytes_sent:] + except socket.error, e: + if e.args[0] not in socket_errors_nonblocking: + raise + + def send(self, data): + bytes_sent = self._sock.send(data) + self.bytes_written += bytes_sent + return bytes_sent + + def flush(self): + if self._wbuf: + buffer = "".join(self._wbuf) + self._wbuf = [] + self.sendall(buffer) + + def recv(self, size): + while True: + try: + data = self._sock.recv(size) + self.bytes_read += len(data) + return data + except socket.error, e: + if (e.args[0] not in socket_errors_nonblocking + and e.args[0] not in socket_error_eintr): + raise + + if not _fileobject_uses_str_type: + def read(self, size=-1): + # Use max, disallow tiny reads in a loop as they are very + # inefficient. + # We never leave read() with any leftover data from a new recv() + # call in our internal buffer. + rbufsize = max(self._rbufsize, self.default_bufsize) + # Our use of StringIO rather than lists of string objects returned + # by recv() minimizes memory usage and fragmentation that occurs + # when rbufsize is large compared to the typical return value of + # recv(). + buf = self._rbuf + buf.seek(0, 2) # seek end + if size < 0: + # Read until EOF + # reset _rbuf. we consume it via buf. + self._rbuf = StringIO.StringIO() + while True: + data = self.recv(rbufsize) + if not data: + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or EOF seen, whichever comes first + buf_len = buf.tell() + if buf_len >= size: + # Already have size bytes in our buffer? Extract and + # return. + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO.StringIO() + self._rbuf.write(buf.read()) + return rv + + # reset _rbuf. we consume it via buf. + self._rbuf = StringIO.StringIO() + while True: + left = size - buf_len + # recv() will malloc the amount of memory given as its + # parameter even though it often returns much less data + # than that. The returned data string is short lived + # as we copy it into a StringIO and free it. This avoids + # fragmentation issues on many platforms. + data = self.recv(left) + if not data: + break + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid buffer data copies when: + # - We have no data in our buffer. + # AND + # - Our call to recv returned exactly the + # number of bytes we were asked to read. + return data + if n == left: + buf.write(data) + del data # explicit free + break + assert n <= left, "recv(%d) returned %d bytes" % (left, n) + buf.write(data) + buf_len += n + del data # explicit free + #assert buf_len == buf.tell() + return buf.getvalue() + + def readline(self, size=-1): + buf = self._rbuf + buf.seek(0, 2) # seek end + if buf.tell() > 0: + # check if we already have it in our buffer + buf.seek(0) + bline = buf.readline(size) + if bline.endswith('\n') or len(bline) == size: + self._rbuf = StringIO.StringIO() + self._rbuf.write(buf.read()) + return bline + del bline + if size < 0: + # Read until \n or EOF, whichever comes first + if self._rbufsize <= 1: + # Speed up unbuffered case + buf.seek(0) + buffers = [buf.read()] + # reset _rbuf. we consume it via buf. + self._rbuf = StringIO.StringIO() + data = None + recv = self.recv + while data != "\n": + data = recv(1) + if not data: + break + buffers.append(data) + return "".join(buffers) + + buf.seek(0, 2) # seek end + # reset _rbuf. we consume it via buf. + self._rbuf = StringIO.StringIO() + while True: + data = self.recv(self._rbufsize) + if not data: + break + nl = data.find('\n') + if nl >= 0: + nl += 1 + buf.write(data[:nl]) + self._rbuf.write(data[nl:]) + del data + break + buf.write(data) + return buf.getvalue() + else: + # Read until size bytes or \n or EOF seen, whichever comes + # first + buf.seek(0, 2) # seek end + buf_len = buf.tell() + if buf_len >= size: + buf.seek(0) + rv = buf.read(size) + self._rbuf = StringIO.StringIO() + self._rbuf.write(buf.read()) + return rv + # reset _rbuf. we consume it via buf. + self._rbuf = StringIO.StringIO() + while True: + data = self.recv(self._rbufsize) + if not data: + break + left = size - buf_len + # did we just receive a newline? + nl = data.find('\n', 0, left) + if nl >= 0: + nl += 1 + # save the excess data to _rbuf + self._rbuf.write(data[nl:]) + if buf_len: + buf.write(data[:nl]) + break + else: + # Shortcut. Avoid data copy through buf when + # returning a substring of our first recv(). + return data[:nl] + n = len(data) + if n == size and not buf_len: + # Shortcut. Avoid data copy through buf when + # returning exactly all of our first recv(). + return data + if n >= left: + buf.write(data[:left]) + self._rbuf.write(data[left:]) + break + buf.write(data) + buf_len += n + #assert buf_len == buf.tell() + return buf.getvalue() + else: + def read(self, size=-1): + if size < 0: + # Read until EOF + buffers = [self._rbuf] + self._rbuf = "" + if self._rbufsize <= 1: + recv_size = self.default_bufsize + else: + recv_size = self._rbufsize + + while True: + data = self.recv(recv_size) + if not data: + break + buffers.append(data) + return "".join(buffers) + else: + # Read until size bytes or EOF seen, whichever comes first + data = self._rbuf + buf_len = len(data) + if buf_len >= size: + self._rbuf = data[size:] + return data[:size] + buffers = [] + if data: + buffers.append(data) + self._rbuf = "" + while True: + left = size - buf_len + recv_size = max(self._rbufsize, left) + data = self.recv(recv_size) + if not data: + break + buffers.append(data) + n = len(data) + if n >= left: + self._rbuf = data[left:] + buffers[-1] = data[:left] + break + buf_len += n + return "".join(buffers) + + def readline(self, size=-1): + data = self._rbuf + if size < 0: + # Read until \n or EOF, whichever comes first + if self._rbufsize <= 1: + # Speed up unbuffered case + assert data == "" + buffers = [] + while data != "\n": + data = self.recv(1) + if not data: + break + buffers.append(data) + return "".join(buffers) + nl = data.find('\n') + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + return data[:nl] + buffers = [] + if data: + buffers.append(data) + self._rbuf = "" + while True: + data = self.recv(self._rbufsize) + if not data: + break + buffers.append(data) + nl = data.find('\n') + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + buffers[-1] = data[:nl] + break + return "".join(buffers) + else: + # Read until size bytes or \n or EOF seen, whichever comes + # first + nl = data.find('\n', 0, size) + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + return data[:nl] + buf_len = len(data) + if buf_len >= size: + self._rbuf = data[size:] + return data[:size] + buffers = [] + if data: + buffers.append(data) + self._rbuf = "" + while True: + data = self.recv(self._rbufsize) + if not data: + break + buffers.append(data) + left = size - buf_len + nl = data.find('\n', 0, left) + if nl >= 0: + nl += 1 + self._rbuf = data[nl:] + buffers[-1] = data[:nl] + break + n = len(data) + if n >= left: + self._rbuf = data[left:] + buffers[-1] = data[:left] + break + buf_len += n + return "".join(buffers) + + +class HTTPConnection(object): + + """An HTTP connection (active socket). + + server: the Server object which received this connection. + socket: the raw socket object (usually TCP) for this connection. + makefile: a fileobject class for reading from the socket. + """ + + remote_addr = None + remote_port = None + ssl_env = None + rbufsize = DEFAULT_BUFFER_SIZE + wbufsize = DEFAULT_BUFFER_SIZE + RequestHandlerClass = HTTPRequest + + def __init__(self, server, sock, makefile=CP_fileobject): + self.server = server + self.socket = sock + self.rfile = makefile(sock, "rb", self.rbufsize) + self.wfile = makefile(sock, "wb", self.wbufsize) + self.requests_seen = 0 + + def communicate(self): + """Read each request and respond appropriately.""" + request_seen = False + try: + while True: + # (re)set req to None so that if something goes wrong in + # the RequestHandlerClass constructor, the error doesn't + # get written to the previous request. + req = None + req = self.RequestHandlerClass(self.server, self) + + # This order of operations should guarantee correct pipelining. + req.parse_request() + if self.server.stats['Enabled']: + self.requests_seen += 1 + if not req.ready: + # Something went wrong in the parsing (and the server has + # probably already made a simple_response). Return and + # let the conn close. + return + + request_seen = True + req.respond() + if req.close_connection: + return + except socket.error: + e = sys.exc_info()[1] + errnum = e.args[0] + # sadly SSL sockets return a different (longer) time out string + if ( + errnum == 'timed out' or + errnum == 'The read operation timed out' + ): + # Don't error if we're between requests; only error + # if 1) no request has been started at all, or 2) we're + # in the middle of a request. + # See https://bitbucket.org/cherrypy/cherrypy/issue/853 + if (not request_seen) or (req and req.started_request): + # Don't bother writing the 408 if the response + # has already started being written. + if req and not req.sent_headers: + try: + req.simple_response("408 Request Timeout") + except FatalSSLAlert: + # Close the connection. + return + elif errnum not in socket_errors_to_ignore: + self.server.error_log("socket.error %s" % repr(errnum), + level=logging.WARNING, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + return + except (KeyboardInterrupt, SystemExit): + raise + except FatalSSLAlert: + # Close the connection. + return + except NoSSLError: + if req and not req.sent_headers: + # Unwrap our wfile + self.wfile = CP_fileobject( + self.socket._sock, "wb", self.wbufsize) + req.simple_response( + "400 Bad Request", + "The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + self.linger = True + except Exception: + e = sys.exc_info()[1] + self.server.error_log(repr(e), level=logging.ERROR, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + + linger = False + + def close(self): + """Close the socket underlying this connection.""" + self.rfile.close() + + if not self.linger: + # Python's socket module does NOT call close on the kernel + # socket when you call socket.close(). We do so manually here + # because we want this server to send a FIN TCP segment + # immediately. Note this must be called *before* calling + # socket.close(), because the latter drops its reference to + # the kernel socket. + if hasattr(self.socket, '_sock'): + self.socket._sock.close() + self.socket.close() + else: + # On the other hand, sometimes we want to hang around for a bit + # to make sure the client has a chance to read our entire + # response. Skipping the close() calls here delays the FIN + # packet until the socket object is garbage-collected later. + # Someday, perhaps, we'll do the full lingering_close that + # Apache does, but not today. + pass + + +class TrueyZero(object): + + """An object which equals and does math like the integer 0 but evals True. + """ + + def __add__(self, other): + return other + + def __radd__(self, other): + return other +trueyzero = TrueyZero() + + +_SHUTDOWNREQUEST = None + + +class WorkerThread(threading.Thread): + + """Thread which continuously polls a Queue for Connection objects. + + Due to the timing issues of polling a Queue, a WorkerThread does not + check its own 'ready' flag after it has started. To stop the thread, + it is necessary to stick a _SHUTDOWNREQUEST object onto the Queue + (one for each running WorkerThread). + """ + + conn = None + """The current connection pulled off the Queue, or None.""" + + server = None + """The HTTP Server which spawned this thread, and which owns the + Queue and is placing active connections into it.""" + + ready = False + """A simple flag for the calling server to know when this thread + has begun polling the Queue.""" + + def __init__(self, server): + self.ready = False + self.server = server + + self.requests_seen = 0 + self.bytes_read = 0 + self.bytes_written = 0 + self.start_time = None + self.work_time = 0 + self.stats = { + 'Requests': lambda s: self.requests_seen + ( + (self.start_time is None) and + trueyzero or + self.conn.requests_seen + ), + 'Bytes Read': lambda s: self.bytes_read + ( + (self.start_time is None) and + trueyzero or + self.conn.rfile.bytes_read + ), + 'Bytes Written': lambda s: self.bytes_written + ( + (self.start_time is None) and + trueyzero or + self.conn.wfile.bytes_written + ), + 'Work Time': lambda s: self.work_time + ( + (self.start_time is None) and + trueyzero or + time.time() - self.start_time + ), + 'Read Throughput': lambda s: s['Bytes Read'](s) / ( + s['Work Time'](s) or 1e-6), + 'Write Throughput': lambda s: s['Bytes Written'](s) / ( + s['Work Time'](s) or 1e-6), + } + threading.Thread.__init__(self) + + def run(self): + self.server.stats['Worker Threads'][self.getName()] = self.stats + try: + self.ready = True + while True: + conn = self.server.requests.get() + if conn is _SHUTDOWNREQUEST: + return + + self.conn = conn + if self.server.stats['Enabled']: + self.start_time = time.time() + try: + conn.communicate() + finally: + conn.close() + if self.server.stats['Enabled']: + self.requests_seen += self.conn.requests_seen + self.bytes_read += self.conn.rfile.bytes_read + self.bytes_written += self.conn.wfile.bytes_written + self.work_time += time.time() - self.start_time + self.start_time = None + self.conn = None + except (KeyboardInterrupt, SystemExit): + exc = sys.exc_info()[1] + self.server.interrupt = exc + + +class ThreadPool(object): + + """A Request Queue for an HTTPServer which pools threads. + + ThreadPool objects must provide min, get(), put(obj), start() + and stop(timeout) attributes. + """ + + def __init__(self, server, min=10, max=-1, + accepted_queue_size=-1, accepted_queue_timeout=10): + self.server = server + self.min = min + self.max = max + self._threads = [] + self._queue = queue.Queue(maxsize=accepted_queue_size) + self._queue_put_timeout = accepted_queue_timeout + self.get = self._queue.get + + def start(self): + """Start the pool of threads.""" + for i in range(self.min): + self._threads.append(WorkerThread(self.server)) + for worker in self._threads: + worker.setName("CP Server " + worker.getName()) + worker.start() + for worker in self._threads: + while not worker.ready: + time.sleep(.1) + + def _get_idle(self): + """Number of worker threads which are idle. Read-only.""" + return len([t for t in self._threads if t.conn is None]) + idle = property(_get_idle, doc=_get_idle.__doc__) + + def put(self, obj): + self._queue.put(obj, block=True, timeout=self._queue_put_timeout) + if obj is _SHUTDOWNREQUEST: + return + + def grow(self, amount): + """Spawn new worker threads (not above self.max).""" + if self.max > 0: + budget = max(self.max - len(self._threads), 0) + else: + # self.max <= 0 indicates no maximum + budget = float('inf') + + n_new = min(amount, budget) + + workers = [self._spawn_worker() for i in range(n_new)] + while not self._all(operator.attrgetter('ready'), workers): + time.sleep(.1) + self._threads.extend(workers) + + def _spawn_worker(self): + worker = WorkerThread(self.server) + worker.setName("CP Server " + worker.getName()) + worker.start() + return worker + + def _all(func, items): + results = [func(item) for item in items] + return reduce(operator.and_, results, True) + _all = staticmethod(_all) + + def shrink(self, amount): + """Kill off worker threads (not below self.min).""" + # Grow/shrink the pool if necessary. + # Remove any dead threads from our list + for t in self._threads: + if not t.isAlive(): + self._threads.remove(t) + amount -= 1 + + # calculate the number of threads above the minimum + n_extra = max(len(self._threads) - self.min, 0) + + # don't remove more than amount + n_to_remove = min(amount, n_extra) + + # put shutdown requests on the queue equal to the number of threads + # to remove. As each request is processed by a worker, that worker + # will terminate and be culled from the list. + for n in range(n_to_remove): + self._queue.put(_SHUTDOWNREQUEST) + + def stop(self, timeout=5): + # Must shut down threads here so the code that calls + # this method can know when all threads are stopped. + for worker in self._threads: + self._queue.put(_SHUTDOWNREQUEST) + + # Don't join currentThread (when stop is called inside a request). + current = threading.currentThread() + if timeout and timeout >= 0: + endtime = time.time() + timeout + while self._threads: + worker = self._threads.pop() + if worker is not current and worker.isAlive(): + try: + if timeout is None or timeout < 0: + worker.join() + else: + remaining_time = endtime - time.time() + if remaining_time > 0: + worker.join(remaining_time) + if worker.isAlive(): + # We exhausted the timeout. + # Forcibly shut down the socket. + c = worker.conn + if c and not c.rfile.closed: + try: + c.socket.shutdown(socket.SHUT_RD) + except TypeError: + # pyOpenSSL sockets don't take an arg + c.socket.shutdown() + worker.join() + except (AssertionError, + # Ignore repeated Ctrl-C. + # See + # https://bitbucket.org/cherrypy/cherrypy/issue/691. + KeyboardInterrupt): + pass + + def _get_qsize(self): + return self._queue.qsize() + qsize = property(_get_qsize) + + +try: + import fcntl +except ImportError: + try: + from ctypes import windll, WinError + import ctypes.wintypes + _SetHandleInformation = windll.kernel32.SetHandleInformation + _SetHandleInformation.argtypes = [ + ctypes.wintypes.HANDLE, + ctypes.wintypes.DWORD, + ctypes.wintypes.DWORD, + ] + _SetHandleInformation.restype = ctypes.wintypes.BOOL + except ImportError: + def prevent_socket_inheritance(sock): + """Dummy function, since neither fcntl nor ctypes are available.""" + pass + else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (Windows).""" + if not _SetHandleInformation(sock.fileno(), 1, 0): + raise WinError() +else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (POSIX).""" + fd = sock.fileno() + old_flags = fcntl.fcntl(fd, fcntl.F_GETFD) + fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC) + + +class SSLAdapter(object): + + """Base class for SSL driver library adapters. + + Required methods: + + * ``wrap(sock) -> (wrapped socket, ssl environ dict)`` + * ``makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE) -> + socket file object`` + """ + + def __init__(self, certificate, private_key, certificate_chain=None): + self.certificate = certificate + self.private_key = private_key + self.certificate_chain = certificate_chain + + def wrap(self, sock): + raise NotImplemented + + def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + raise NotImplemented + + +class HTTPServer(object): + + """An HTTP server.""" + + _bind_addr = "127.0.0.1" + _interrupt = None + + gateway = None + """A Gateway instance.""" + + minthreads = None + """The minimum number of worker threads to create (default 10).""" + + maxthreads = None + """The maximum number of worker threads to create (default -1 = no limit). + """ + + server_name = None + """The name of the server; defaults to socket.gethostname().""" + + protocol = "HTTP/1.1" + """The version string to write in the Status-Line of all HTTP responses. + + For example, "HTTP/1.1" is the default. This also limits the supported + features used in the response.""" + + request_queue_size = 5 + """The 'backlog' arg to socket.listen(); max queued connections + (default 5). + """ + + shutdown_timeout = 5 + """The total time, in seconds, to wait for worker threads to cleanly exit. + """ + + timeout = 10 + """The timeout in seconds for accepted connections (default 10).""" + + version = "CherryPy/3.6.0" + """A version string for the HTTPServer.""" + + software = None + """The value to set for the SERVER_SOFTWARE entry in the WSGI environ. + + If None, this defaults to ``'%s Server' % self.version``.""" + + ready = False + """An internal flag which marks whether the socket is accepting connections + """ + + max_request_header_size = 0 + """The maximum size, in bytes, for request headers, or 0 for no limit.""" + + max_request_body_size = 0 + """The maximum size, in bytes, for request bodies, or 0 for no limit.""" + + nodelay = True + """If True (the default since 3.1), sets the TCP_NODELAY socket option.""" + + ConnectionClass = HTTPConnection + """The class to use for handling HTTP connections.""" + + ssl_adapter = None + """An instance of SSLAdapter (or a subclass). + + You must have the corresponding SSL driver library installed.""" + + def __init__(self, bind_addr, gateway, minthreads=10, maxthreads=-1, + server_name=None): + self.bind_addr = bind_addr + self.gateway = gateway + + self.requests = ThreadPool(self, min=minthreads or 1, max=maxthreads) + + if not server_name: + server_name = socket.gethostname() + self.server_name = server_name + self.clear_stats() + + def clear_stats(self): + self._start_time = None + self._run_time = 0 + self.stats = { + 'Enabled': False, + 'Bind Address': lambda s: repr(self.bind_addr), + 'Run time': lambda s: (not s['Enabled']) and -1 or self.runtime(), + 'Accepts': 0, + 'Accepts/sec': lambda s: s['Accepts'] / self.runtime(), + 'Queue': lambda s: getattr(self.requests, "qsize", None), + 'Threads': lambda s: len(getattr(self.requests, "_threads", [])), + 'Threads Idle': lambda s: getattr(self.requests, "idle", None), + 'Socket Errors': 0, + 'Requests': lambda s: (not s['Enabled']) and -1 or sum( + [w['Requests'](w) for w in s['Worker Threads'].values()], 0), + 'Bytes Read': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Read'](w) for w in s['Worker Threads'].values()], 0), + 'Bytes Written': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Written'](w) for w in s['Worker Threads'].values()], + 0), + 'Work Time': lambda s: (not s['Enabled']) and -1 or sum( + [w['Work Time'](w) for w in s['Worker Threads'].values()], 0), + 'Read Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Read'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Write Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Written'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Worker Threads': {}, + } + logging.statistics["CherryPy HTTPServer %d" % id(self)] = self.stats + + def runtime(self): + if self._start_time is None: + return self._run_time + else: + return self._run_time + (time.time() - self._start_time) + + def __str__(self): + return "%s.%s(%r)" % (self.__module__, self.__class__.__name__, + self.bind_addr) + + def _get_bind_addr(self): + return self._bind_addr + + def _set_bind_addr(self, value): + if isinstance(value, tuple) and value[0] in ('', None): + # Despite the socket module docs, using '' does not + # allow AI_PASSIVE to work. Passing None instead + # returns '0.0.0.0' like we want. In other words: + # host AI_PASSIVE result + # '' Y 192.168.x.y + # '' N 192.168.x.y + # None Y 0.0.0.0 + # None N 127.0.0.1 + # But since you can get the same effect with an explicit + # '0.0.0.0', we deny both the empty string and None as values. + raise ValueError("Host values of '' or None are not allowed. " + "Use '0.0.0.0' (IPv4) or '::' (IPv6) instead " + "to listen on all active interfaces.") + self._bind_addr = value + bind_addr = property( + _get_bind_addr, + _set_bind_addr, + doc="""The interface on which to listen for connections. + + For TCP sockets, a (host, port) tuple. Host values may be any IPv4 + or IPv6 address, or any valid hostname. The string 'localhost' is a + synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). + The string '0.0.0.0' is a special IPv4 entry meaning "any active + interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for + IPv6. The empty string or None are not allowed. + + For UNIX sockets, supply the filename as a string.""") + + def start(self): + """Run the server forever.""" + # We don't have to trap KeyboardInterrupt or SystemExit here, + # because cherrpy.server already does so, calling self.stop() for us. + # If you're using this server with another framework, you should + # trap those exceptions in whatever code block calls start(). + self._interrupt = None + + if self.software is None: + self.software = "%s Server" % self.version + + # SSL backward compatibility + if (self.ssl_adapter is None and + getattr(self, 'ssl_certificate', None) and + getattr(self, 'ssl_private_key', None)): + warnings.warn( + "SSL attributes are deprecated in CherryPy 3.2, and will " + "be removed in CherryPy 3.3. Use an ssl_adapter attribute " + "instead.", + DeprecationWarning + ) + try: + from cherrypy.wsgiserver.ssl_pyopenssl import pyOpenSSLAdapter + except ImportError: + pass + else: + self.ssl_adapter = pyOpenSSLAdapter( + self.ssl_certificate, self.ssl_private_key, + getattr(self, 'ssl_certificate_chain', None)) + + # Select the appropriate socket + if isinstance(self.bind_addr, basestring): + # AF_UNIX socket + + # So we can reuse the socket... + try: + os.unlink(self.bind_addr) + except: + pass + + # So everyone can access the socket... + try: + os.chmod(self.bind_addr, 511) # 0777 + except: + pass + + info = [ + (socket.AF_UNIX, socket.SOCK_STREAM, 0, "", self.bind_addr)] + else: + # AF_INET or AF_INET6 socket + # Get the correct address family for our host (allows IPv6 + # addresses) + host, port = self.bind_addr + try: + info = socket.getaddrinfo( + host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM, 0, socket.AI_PASSIVE) + except socket.gaierror: + if ':' in self.bind_addr[0]: + info = [(socket.AF_INET6, socket.SOCK_STREAM, + 0, "", self.bind_addr + (0, 0))] + else: + info = [(socket.AF_INET, socket.SOCK_STREAM, + 0, "", self.bind_addr)] + + self.socket = None + msg = "No socket could be created" + for res in info: + af, socktype, proto, canonname, sa = res + try: + self.bind(af, socktype, proto) + except socket.error, serr: + msg = "%s -- (%s: %s)" % (msg, sa, serr) + if self.socket: + self.socket.close() + self.socket = None + continue + break + if not self.socket: + raise socket.error(msg) + + # Timeout so KeyboardInterrupt can be caught on Win32 + self.socket.settimeout(1) + self.socket.listen(self.request_queue_size) + + # Create worker threads + self.requests.start() + + self.ready = True + self._start_time = time.time() + while self.ready: + try: + self.tick() + except (KeyboardInterrupt, SystemExit): + raise + except: + self.error_log("Error in HTTPServer.tick", level=logging.ERROR, + traceback=True) + + if self.interrupt: + while self.interrupt is True: + # Wait for self.stop() to complete. See _set_interrupt. + time.sleep(0.1) + if self.interrupt: + raise self.interrupt + + def error_log(self, msg="", level=20, traceback=False): + # Override this in subclasses as desired + sys.stderr.write(msg + '\n') + sys.stderr.flush() + if traceback: + tblines = format_exc() + sys.stderr.write(tblines) + sys.stderr.flush() + + def bind(self, family, type, proto=0): + """Create (or recreate) the actual socket object.""" + self.socket = socket.socket(family, type, proto) + prevent_socket_inheritance(self.socket) + self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + if self.nodelay and not isinstance(self.bind_addr, str): + self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + + if self.ssl_adapter is not None: + self.socket = self.ssl_adapter.bind(self.socket) + + # If listening on the IPV6 any address ('::' = IN6ADDR_ANY), + # activate dual-stack. See + # https://bitbucket.org/cherrypy/cherrypy/issue/871. + if (hasattr(socket, 'AF_INET6') and family == socket.AF_INET6 + and self.bind_addr[0] in ('::', '::0', '::0.0.0.0')): + try: + self.socket.setsockopt( + socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) + except (AttributeError, socket.error): + # Apparently, the socket option is not available in + # this machine's TCP stack + pass + + self.socket.bind(self.bind_addr) + + def tick(self): + """Accept a new connection and put it on the Queue.""" + try: + s, addr = self.socket.accept() + if self.stats['Enabled']: + self.stats['Accepts'] += 1 + if not self.ready: + return + + prevent_socket_inheritance(s) + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + makefile = CP_fileobject + ssl_env = {} + # if ssl cert and key are set, we try to be a secure HTTP server + if self.ssl_adapter is not None: + try: + s, ssl_env = self.ssl_adapter.wrap(s) + except NoSSLError: + msg = ("The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + buf = ["%s 400 Bad Request\r\n" % self.protocol, + "Content-Length: %s\r\n" % len(msg), + "Content-Type: text/plain\r\n\r\n", + msg] + + wfile = makefile(s._sock, "wb", DEFAULT_BUFFER_SIZE) + try: + wfile.sendall("".join(buf)) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return + if not s: + return + makefile = self.ssl_adapter.makefile + # Re-apply our timeout since we may have a new socket object + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + conn = self.ConnectionClass(self, s, makefile) + + if not isinstance(self.bind_addr, basestring): + # optional values + # Until we do DNS lookups, omit REMOTE_HOST + if addr is None: # sometimes this can happen + # figure out if AF_INET or AF_INET6. + if len(s.getsockname()) == 2: + # AF_INET + addr = ('0.0.0.0', 0) + else: + # AF_INET6 + addr = ('::', 0) + conn.remote_addr = addr[0] + conn.remote_port = addr[1] + + conn.ssl_env = ssl_env + + try: + self.requests.put(conn) + except queue.Full: + # Just drop the conn. TODO: write 503 back? + conn.close() + return + except socket.timeout: + # The only reason for the timeout in start() is so we can + # notice keyboard interrupts on Win32, which don't interrupt + # accept() by default + return + except socket.error: + x = sys.exc_info()[1] + if self.stats['Enabled']: + self.stats['Socket Errors'] += 1 + if x.args[0] in socket_error_eintr: + # I *think* this is right. EINTR should occur when a signal + # is received during the accept() call; all docs say retry + # the call, and I *think* I'm reading it right that Python + # will then go ahead and poll for and handle the signal + # elsewhere. See + # https://bitbucket.org/cherrypy/cherrypy/issue/707. + return + if x.args[0] in socket_errors_nonblocking: + # Just try again. See + # https://bitbucket.org/cherrypy/cherrypy/issue/479. + return + if x.args[0] in socket_errors_to_ignore: + # Our socket was closed. + # See https://bitbucket.org/cherrypy/cherrypy/issue/686. + return + raise + + def _get_interrupt(self): + return self._interrupt + + def _set_interrupt(self, interrupt): + self._interrupt = True + self.stop() + self._interrupt = interrupt + interrupt = property(_get_interrupt, _set_interrupt, + doc="Set this to an Exception instance to " + "interrupt the server.") + + def stop(self): + """Gracefully shutdown a server that is serving forever.""" + self.ready = False + if self._start_time is not None: + self._run_time += (time.time() - self._start_time) + self._start_time = None + + sock = getattr(self, "socket", None) + if sock: + if not isinstance(self.bind_addr, basestring): + # Touch our own socket to make accept() return immediately. + try: + host, port = sock.getsockname()[:2] + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + # Changed to use error code and not message + # See + # https://bitbucket.org/cherrypy/cherrypy/issue/860. + raise + else: + # Note that we're explicitly NOT using AI_PASSIVE, + # here, because we want an actual IP to touch. + # localhost won't work if we've bound to a public IP, + # but it will if we bound to '0.0.0.0' (INADDR_ANY). + for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + s = None + try: + s = socket.socket(af, socktype, proto) + # See + # http://groups.google.com/group/cherrypy-users/ + # browse_frm/thread/bbfe5eb39c904fe0 + s.settimeout(1.0) + s.connect((host, port)) + s.close() + except socket.error: + if s: + s.close() + if hasattr(sock, "close"): + sock.close() + self.socket = None + + self.requests.stop(self.shutdown_timeout) + + +class Gateway(object): + + """A base class to interface HTTPServer with other systems, such as WSGI. + """ + + def __init__(self, req): + self.req = req + + def respond(self): + """Process the current request. Must be overridden in a subclass.""" + raise NotImplemented + + +# These may either be wsgiserver.SSLAdapter subclasses or the string names +# of such classes (in which case they will be lazily loaded). +ssl_adapters = { + 'builtin': 'cherrypy.wsgiserver.ssl_builtin.BuiltinSSLAdapter', + 'pyopenssl': 'cherrypy.wsgiserver.ssl_pyopenssl.pyOpenSSLAdapter', +} + + +def get_ssl_adapter_class(name='pyopenssl'): + """Return an SSL adapter class for the given name.""" + adapter = ssl_adapters[name.lower()] + if isinstance(adapter, basestring): + last_dot = adapter.rfind(".") + attr_name = adapter[last_dot + 1:] + mod_path = adapter[:last_dot] + + try: + mod = sys.modules[mod_path] + if mod is None: + raise KeyError() + except KeyError: + # The last [''] is important. + mod = __import__(mod_path, globals(), locals(), ['']) + + # Let an AttributeError propagate outward. + try: + adapter = getattr(mod, attr_name) + except AttributeError: + raise AttributeError("'%s' object has no attribute '%s'" + % (mod_path, attr_name)) + + return adapter + +# ------------------------------- WSGI Stuff -------------------------------- # + + +class CherryPyWSGIServer(HTTPServer): + + """A subclass of HTTPServer which calls a WSGI application.""" + + wsgi_version = (1, 0) + """The version of WSGI to produce.""" + + def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None, + max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5, + accepted_queue_size=-1, accepted_queue_timeout=10): + self.requests = ThreadPool(self, min=numthreads or 1, max=max, + accepted_queue_size=accepted_queue_size, + accepted_queue_timeout=accepted_queue_timeout) + self.wsgi_app = wsgi_app + self.gateway = wsgi_gateways[self.wsgi_version] + + self.bind_addr = bind_addr + if not server_name: + server_name = socket.gethostname() + self.server_name = server_name + self.request_queue_size = request_queue_size + + self.timeout = timeout + self.shutdown_timeout = shutdown_timeout + self.clear_stats() + + def _get_numthreads(self): + return self.requests.min + + def _set_numthreads(self, value): + self.requests.min = value + numthreads = property(_get_numthreads, _set_numthreads) + + +class WSGIGateway(Gateway): + + """A base class to interface HTTPServer with WSGI.""" + + def __init__(self, req): + self.req = req + self.started_response = False + self.env = self.get_environ() + self.remaining_bytes_out = None + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + raise NotImplemented + + def respond(self): + """Process the current request.""" + response = self.req.server.wsgi_app(self.env, self.start_response) + try: + for chunk in response: + # "The start_response callable must not actually transmit + # the response headers. Instead, it must store them for the + # server or gateway to transmit only after the first + # iteration of the application return value that yields + # a NON-EMPTY string, or upon the application's first + # invocation of the write() callable." (PEP 333) + if chunk: + if isinstance(chunk, unicodestr): + chunk = chunk.encode('ISO-8859-1') + self.write(chunk) + finally: + if hasattr(response, "close"): + response.close() + + def start_response(self, status, headers, exc_info=None): + """WSGI callable to begin the HTTP response.""" + # "The application may call start_response more than once, + # if and only if the exc_info argument is provided." + if self.started_response and not exc_info: + raise AssertionError("WSGI start_response called a second " + "time with no exc_info.") + self.started_response = True + + # "if exc_info is provided, and the HTTP headers have already been + # sent, start_response must raise an error, and should raise the + # exc_info tuple." + if self.req.sent_headers: + try: + raise exc_info[0], exc_info[1], exc_info[2] + finally: + exc_info = None + + self.req.status = status + for k, v in headers: + if not isinstance(k, str): + raise TypeError( + "WSGI response header key %r is not of type str." % k) + if not isinstance(v, str): + raise TypeError( + "WSGI response header value %r is not of type str." % v) + if k.lower() == 'content-length': + self.remaining_bytes_out = int(v) + self.req.outheaders.extend(headers) + + return self.write + + def write(self, chunk): + """WSGI callable to write unbuffered data to the client. + + This method is also used internally by start_response (to write + data from the iterable returned by the WSGI application). + """ + if not self.started_response: + raise AssertionError("WSGI write called before start_response.") + + chunklen = len(chunk) + rbo = self.remaining_bytes_out + if rbo is not None and chunklen > rbo: + if not self.req.sent_headers: + # Whew. We can send a 500 to the client. + self.req.simple_response( + "500 Internal Server Error", + "The requested resource returned more bytes than the " + "declared Content-Length.") + else: + # Dang. We have probably already sent data. Truncate the chunk + # to fit (so the client doesn't hang) and raise an error later. + chunk = chunk[:rbo] + + if not self.req.sent_headers: + self.req.sent_headers = True + self.req.send_headers() + + self.req.write(chunk) + + if rbo is not None: + rbo -= chunklen + if rbo < 0: + raise ValueError( + "Response body exceeds the declared Content-Length.") + + +class WSGIGateway_10(WSGIGateway): + + """A Gateway class to interface HTTPServer with WSGI 1.0.x.""" + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + req = self.req + env = { + # set a non-standard environ entry so the WSGI app can know what + # the *real* server protocol is (and what features to support). + # See http://www.faqs.org/rfcs/rfc2145.html. + 'ACTUAL_SERVER_PROTOCOL': req.server.protocol, + 'PATH_INFO': req.path, + 'QUERY_STRING': req.qs, + 'REMOTE_ADDR': req.conn.remote_addr or '', + 'REMOTE_PORT': str(req.conn.remote_port or ''), + 'REQUEST_METHOD': req.method, + 'REQUEST_URI': req.uri, + 'SCRIPT_NAME': '', + 'SERVER_NAME': req.server.server_name, + # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol. + 'SERVER_PROTOCOL': req.request_protocol, + 'SERVER_SOFTWARE': req.server.software, + 'wsgi.errors': sys.stderr, + 'wsgi.input': req.rfile, + 'wsgi.multiprocess': False, + 'wsgi.multithread': True, + 'wsgi.run_once': False, + 'wsgi.url_scheme': req.scheme, + 'wsgi.version': (1, 0), + } + + if isinstance(req.server.bind_addr, basestring): + # AF_UNIX. This isn't really allowed by WSGI, which doesn't + # address unix domain sockets. But it's better than nothing. + env["SERVER_PORT"] = "" + else: + env["SERVER_PORT"] = str(req.server.bind_addr[1]) + + # Request headers + for k, v in req.inheaders.iteritems(): + env["HTTP_" + k.upper().replace("-", "_")] = v + + # CONTENT_TYPE/CONTENT_LENGTH + ct = env.pop("HTTP_CONTENT_TYPE", None) + if ct is not None: + env["CONTENT_TYPE"] = ct + cl = env.pop("HTTP_CONTENT_LENGTH", None) + if cl is not None: + env["CONTENT_LENGTH"] = cl + + if req.conn.ssl_env: + env.update(req.conn.ssl_env) + + return env + + +class WSGIGateway_u0(WSGIGateway_10): + + """A Gateway class to interface HTTPServer with WSGI u.0. + + WSGI u.0 is an experimental protocol, which uses unicode for keys and + values in both Python 2 and Python 3. + """ + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + req = self.req + env_10 = WSGIGateway_10.get_environ(self) + env = dict([(k.decode('ISO-8859-1'), v) + for k, v in env_10.iteritems()]) + env[u'wsgi.version'] = ('u', 0) + + # Request-URI + env.setdefault(u'wsgi.url_encoding', u'utf-8') + try: + for key in [u"PATH_INFO", u"SCRIPT_NAME", u"QUERY_STRING"]: + env[key] = env_10[str(key)].decode(env[u'wsgi.url_encoding']) + except UnicodeDecodeError: + # Fall back to latin 1 so apps can transcode if needed. + env[u'wsgi.url_encoding'] = u'ISO-8859-1' + for key in [u"PATH_INFO", u"SCRIPT_NAME", u"QUERY_STRING"]: + env[key] = env_10[str(key)].decode(env[u'wsgi.url_encoding']) + + for k, v in sorted(env.items()): + if isinstance(v, str) and k not in ('REQUEST_URI', 'wsgi.input'): + env[k] = v.decode('ISO-8859-1') + + return env + +wsgi_gateways = { + (1, 0): WSGIGateway_10, + ('u', 0): WSGIGateway_u0, +} + + +class WSGIPathInfoDispatcher(object): + + """A WSGI dispatcher for dispatch based on the PATH_INFO. + + apps: a dict or list of (path_prefix, app) pairs. + """ + + def __init__(self, apps): + try: + apps = list(apps.items()) + except AttributeError: + pass + + # Sort the apps by len(path), descending + apps.sort(cmp=lambda x, y: cmp(len(x[0]), len(y[0]))) + apps.reverse() + + # The path_prefix strings must start, but not end, with a slash. + # Use "" instead of "/". + self.apps = [(p.rstrip("/"), a) for p, a in apps] + + def __call__(self, environ, start_response): + path = environ["PATH_INFO"] or "/" + for p, app in self.apps: + # The apps list should be sorted by length, descending. + if path.startswith(p + "/") or path == p: + environ = environ.copy() + environ["SCRIPT_NAME"] = environ["SCRIPT_NAME"] + p + environ["PATH_INFO"] = path[len(p):] + return app(environ, start_response) + + start_response('404 Not Found', [('Content-Type', 'text/plain'), + ('Content-Length', '0')]) + return [''] diff --git a/lib/cherrypy/wsgiserver/wsgiserver3.py b/lib/cherrypy/wsgiserver/wsgiserver3.py new file mode 100644 index 00000000..8bba6261 --- /dev/null +++ b/lib/cherrypy/wsgiserver/wsgiserver3.py @@ -0,0 +1,2176 @@ +"""A high-speed, production ready, thread pooled, generic HTTP server. + +Simplest example on how to use this module directly +(without using CherryPy's application machinery):: + + from cherrypy import wsgiserver + + def my_crazy_app(environ, start_response): + status = '200 OK' + response_headers = [('Content-type','text/plain')] + start_response(status, response_headers) + return ['Hello world!'] + + server = wsgiserver.CherryPyWSGIServer( + ('0.0.0.0', 8070), my_crazy_app, + server_name='www.cherrypy.example') + server.start() + +The CherryPy WSGI server can serve as many WSGI applications +as you want in one instance by using a WSGIPathInfoDispatcher:: + + d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app}) + server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d) + +Want SSL support? Just set server.ssl_adapter to an SSLAdapter instance. + +This won't call the CherryPy engine (application side) at all, only the +HTTP server, which is independent from the rest of CherryPy. Don't +let the name "CherryPyWSGIServer" throw you; the name merely reflects +its origin, not its coupling. + +For those of you wanting to understand internals of this module, here's the +basic call flow. The server's listening thread runs a very tight loop, +sticking incoming connections onto a Queue:: + + server = CherryPyWSGIServer(...) + server.start() + while True: + tick() + # This blocks until a request comes in: + child = socket.accept() + conn = HTTPConnection(child, ...) + server.requests.put(conn) + +Worker threads are kept in a pool and poll the Queue, popping off and then +handling each connection in turn. Each connection can consist of an arbitrary +number of requests and their responses, so we run a nested loop:: + + while True: + conn = server.requests.get() + conn.communicate() + -> while True: + req = HTTPRequest(...) + req.parse_request() + -> # Read the Request-Line, e.g. "GET /page HTTP/1.1" + req.rfile.readline() + read_headers(req.rfile, req.inheaders) + req.respond() + -> response = app(...) + try: + for chunk in response: + if chunk: + req.write(chunk) + finally: + if hasattr(response, "close"): + response.close() + if req.close_connection: + return +""" + +__all__ = ['HTTPRequest', 'HTTPConnection', 'HTTPServer', + 'SizeCheckWrapper', 'KnownLengthRFile', 'ChunkedRFile', + 'CP_makefile', + 'MaxSizeExceeded', 'NoSSLError', 'FatalSSLAlert', + 'WorkerThread', 'ThreadPool', 'SSLAdapter', + 'CherryPyWSGIServer', + 'Gateway', 'WSGIGateway', 'WSGIGateway_10', 'WSGIGateway_u0', + 'WSGIPathInfoDispatcher', 'get_ssl_adapter_class'] + +import os +try: + import queue +except: + import Queue as queue +import re +import email.utils +import socket +import sys +if 'win' in sys.platform and hasattr(socket, "AF_INET6"): + if not hasattr(socket, 'IPPROTO_IPV6'): + socket.IPPROTO_IPV6 = 41 + if not hasattr(socket, 'IPV6_V6ONLY'): + socket.IPV6_V6ONLY = 27 +if sys.version_info < (3, 1): + import io +else: + import _pyio as io +DEFAULT_BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE + +import threading +import time +from traceback import format_exc + +if sys.version_info >= (3, 0): + bytestr = bytes + unicodestr = str + basestring = (bytes, str) + + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given + encoding. + """ + # In Python 3, the native string type is unicode + return n.encode(encoding) +else: + bytestr = str + unicodestr = unicode + basestring = basestring + + def ntob(n, encoding='ISO-8859-1'): + """Return the given native string as a byte string in the given + encoding. + """ + # In Python 2, the native string type is bytes. Assume it's already + # in the given encoding, which for ISO-8859-1 is almost always what + # was intended. + return n + +LF = ntob('\n') +CRLF = ntob('\r\n') +TAB = ntob('\t') +SPACE = ntob(' ') +COLON = ntob(':') +SEMICOLON = ntob(';') +EMPTY = ntob('') +NUMBER_SIGN = ntob('#') +QUESTION_MARK = ntob('?') +ASTERISK = ntob('*') +FORWARD_SLASH = ntob('/') +quoted_slash = re.compile(ntob("(?i)%2F")) + +import errno + + +def plat_specific_errors(*errnames): + """Return error numbers for all errors in errnames on this platform. + + The 'errno' module contains different global constants depending on + the specific platform (OS). This function will return the list of + numeric values for a given list of potential names. + """ + errno_names = dir(errno) + nums = [getattr(errno, k) for k in errnames if k in errno_names] + # de-dupe the list + return list(dict.fromkeys(nums).keys()) + +socket_error_eintr = plat_specific_errors("EINTR", "WSAEINTR") + +socket_errors_to_ignore = plat_specific_errors( + "EPIPE", + "EBADF", "WSAEBADF", + "ENOTSOCK", "WSAENOTSOCK", + "ETIMEDOUT", "WSAETIMEDOUT", + "ECONNREFUSED", "WSAECONNREFUSED", + "ECONNRESET", "WSAECONNRESET", + "ECONNABORTED", "WSAECONNABORTED", + "ENETRESET", "WSAENETRESET", + "EHOSTDOWN", "EHOSTUNREACH", +) +socket_errors_to_ignore.append("timed out") +socket_errors_to_ignore.append("The read operation timed out") + +socket_errors_nonblocking = plat_specific_errors( + 'EAGAIN', 'EWOULDBLOCK', 'WSAEWOULDBLOCK') + +comma_separated_headers = [ + ntob(h) for h in + ['Accept', 'Accept-Charset', 'Accept-Encoding', + 'Accept-Language', 'Accept-Ranges', 'Allow', 'Cache-Control', + 'Connection', 'Content-Encoding', 'Content-Language', 'Expect', + 'If-Match', 'If-None-Match', 'Pragma', 'Proxy-Authenticate', 'TE', + 'Trailer', 'Transfer-Encoding', 'Upgrade', 'Vary', 'Via', 'Warning', + 'WWW-Authenticate'] +] + + +import logging +if not hasattr(logging, 'statistics'): + logging.statistics = {} + + +def read_headers(rfile, hdict=None): + """Read headers from the given stream into the given header dict. + + If hdict is None, a new header dict is created. Returns the populated + header dict. + + Headers which are repeated are folded together using a comma if their + specification so dictates. + + This function raises ValueError when the read bytes violate the HTTP spec. + You should probably return "400 Bad Request" if this happens. + """ + if hdict is None: + hdict = {} + + while True: + line = rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + if line == CRLF: + # Normal end of headers + break + if not line.endswith(CRLF): + raise ValueError("HTTP requires CRLF terminators") + + if line[0] in (SPACE, TAB): + # It's a continuation line. + v = line.strip() + else: + try: + k, v = line.split(COLON, 1) + except ValueError: + raise ValueError("Illegal header line.") + # TODO: what about TE and WWW-Authenticate? + k = k.strip().title() + v = v.strip() + hname = k + + if k in comma_separated_headers: + existing = hdict.get(hname) + if existing: + v = b", ".join((existing, v)) + hdict[hname] = v + + return hdict + + +class MaxSizeExceeded(Exception): + pass + + +class SizeCheckWrapper(object): + + """Wraps a file-like object, raising MaxSizeExceeded if too large.""" + + def __init__(self, rfile, maxlen): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + + def _check_length(self): + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded() + + def read(self, size=None): + data = self.rfile.read(size) + self.bytes_read += len(data) + self._check_length() + return data + + def readline(self, size=None): + if size is not None: + data = self.rfile.readline(size) + self.bytes_read += len(data) + self._check_length() + return data + + # User didn't specify a size ... + # We read the line in chunks to make sure it's not a 100MB line ! + res = [] + while True: + data = self.rfile.readline(256) + self.bytes_read += len(data) + self._check_length() + res.append(data) + # See https://bitbucket.org/cherrypy/cherrypy/issue/421 + if len(data) < 256 or data[-1:] == LF: + return EMPTY.join(res) + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline() + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline() + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.bytes_read += len(data) + self._check_length() + return data + + def next(self): + data = self.rfile.next() + self.bytes_read += len(data) + self._check_length() + return data + + +class KnownLengthRFile(object): + + """Wraps a file-like object, returning an empty string when exhausted.""" + + def __init__(self, rfile, content_length): + self.rfile = rfile + self.remaining = content_length + + def read(self, size=None): + if self.remaining == 0: + return b'' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.read(size) + self.remaining -= len(data) + return data + + def readline(self, size=None): + if self.remaining == 0: + return b'' + if size is None: + size = self.remaining + else: + size = min(size, self.remaining) + + data = self.rfile.readline(size) + self.remaining -= len(data) + return data + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline(sizehint) + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def __next__(self): + data = next(self.rfile) + self.remaining -= len(data) + return data + + +class ChunkedRFile(object): + + """Wraps a file-like object, returning an empty string when exhausted. + + This class is intended to provide a conforming wsgi.input value for + request entities that have been encoded with the 'chunked' transfer + encoding. + """ + + def __init__(self, rfile, maxlen, bufsize=8192): + self.rfile = rfile + self.maxlen = maxlen + self.bytes_read = 0 + self.buffer = EMPTY + self.bufsize = bufsize + self.closed = False + + def _fetch(self): + if self.closed: + return + + line = self.rfile.readline() + self.bytes_read += len(line) + + if self.maxlen and self.bytes_read > self.maxlen: + raise MaxSizeExceeded("Request Entity Too Large", self.maxlen) + + line = line.strip().split(SEMICOLON, 1) + + try: + chunk_size = line.pop(0) + chunk_size = int(chunk_size, 16) + except ValueError: + raise ValueError("Bad chunked transfer size: " + repr(chunk_size)) + + if chunk_size <= 0: + self.closed = True + return + +## if line: chunk_extension = line[0] + + if self.maxlen and self.bytes_read + chunk_size > self.maxlen: + raise IOError("Request Entity Too Large") + + chunk = self.rfile.read(chunk_size) + self.bytes_read += len(chunk) + self.buffer += chunk + + crlf = self.rfile.read(2) + if crlf != CRLF: + raise ValueError( + "Bad chunked transfer coding (expected '\\r\\n', " + "got " + repr(crlf) + ")") + + def read(self, size=None): + data = EMPTY + while True: + if size and len(data) >= size: + return data + + if not self.buffer: + self._fetch() + if not self.buffer: + # EOF + return data + + if size: + remaining = size - len(data) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + data += self.buffer + + def readline(self, size=None): + data = EMPTY + while True: + if size and len(data) >= size: + return data + + if not self.buffer: + self._fetch() + if not self.buffer: + # EOF + return data + + newline_pos = self.buffer.find(LF) + if size: + if newline_pos == -1: + remaining = size - len(data) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + remaining = min(size - len(data), newline_pos) + data += self.buffer[:remaining] + self.buffer = self.buffer[remaining:] + else: + if newline_pos == -1: + data += self.buffer + else: + data += self.buffer[:newline_pos] + self.buffer = self.buffer[newline_pos:] + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline(sizehint) + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + return lines + + def read_trailer_lines(self): + if not self.closed: + raise ValueError( + "Cannot read trailers until the request body has been read.") + + while True: + line = self.rfile.readline() + if not line: + # No more data--illegal end of headers + raise ValueError("Illegal end of headers.") + + self.bytes_read += len(line) + if self.maxlen and self.bytes_read > self.maxlen: + raise IOError("Request Entity Too Large") + + if line == CRLF: + # Normal end of headers + break + if not line.endswith(CRLF): + raise ValueError("HTTP requires CRLF terminators") + + yield line + + def close(self): + self.rfile.close() + + def __iter__(self): + # Shamelessly stolen from StringIO + total = 0 + line = self.readline(sizehint) + while line: + yield line + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline(sizehint) + + +class HTTPRequest(object): + + """An HTTP Request (and response). + + A single HTTP connection may consist of multiple request/response pairs. + """ + + server = None + """The HTTPServer object which is receiving this request.""" + + conn = None + """The HTTPConnection object on which this request connected.""" + + inheaders = {} + """A dict of request headers.""" + + outheaders = [] + """A list of header tuples to write in the response.""" + + ready = False + """When True, the request has been parsed and is ready to begin generating + the response. When False, signals the calling Connection that the response + should not be generated and the connection should close.""" + + close_connection = False + """Signals the calling Connection that the request should close. This does + not imply an error! The client and/or server may each request that the + connection be closed.""" + + chunked_write = False + """If True, output will be encoded with the "chunked" transfer-coding. + + This value is set automatically inside send_headers.""" + + def __init__(self, server, conn): + self.server = server + self.conn = conn + + self.ready = False + self.started_request = False + self.scheme = ntob("http") + if self.server.ssl_adapter is not None: + self.scheme = ntob("https") + # Use the lowest-common protocol in case read_request_line errors. + self.response_protocol = 'HTTP/1.0' + self.inheaders = {} + + self.status = "" + self.outheaders = [] + self.sent_headers = False + self.close_connection = self.__class__.close_connection + self.chunked_read = False + self.chunked_write = self.__class__.chunked_write + + def parse_request(self): + """Parse the next HTTP request start-line and message-headers.""" + self.rfile = SizeCheckWrapper(self.conn.rfile, + self.server.max_request_header_size) + try: + success = self.read_request_line() + except MaxSizeExceeded: + self.simple_response( + "414 Request-URI Too Long", + "The Request-URI sent with the request exceeds the maximum " + "allowed bytes.") + return + else: + if not success: + return + + try: + success = self.read_request_headers() + except MaxSizeExceeded: + self.simple_response( + "413 Request Entity Too Large", + "The headers sent with the request exceed the maximum " + "allowed bytes.") + return + else: + if not success: + return + + self.ready = True + + def read_request_line(self): + # HTTP/1.1 connections are persistent by default. If a client + # requests a page, then idles (leaves the connection open), + # then rfile.readline() will raise socket.error("timed out"). + # Note that it does this based on the value given to settimeout(), + # and doesn't need the client to request or acknowledge the close + # (although your TCP stack might suffer for it: cf Apache's history + # with FIN_WAIT_2). + request_line = self.rfile.readline() + + # Set started_request to True so communicate() knows to send 408 + # from here on out. + self.started_request = True + if not request_line: + return False + + if request_line == CRLF: + # RFC 2616 sec 4.1: "...if the server is reading the protocol + # stream at the beginning of a message and receives a CRLF + # first, it should ignore the CRLF." + # But only ignore one leading line! else we enable a DoS. + request_line = self.rfile.readline() + if not request_line: + return False + + if not request_line.endswith(CRLF): + self.simple_response( + "400 Bad Request", "HTTP requires CRLF terminators") + return False + + try: + method, uri, req_protocol = request_line.strip().split(SPACE, 2) + # The [x:y] slicing is necessary for byte strings to avoid getting + # ord's + rp = int(req_protocol[5:6]), int(req_protocol[7:8]) + except ValueError: + self.simple_response("400 Bad Request", "Malformed Request-Line") + return False + + self.uri = uri + self.method = method + + # uri may be an abs_path (including "http://host.domain.tld"); + scheme, authority, path = self.parse_request_uri(uri) + if NUMBER_SIGN in path: + self.simple_response("400 Bad Request", + "Illegal #fragment in Request-URI.") + return False + + if scheme: + self.scheme = scheme + + qs = EMPTY + if QUESTION_MARK in path: + path, qs = path.split(QUESTION_MARK, 1) + + # Unquote the path+params (e.g. "/this%20path" -> "/this path"). + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2 + # + # But note that "...a URI must be separated into its components + # before the escaped characters within those components can be + # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2 + # Therefore, "/this%2Fpath" becomes "/this%2Fpath", not "/this/path". + try: + atoms = [self.unquote_bytes(x) for x in quoted_slash.split(path)] + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + path = b"%2F".join(atoms) + self.path = path + + # Note that, like wsgiref and most other HTTP servers, + # we "% HEX HEX"-unquote the path but not the query string. + self.qs = qs + + # Compare request and server HTTP protocol versions, in case our + # server does not support the requested protocol. Limit our output + # to min(req, server). We want the following output: + # request server actual written supported response + # protocol protocol response protocol feature set + # a 1.0 1.0 1.0 1.0 + # b 1.0 1.1 1.1 1.0 + # c 1.1 1.0 1.0 1.0 + # d 1.1 1.1 1.1 1.1 + # Notice that, in (b), the response will be "HTTP/1.1" even though + # the client only understands 1.0. RFC 2616 10.5.6 says we should + # only return 505 if the _major_ version is different. + # The [x:y] slicing is necessary for byte strings to avoid getting + # ord's + sp = int(self.server.protocol[5:6]), int(self.server.protocol[7:8]) + + if sp[0] != rp[0]: + self.simple_response("505 HTTP Version Not Supported") + return False + + self.request_protocol = req_protocol + self.response_protocol = "HTTP/%s.%s" % min(rp, sp) + return True + + def read_request_headers(self): + """Read self.rfile into self.inheaders. Return success.""" + + # then all the http headers + try: + read_headers(self.rfile, self.inheaders) + except ValueError: + ex = sys.exc_info()[1] + self.simple_response("400 Bad Request", ex.args[0]) + return False + + mrbs = self.server.max_request_body_size + if mrbs and int(self.inheaders.get(b"Content-Length", 0)) > mrbs: + self.simple_response( + "413 Request Entity Too Large", + "The entity sent with the request exceeds the maximum " + "allowed bytes.") + return False + + # Persistent connection support + if self.response_protocol == "HTTP/1.1": + # Both server and client are HTTP/1.1 + if self.inheaders.get(b"Connection", b"") == b"close": + self.close_connection = True + else: + # Either the server or client (or both) are HTTP/1.0 + if self.inheaders.get(b"Connection", b"") != b"Keep-Alive": + self.close_connection = True + + # Transfer-Encoding support + te = None + if self.response_protocol == "HTTP/1.1": + te = self.inheaders.get(b"Transfer-Encoding") + if te: + te = [x.strip().lower() for x in te.split(b",") if x.strip()] + + self.chunked_read = False + + if te: + for enc in te: + if enc == b"chunked": + self.chunked_read = True + else: + # Note that, even if we see "chunked", we must reject + # if there is an extension we don't recognize. + self.simple_response("501 Unimplemented") + self.close_connection = True + return False + + # From PEP 333: + # "Servers and gateways that implement HTTP 1.1 must provide + # transparent support for HTTP 1.1's "expect/continue" mechanism. + # This may be done in any of several ways: + # 1. Respond to requests containing an Expect: 100-continue request + # with an immediate "100 Continue" response, and proceed normally. + # 2. Proceed with the request normally, but provide the application + # with a wsgi.input stream that will send the "100 Continue" + # response if/when the application first attempts to read from + # the input stream. The read request must then remain blocked + # until the client responds. + # 3. Wait until the client decides that the server does not support + # expect/continue, and sends the request body on its own. + # (This is suboptimal, and is not recommended.) + # + # We used to do 3, but are now doing 1. Maybe we'll do 2 someday, + # but it seems like it would be a big slowdown for such a rare case. + if self.inheaders.get(b"Expect", b"") == b"100-continue": + # Don't use simple_response here, because it emits headers + # we don't want. See + # https://bitbucket.org/cherrypy/cherrypy/issue/951 + msg = self.server.protocol.encode( + 'ascii') + b" 100 Continue\r\n\r\n" + try: + self.conn.wfile.write(msg) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return True + + def parse_request_uri(self, uri): + """Parse a Request-URI into (scheme, authority, path). + + Note that Request-URI's must be one of:: + + Request-URI = "*" | absoluteURI | abs_path | authority + + Therefore, a Request-URI which starts with a double forward-slash + cannot be a "net_path":: + + net_path = "//" authority [ abs_path ] + + Instead, it must be interpreted as an "abs_path" with an empty first + path segment:: + + abs_path = "/" path_segments + path_segments = segment *( "/" segment ) + segment = *pchar *( ";" param ) + param = *pchar + """ + if uri == ASTERISK: + return None, None, uri + + scheme, sep, remainder = uri.partition(b'://') + if sep and QUESTION_MARK not in scheme: + # An absoluteURI. + # If there's a scheme (and it must be http or https), then: + # http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query + # ]] + authority, path_a, path_b = remainder.partition(FORWARD_SLASH) + return scheme.lower(), authority, path_a + path_b + + if uri.startswith(FORWARD_SLASH): + # An abs_path. + return None, None, uri + else: + # An authority. + return None, uri, None + + def unquote_bytes(self, path): + """takes quoted string and unquotes % encoded values""" + res = path.split(b'%') + + for i in range(1, len(res)): + item = res[i] + try: + res[i] = bytes([int(item[:2], 16)]) + item[2:] + except ValueError: + raise + return b''.join(res) + + def respond(self): + """Call the gateway and write its iterable output.""" + mrbs = self.server.max_request_body_size + if self.chunked_read: + self.rfile = ChunkedRFile(self.conn.rfile, mrbs) + else: + cl = int(self.inheaders.get(b"Content-Length", 0)) + if mrbs and mrbs < cl: + if not self.sent_headers: + self.simple_response( + "413 Request Entity Too Large", + "The entity sent with the request exceeds the " + "maximum allowed bytes.") + return + self.rfile = KnownLengthRFile(self.conn.rfile, cl) + + self.server.gateway(self).respond() + + if (self.ready and not self.sent_headers): + self.sent_headers = True + self.send_headers() + if self.chunked_write: + self.conn.wfile.write(b"0\r\n\r\n") + + def simple_response(self, status, msg=""): + """Write a simple response back to the client.""" + status = str(status) + buf = [bytes(self.server.protocol, "ascii") + SPACE + + bytes(status, "ISO-8859-1") + CRLF, + bytes("Content-Length: %s\r\n" % len(msg), "ISO-8859-1"), + b"Content-Type: text/plain\r\n"] + + if status[:3] in ("413", "414"): + # Request Entity Too Large / Request-URI Too Long + self.close_connection = True + if self.response_protocol == 'HTTP/1.1': + # This will not be true for 414, since read_request_line + # usually raises 414 before reading the whole line, and we + # therefore cannot know the proper response_protocol. + buf.append(b"Connection: close\r\n") + else: + # HTTP/1.0 had no 413/414 status nor Connection header. + # Emit 400 instead and trust the message body is enough. + status = "400 Bad Request" + + buf.append(CRLF) + if msg: + if isinstance(msg, unicodestr): + msg = msg.encode("ISO-8859-1") + buf.append(msg) + + try: + self.conn.wfile.write(b"".join(buf)) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + + def write(self, chunk): + """Write unbuffered data to the client.""" + if self.chunked_write and chunk: + buf = [bytes(hex(len(chunk)), 'ASCII')[2:], CRLF, chunk, CRLF] + self.conn.wfile.write(EMPTY.join(buf)) + else: + self.conn.wfile.write(chunk) + + def send_headers(self): + """Assert, process, and send the HTTP response message-headers. + + You must set self.status, and self.outheaders before calling this. + """ + hkeys = [key.lower() for key, value in self.outheaders] + status = int(self.status[:3]) + + if status == 413: + # Request Entity Too Large. Close conn to avoid garbage. + self.close_connection = True + elif b"content-length" not in hkeys: + # "All 1xx (informational), 204 (no content), + # and 304 (not modified) responses MUST NOT + # include a message-body." So no point chunking. + if status < 200 or status in (204, 205, 304): + pass + else: + if (self.response_protocol == 'HTTP/1.1' + and self.method != b'HEAD'): + # Use the chunked transfer-coding + self.chunked_write = True + self.outheaders.append((b"Transfer-Encoding", b"chunked")) + else: + # Closing the conn is the only way to determine len. + self.close_connection = True + + if b"connection" not in hkeys: + if self.response_protocol == 'HTTP/1.1': + # Both server and client are HTTP/1.1 or better + if self.close_connection: + self.outheaders.append((b"Connection", b"close")) + else: + # Server and/or client are HTTP/1.0 + if not self.close_connection: + self.outheaders.append((b"Connection", b"Keep-Alive")) + + if (not self.close_connection) and (not self.chunked_read): + # Read any remaining request body data on the socket. + # "If an origin server receives a request that does not include an + # Expect request-header field with the "100-continue" expectation, + # the request includes a request body, and the server responds + # with a final status code before reading the entire request body + # from the transport connection, then the server SHOULD NOT close + # the transport connection until it has read the entire request, + # or until the client closes the connection. Otherwise, the client + # might not reliably receive the response message. However, this + # requirement is not be construed as preventing a server from + # defending itself against denial-of-service attacks, or from + # badly broken client implementations." + remaining = getattr(self.rfile, 'remaining', 0) + if remaining > 0: + self.rfile.read(remaining) + + if b"date" not in hkeys: + self.outheaders.append(( + b"Date", + email.utils.formatdate(usegmt=True).encode('ISO-8859-1') + )) + + if b"server" not in hkeys: + self.outheaders.append( + (b"Server", self.server.server_name.encode('ISO-8859-1'))) + + buf = [self.server.protocol.encode( + 'ascii') + SPACE + self.status + CRLF] + for k, v in self.outheaders: + buf.append(k + COLON + SPACE + v + CRLF) + buf.append(CRLF) + self.conn.wfile.write(EMPTY.join(buf)) + + +class NoSSLError(Exception): + + """Exception raised when a client speaks HTTP to an HTTPS socket.""" + pass + + +class FatalSSLAlert(Exception): + + """Exception raised when the SSL implementation signals a fatal alert.""" + pass + + +class CP_BufferedWriter(io.BufferedWriter): + + """Faux file object attached to a socket object.""" + + def write(self, b): + self._checkClosed() + if isinstance(b, str): + raise TypeError("can't write str to binary stream") + + with self._write_lock: + self._write_buf.extend(b) + self._flush_unlocked() + return len(b) + + def _flush_unlocked(self): + self._checkClosed("flush of closed file") + while self._write_buf: + try: + # ssl sockets only except 'bytes', not bytearrays + # so perhaps we should conditionally wrap this for perf? + n = self.raw.write(bytes(self._write_buf)) + except io.BlockingIOError as e: + n = e.characters_written + del self._write_buf[:n] + + +def CP_makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + if 'r' in mode: + return io.BufferedReader(socket.SocketIO(sock, mode), bufsize) + else: + return CP_BufferedWriter(socket.SocketIO(sock, mode), bufsize) + + +class HTTPConnection(object): + + """An HTTP connection (active socket). + + server: the Server object which received this connection. + socket: the raw socket object (usually TCP) for this connection. + makefile: a fileobject class for reading from the socket. + """ + + remote_addr = None + remote_port = None + ssl_env = None + rbufsize = DEFAULT_BUFFER_SIZE + wbufsize = DEFAULT_BUFFER_SIZE + RequestHandlerClass = HTTPRequest + + def __init__(self, server, sock, makefile=CP_makefile): + self.server = server + self.socket = sock + self.rfile = makefile(sock, "rb", self.rbufsize) + self.wfile = makefile(sock, "wb", self.wbufsize) + self.requests_seen = 0 + + def communicate(self): + """Read each request and respond appropriately.""" + request_seen = False + try: + while True: + # (re)set req to None so that if something goes wrong in + # the RequestHandlerClass constructor, the error doesn't + # get written to the previous request. + req = None + req = self.RequestHandlerClass(self.server, self) + + # This order of operations should guarantee correct pipelining. + req.parse_request() + if self.server.stats['Enabled']: + self.requests_seen += 1 + if not req.ready: + # Something went wrong in the parsing (and the server has + # probably already made a simple_response). Return and + # let the conn close. + return + + request_seen = True + req.respond() + if req.close_connection: + return + except socket.error: + e = sys.exc_info()[1] + errnum = e.args[0] + # sadly SSL sockets return a different (longer) time out string + if ( + errnum == 'timed out' or + errnum == 'The read operation timed out' + ): + # Don't error if we're between requests; only error + # if 1) no request has been started at all, or 2) we're + # in the middle of a request. + # See https://bitbucket.org/cherrypy/cherrypy/issue/853 + if (not request_seen) or (req and req.started_request): + # Don't bother writing the 408 if the response + # has already started being written. + if req and not req.sent_headers: + try: + req.simple_response("408 Request Timeout") + except FatalSSLAlert: + # Close the connection. + return + elif errnum not in socket_errors_to_ignore: + self.server.error_log("socket.error %s" % repr(errnum), + level=logging.WARNING, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + return + except (KeyboardInterrupt, SystemExit): + raise + except FatalSSLAlert: + # Close the connection. + return + except NoSSLError: + if req and not req.sent_headers: + # Unwrap our wfile + self.wfile = CP_makefile( + self.socket._sock, "wb", self.wbufsize) + req.simple_response( + "400 Bad Request", + "The client sent a plain HTTP request, but this server " + "only speaks HTTPS on this port.") + self.linger = True + except Exception: + e = sys.exc_info()[1] + self.server.error_log(repr(e), level=logging.ERROR, traceback=True) + if req and not req.sent_headers: + try: + req.simple_response("500 Internal Server Error") + except FatalSSLAlert: + # Close the connection. + return + + linger = False + + def close(self): + """Close the socket underlying this connection.""" + self.rfile.close() + + if not self.linger: + # Python's socket module does NOT call close on the kernel + # socket when you call socket.close(). We do so manually here + # because we want this server to send a FIN TCP segment + # immediately. Note this must be called *before* calling + # socket.close(), because the latter drops its reference to + # the kernel socket. + # Python 3 *probably* fixed this with socket._real_close; + # hard to tell. +# self.socket._sock.close() + self.socket.close() + else: + # On the other hand, sometimes we want to hang around for a bit + # to make sure the client has a chance to read our entire + # response. Skipping the close() calls here delays the FIN + # packet until the socket object is garbage-collected later. + # Someday, perhaps, we'll do the full lingering_close that + # Apache does, but not today. + pass + + +class TrueyZero(object): + + """An object which equals and does math like the integer 0 but evals True. + """ + + def __add__(self, other): + return other + + def __radd__(self, other): + return other +trueyzero = TrueyZero() + + +_SHUTDOWNREQUEST = None + + +class WorkerThread(threading.Thread): + + """Thread which continuously polls a Queue for Connection objects. + + Due to the timing issues of polling a Queue, a WorkerThread does not + check its own 'ready' flag after it has started. To stop the thread, + it is necessary to stick a _SHUTDOWNREQUEST object onto the Queue + (one for each running WorkerThread). + """ + + conn = None + """The current connection pulled off the Queue, or None.""" + + server = None + """The HTTP Server which spawned this thread, and which owns the + Queue and is placing active connections into it.""" + + ready = False + """A simple flag for the calling server to know when this thread + has begun polling the Queue.""" + + def __init__(self, server): + self.ready = False + self.server = server + + self.requests_seen = 0 + self.bytes_read = 0 + self.bytes_written = 0 + self.start_time = None + self.work_time = 0 + self.stats = { + 'Requests': lambda s: self.requests_seen + ( + (self.start_time is None) and + trueyzero or + self.conn.requests_seen + ), + 'Bytes Read': lambda s: self.bytes_read + ( + (self.start_time is None) and + trueyzero or + self.conn.rfile.bytes_read + ), + 'Bytes Written': lambda s: self.bytes_written + ( + (self.start_time is None) and + trueyzero or + self.conn.wfile.bytes_written + ), + 'Work Time': lambda s: self.work_time + ( + (self.start_time is None) and + trueyzero or + time.time() - self.start_time + ), + 'Read Throughput': lambda s: s['Bytes Read'](s) / ( + s['Work Time'](s) or 1e-6), + 'Write Throughput': lambda s: s['Bytes Written'](s) / ( + s['Work Time'](s) or 1e-6), + } + threading.Thread.__init__(self) + + def run(self): + self.server.stats['Worker Threads'][self.getName()] = self.stats + try: + self.ready = True + while True: + conn = self.server.requests.get() + if conn is _SHUTDOWNREQUEST: + return + + self.conn = conn + if self.server.stats['Enabled']: + self.start_time = time.time() + try: + conn.communicate() + finally: + conn.close() + if self.server.stats['Enabled']: + self.requests_seen += self.conn.requests_seen + self.bytes_read += self.conn.rfile.bytes_read + self.bytes_written += self.conn.wfile.bytes_written + self.work_time += time.time() - self.start_time + self.start_time = None + self.conn = None + except (KeyboardInterrupt, SystemExit): + exc = sys.exc_info()[1] + self.server.interrupt = exc + + +class ThreadPool(object): + + """A Request Queue for an HTTPServer which pools threads. + + ThreadPool objects must provide min, get(), put(obj), start() + and stop(timeout) attributes. + """ + + def __init__(self, server, min=10, max=-1, + accepted_queue_size=-1, accepted_queue_timeout=10): + self.server = server + self.min = min + self.max = max + self._threads = [] + self._queue = queue.Queue(maxsize=accepted_queue_size) + self._queue_put_timeout = accepted_queue_timeout + self.get = self._queue.get + + def start(self): + """Start the pool of threads.""" + for i in range(self.min): + self._threads.append(WorkerThread(self.server)) + for worker in self._threads: + worker.setName("CP Server " + worker.getName()) + worker.start() + for worker in self._threads: + while not worker.ready: + time.sleep(.1) + + def _get_idle(self): + """Number of worker threads which are idle. Read-only.""" + return len([t for t in self._threads if t.conn is None]) + idle = property(_get_idle, doc=_get_idle.__doc__) + + def put(self, obj): + self._queue.put(obj, block=True, timeout=self._queue_put_timeout) + if obj is _SHUTDOWNREQUEST: + return + + def grow(self, amount): + """Spawn new worker threads (not above self.max).""" + if self.max > 0: + budget = max(self.max - len(self._threads), 0) + else: + # self.max <= 0 indicates no maximum + budget = float('inf') + + n_new = min(amount, budget) + + workers = [self._spawn_worker() for i in range(n_new)] + while not all(worker.ready for worker in workers): + time.sleep(.1) + self._threads.extend(workers) + + def _spawn_worker(self): + worker = WorkerThread(self.server) + worker.setName("CP Server " + worker.getName()) + worker.start() + return worker + + def shrink(self, amount): + """Kill off worker threads (not below self.min).""" + # Grow/shrink the pool if necessary. + # Remove any dead threads from our list + for t in self._threads: + if not t.isAlive(): + self._threads.remove(t) + amount -= 1 + + # calculate the number of threads above the minimum + n_extra = max(len(self._threads) - self.min, 0) + + # don't remove more than amount + n_to_remove = min(amount, n_extra) + + # put shutdown requests on the queue equal to the number of threads + # to remove. As each request is processed by a worker, that worker + # will terminate and be culled from the list. + for n in range(n_to_remove): + self._queue.put(_SHUTDOWNREQUEST) + + def stop(self, timeout=5): + # Must shut down threads here so the code that calls + # this method can know when all threads are stopped. + for worker in self._threads: + self._queue.put(_SHUTDOWNREQUEST) + + # Don't join currentThread (when stop is called inside a request). + current = threading.currentThread() + if timeout and timeout >= 0: + endtime = time.time() + timeout + while self._threads: + worker = self._threads.pop() + if worker is not current and worker.isAlive(): + try: + if timeout is None or timeout < 0: + worker.join() + else: + remaining_time = endtime - time.time() + if remaining_time > 0: + worker.join(remaining_time) + if worker.isAlive(): + # We exhausted the timeout. + # Forcibly shut down the socket. + c = worker.conn + if c and not c.rfile.closed: + try: + c.socket.shutdown(socket.SHUT_RD) + except TypeError: + # pyOpenSSL sockets don't take an arg + c.socket.shutdown() + worker.join() + except (AssertionError, + # Ignore repeated Ctrl-C. + # See + # https://bitbucket.org/cherrypy/cherrypy/issue/691. + KeyboardInterrupt): + pass + + def _get_qsize(self): + return self._queue.qsize() + qsize = property(_get_qsize) + + +try: + import fcntl +except ImportError: + try: + from ctypes import windll, WinError + import ctypes.wintypes + _SetHandleInformation = windll.kernel32.SetHandleInformation + _SetHandleInformation.argtypes = [ + ctypes.wintypes.HANDLE, + ctypes.wintypes.DWORD, + ctypes.wintypes.DWORD, + ] + _SetHandleInformation.restype = ctypes.wintypes.BOOL + except ImportError: + def prevent_socket_inheritance(sock): + """Dummy function, since neither fcntl nor ctypes are available.""" + pass + else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (Windows).""" + if not _SetHandleInformation(sock.fileno(), 1, 0): + raise WinError() +else: + def prevent_socket_inheritance(sock): + """Mark the given socket fd as non-inheritable (POSIX).""" + fd = sock.fileno() + old_flags = fcntl.fcntl(fd, fcntl.F_GETFD) + fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC) + + +class SSLAdapter(object): + + """Base class for SSL driver library adapters. + + Required methods: + + * ``wrap(sock) -> (wrapped socket, ssl environ dict)`` + * ``makefile(sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE) -> + socket file object`` + """ + + def __init__(self, certificate, private_key, certificate_chain=None): + self.certificate = certificate + self.private_key = private_key + self.certificate_chain = certificate_chain + + def wrap(self, sock): + raise NotImplemented + + def makefile(self, sock, mode='r', bufsize=DEFAULT_BUFFER_SIZE): + raise NotImplemented + + +class HTTPServer(object): + + """An HTTP server.""" + + _bind_addr = "127.0.0.1" + _interrupt = None + + gateway = None + """A Gateway instance.""" + + minthreads = None + """The minimum number of worker threads to create (default 10).""" + + maxthreads = None + """The maximum number of worker threads to create (default -1 = no limit). + """ + + server_name = None + """The name of the server; defaults to socket.gethostname().""" + + protocol = "HTTP/1.1" + """The version string to write in the Status-Line of all HTTP responses. + + For example, "HTTP/1.1" is the default. This also limits the supported + features used in the response.""" + + request_queue_size = 5 + """The 'backlog' arg to socket.listen(); max queued connections + (default 5). + """ + + shutdown_timeout = 5 + """The total time, in seconds, to wait for worker threads to cleanly exit. + """ + + timeout = 10 + """The timeout in seconds for accepted connections (default 10).""" + + version = "CherryPy/3.6.0" + """A version string for the HTTPServer.""" + + software = None + """The value to set for the SERVER_SOFTWARE entry in the WSGI environ. + + If None, this defaults to ``'%s Server' % self.version``.""" + + ready = False + """An internal flag which marks whether the socket is accepting + connections. + """ + + max_request_header_size = 0 + """The maximum size, in bytes, for request headers, or 0 for no limit.""" + + max_request_body_size = 0 + """The maximum size, in bytes, for request bodies, or 0 for no limit.""" + + nodelay = True + """If True (the default since 3.1), sets the TCP_NODELAY socket option.""" + + ConnectionClass = HTTPConnection + """The class to use for handling HTTP connections.""" + + ssl_adapter = None + """An instance of SSLAdapter (or a subclass). + + You must have the corresponding SSL driver library installed.""" + + def __init__(self, bind_addr, gateway, minthreads=10, maxthreads=-1, + server_name=None): + self.bind_addr = bind_addr + self.gateway = gateway + + self.requests = ThreadPool(self, min=minthreads or 1, max=maxthreads) + + if not server_name: + server_name = socket.gethostname() + self.server_name = server_name + self.clear_stats() + + def clear_stats(self): + self._start_time = None + self._run_time = 0 + self.stats = { + 'Enabled': False, + 'Bind Address': lambda s: repr(self.bind_addr), + 'Run time': lambda s: (not s['Enabled']) and -1 or self.runtime(), + 'Accepts': 0, + 'Accepts/sec': lambda s: s['Accepts'] / self.runtime(), + 'Queue': lambda s: getattr(self.requests, "qsize", None), + 'Threads': lambda s: len(getattr(self.requests, "_threads", [])), + 'Threads Idle': lambda s: getattr(self.requests, "idle", None), + 'Socket Errors': 0, + 'Requests': lambda s: (not s['Enabled']) and -1 or sum( + [w['Requests'](w) for w in s['Worker Threads'].values()], 0), + 'Bytes Read': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Read'](w) for w in s['Worker Threads'].values()], 0), + 'Bytes Written': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Written'](w) for w in s['Worker Threads'].values()], + 0), + 'Work Time': lambda s: (not s['Enabled']) and -1 or sum( + [w['Work Time'](w) for w in s['Worker Threads'].values()], 0), + 'Read Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Read'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Write Throughput': lambda s: (not s['Enabled']) and -1 or sum( + [w['Bytes Written'](w) / (w['Work Time'](w) or 1e-6) + for w in s['Worker Threads'].values()], 0), + 'Worker Threads': {}, + } + logging.statistics["CherryPy HTTPServer %d" % id(self)] = self.stats + + def runtime(self): + if self._start_time is None: + return self._run_time + else: + return self._run_time + (time.time() - self._start_time) + + def __str__(self): + return "%s.%s(%r)" % (self.__module__, self.__class__.__name__, + self.bind_addr) + + def _get_bind_addr(self): + return self._bind_addr + + def _set_bind_addr(self, value): + if isinstance(value, tuple) and value[0] in ('', None): + # Despite the socket module docs, using '' does not + # allow AI_PASSIVE to work. Passing None instead + # returns '0.0.0.0' like we want. In other words: + # host AI_PASSIVE result + # '' Y 192.168.x.y + # '' N 192.168.x.y + # None Y 0.0.0.0 + # None N 127.0.0.1 + # But since you can get the same effect with an explicit + # '0.0.0.0', we deny both the empty string and None as values. + raise ValueError("Host values of '' or None are not allowed. " + "Use '0.0.0.0' (IPv4) or '::' (IPv6) instead " + "to listen on all active interfaces.") + self._bind_addr = value + bind_addr = property( + _get_bind_addr, + _set_bind_addr, + doc="""The interface on which to listen for connections. + + For TCP sockets, a (host, port) tuple. Host values may be any IPv4 + or IPv6 address, or any valid hostname. The string 'localhost' is a + synonym for '127.0.0.1' (or '::1', if your hosts file prefers IPv6). + The string '0.0.0.0' is a special IPv4 entry meaning "any active + interface" (INADDR_ANY), and '::' is the similar IN6ADDR_ANY for + IPv6. The empty string or None are not allowed. + + For UNIX sockets, supply the filename as a string.""") + + def start(self): + """Run the server forever.""" + # We don't have to trap KeyboardInterrupt or SystemExit here, + # because cherrpy.server already does so, calling self.stop() for us. + # If you're using this server with another framework, you should + # trap those exceptions in whatever code block calls start(). + self._interrupt = None + + if self.software is None: + self.software = "%s Server" % self.version + + # Select the appropriate socket + if isinstance(self.bind_addr, basestring): + # AF_UNIX socket + + # So we can reuse the socket... + try: + os.unlink(self.bind_addr) + except: + pass + + # So everyone can access the socket... + try: + os.chmod(self.bind_addr, 511) # 0777 + except: + pass + + info = [ + (socket.AF_UNIX, socket.SOCK_STREAM, 0, "", self.bind_addr)] + else: + # AF_INET or AF_INET6 socket + # Get the correct address family for our host (allows IPv6 + # addresses) + host, port = self.bind_addr + try: + info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM, 0, + socket.AI_PASSIVE) + except socket.gaierror: + if ':' in self.bind_addr[0]: + info = [(socket.AF_INET6, socket.SOCK_STREAM, + 0, "", self.bind_addr + (0, 0))] + else: + info = [(socket.AF_INET, socket.SOCK_STREAM, + 0, "", self.bind_addr)] + + self.socket = None + msg = "No socket could be created" + for res in info: + af, socktype, proto, canonname, sa = res + try: + self.bind(af, socktype, proto) + except socket.error as serr: + msg = "%s -- (%s: %s)" % (msg, sa, serr) + if self.socket: + self.socket.close() + self.socket = None + continue + break + if not self.socket: + raise socket.error(msg) + + # Timeout so KeyboardInterrupt can be caught on Win32 + self.socket.settimeout(1) + self.socket.listen(self.request_queue_size) + + # Create worker threads + self.requests.start() + + self.ready = True + self._start_time = time.time() + while self.ready: + try: + self.tick() + except (KeyboardInterrupt, SystemExit): + raise + except: + self.error_log("Error in HTTPServer.tick", level=logging.ERROR, + traceback=True) + if self.interrupt: + while self.interrupt is True: + # Wait for self.stop() to complete. See _set_interrupt. + time.sleep(0.1) + if self.interrupt: + raise self.interrupt + + def error_log(self, msg="", level=20, traceback=False): + # Override this in subclasses as desired + sys.stderr.write(msg + '\n') + sys.stderr.flush() + if traceback: + tblines = format_exc() + sys.stderr.write(tblines) + sys.stderr.flush() + + def bind(self, family, type, proto=0): + """Create (or recreate) the actual socket object.""" + self.socket = socket.socket(family, type, proto) + prevent_socket_inheritance(self.socket) + self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + if self.nodelay and not isinstance(self.bind_addr, str): + self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + + if self.ssl_adapter is not None: + self.socket = self.ssl_adapter.bind(self.socket) + + # If listening on the IPV6 any address ('::' = IN6ADDR_ANY), + # activate dual-stack. See + # https://bitbucket.org/cherrypy/cherrypy/issue/871. + if (hasattr(socket, 'AF_INET6') and family == socket.AF_INET6 + and self.bind_addr[0] in ('::', '::0', '::0.0.0.0')): + try: + self.socket.setsockopt( + socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) + except (AttributeError, socket.error): + # Apparently, the socket option is not available in + # this machine's TCP stack + pass + + self.socket.bind(self.bind_addr) + + def tick(self): + """Accept a new connection and put it on the Queue.""" + try: + s, addr = self.socket.accept() + if self.stats['Enabled']: + self.stats['Accepts'] += 1 + if not self.ready: + return + + prevent_socket_inheritance(s) + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + makefile = CP_makefile + ssl_env = {} + # if ssl cert and key are set, we try to be a secure HTTP server + if self.ssl_adapter is not None: + try: + s, ssl_env = self.ssl_adapter.wrap(s) + except NoSSLError: + msg = ("The client sent a plain HTTP request, but " + "this server only speaks HTTPS on this port.") + buf = ["%s 400 Bad Request\r\n" % self.protocol, + "Content-Length: %s\r\n" % len(msg), + "Content-Type: text/plain\r\n\r\n", + msg] + + wfile = makefile(s, "wb", DEFAULT_BUFFER_SIZE) + try: + wfile.write("".join(buf).encode('ISO-8859-1')) + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + raise + return + if not s: + return + makefile = self.ssl_adapter.makefile + # Re-apply our timeout since we may have a new socket object + if hasattr(s, 'settimeout'): + s.settimeout(self.timeout) + + conn = self.ConnectionClass(self, s, makefile) + + if not isinstance(self.bind_addr, basestring): + # optional values + # Until we do DNS lookups, omit REMOTE_HOST + if addr is None: # sometimes this can happen + # figure out if AF_INET or AF_INET6. + if len(s.getsockname()) == 2: + # AF_INET + addr = ('0.0.0.0', 0) + else: + # AF_INET6 + addr = ('::', 0) + conn.remote_addr = addr[0] + conn.remote_port = addr[1] + + conn.ssl_env = ssl_env + + try: + self.requests.put(conn) + except queue.Full: + # Just drop the conn. TODO: write 503 back? + conn.close() + return + except socket.timeout: + # The only reason for the timeout in start() is so we can + # notice keyboard interrupts on Win32, which don't interrupt + # accept() by default + return + except socket.error: + x = sys.exc_info()[1] + if self.stats['Enabled']: + self.stats['Socket Errors'] += 1 + if x.args[0] in socket_error_eintr: + # I *think* this is right. EINTR should occur when a signal + # is received during the accept() call; all docs say retry + # the call, and I *think* I'm reading it right that Python + # will then go ahead and poll for and handle the signal + # elsewhere. See + # https://bitbucket.org/cherrypy/cherrypy/issue/707. + return + if x.args[0] in socket_errors_nonblocking: + # Just try again. See + # https://bitbucket.org/cherrypy/cherrypy/issue/479. + return + if x.args[0] in socket_errors_to_ignore: + # Our socket was closed. + # See https://bitbucket.org/cherrypy/cherrypy/issue/686. + return + raise + + def _get_interrupt(self): + return self._interrupt + + def _set_interrupt(self, interrupt): + self._interrupt = True + self.stop() + self._interrupt = interrupt + interrupt = property(_get_interrupt, _set_interrupt, + doc="Set this to an Exception instance to " + "interrupt the server.") + + def stop(self): + """Gracefully shutdown a server that is serving forever.""" + self.ready = False + if self._start_time is not None: + self._run_time += (time.time() - self._start_time) + self._start_time = None + + sock = getattr(self, "socket", None) + if sock: + if not isinstance(self.bind_addr, basestring): + # Touch our own socket to make accept() return immediately. + try: + host, port = sock.getsockname()[:2] + except socket.error: + x = sys.exc_info()[1] + if x.args[0] not in socket_errors_to_ignore: + # Changed to use error code and not message + # See + # https://bitbucket.org/cherrypy/cherrypy/issue/860. + raise + else: + # Note that we're explicitly NOT using AI_PASSIVE, + # here, because we want an actual IP to touch. + # localhost won't work if we've bound to a public IP, + # but it will if we bound to '0.0.0.0' (INADDR_ANY). + for res in socket.getaddrinfo(host, port, socket.AF_UNSPEC, + socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + s = None + try: + s = socket.socket(af, socktype, proto) + # See + # http://groups.google.com/group/cherrypy-users/ + # browse_frm/thread/bbfe5eb39c904fe0 + s.settimeout(1.0) + s.connect((host, port)) + s.close() + except socket.error: + if s: + s.close() + if hasattr(sock, "close"): + sock.close() + self.socket = None + + self.requests.stop(self.shutdown_timeout) + + +class Gateway(object): + + """A base class to interface HTTPServer with other systems, such as WSGI. + """ + + def __init__(self, req): + self.req = req + + def respond(self): + """Process the current request. Must be overridden in a subclass.""" + raise NotImplemented + + +# These may either be wsgiserver.SSLAdapter subclasses or the string names +# of such classes (in which case they will be lazily loaded). +ssl_adapters = { + 'builtin': 'cherrypy.wsgiserver.ssl_builtin.BuiltinSSLAdapter', +} + + +def get_ssl_adapter_class(name='builtin'): + """Return an SSL adapter class for the given name.""" + adapter = ssl_adapters[name.lower()] + if isinstance(adapter, basestring): + last_dot = adapter.rfind(".") + attr_name = adapter[last_dot + 1:] + mod_path = adapter[:last_dot] + + try: + mod = sys.modules[mod_path] + if mod is None: + raise KeyError() + except KeyError: + # The last [''] is important. + mod = __import__(mod_path, globals(), locals(), ['']) + + # Let an AttributeError propagate outward. + try: + adapter = getattr(mod, attr_name) + except AttributeError: + raise AttributeError("'%s' object has no attribute '%s'" + % (mod_path, attr_name)) + + return adapter + +# ------------------------------- WSGI Stuff -------------------------------- # + + +class CherryPyWSGIServer(HTTPServer): + + """A subclass of HTTPServer which calls a WSGI application.""" + + wsgi_version = (1, 0) + """The version of WSGI to produce.""" + + def __init__(self, bind_addr, wsgi_app, numthreads=10, server_name=None, + max=-1, request_queue_size=5, timeout=10, shutdown_timeout=5, + accepted_queue_size=-1, accepted_queue_timeout=10): + self.requests = ThreadPool(self, min=numthreads or 1, max=max, + accepted_queue_size=accepted_queue_size, + accepted_queue_timeout=accepted_queue_timeout) + self.wsgi_app = wsgi_app + self.gateway = wsgi_gateways[self.wsgi_version] + + self.bind_addr = bind_addr + if not server_name: + server_name = socket.gethostname() + self.server_name = server_name + self.request_queue_size = request_queue_size + + self.timeout = timeout + self.shutdown_timeout = shutdown_timeout + self.clear_stats() + + def _get_numthreads(self): + return self.requests.min + + def _set_numthreads(self, value): + self.requests.min = value + numthreads = property(_get_numthreads, _set_numthreads) + + +class WSGIGateway(Gateway): + + """A base class to interface HTTPServer with WSGI.""" + + def __init__(self, req): + self.req = req + self.started_response = False + self.env = self.get_environ() + self.remaining_bytes_out = None + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + raise NotImplemented + + def respond(self): + """Process the current request.""" + response = self.req.server.wsgi_app(self.env, self.start_response) + try: + for chunk in response: + # "The start_response callable must not actually transmit + # the response headers. Instead, it must store them for the + # server or gateway to transmit only after the first + # iteration of the application return value that yields + # a NON-EMPTY string, or upon the application's first + # invocation of the write() callable." (PEP 333) + if chunk: + if isinstance(chunk, unicodestr): + chunk = chunk.encode('ISO-8859-1') + self.write(chunk) + finally: + if hasattr(response, "close"): + response.close() + + def start_response(self, status, headers, exc_info=None): + """WSGI callable to begin the HTTP response.""" + # "The application may call start_response more than once, + # if and only if the exc_info argument is provided." + if self.started_response and not exc_info: + raise AssertionError("WSGI start_response called a second " + "time with no exc_info.") + self.started_response = True + + # "if exc_info is provided, and the HTTP headers have already been + # sent, start_response must raise an error, and should raise the + # exc_info tuple." + if self.req.sent_headers: + try: + raise exc_info[0](exc_info[1]).with_traceback(exc_info[2]) + finally: + exc_info = None + + # According to PEP 3333, when using Python 3, the response status + # and headers must be bytes masquerading as unicode; that is, they + # must be of type "str" but are restricted to code points in the + # "latin-1" set. + if not isinstance(status, str): + raise TypeError("WSGI response status is not of type str.") + self.req.status = status.encode('ISO-8859-1') + + for k, v in headers: + if not isinstance(k, str): + raise TypeError( + "WSGI response header key %r is not of type str." % k) + if not isinstance(v, str): + raise TypeError( + "WSGI response header value %r is not of type str." % v) + if k.lower() == 'content-length': + self.remaining_bytes_out = int(v) + self.req.outheaders.append( + (k.encode('ISO-8859-1'), v.encode('ISO-8859-1'))) + + return self.write + + def write(self, chunk): + """WSGI callable to write unbuffered data to the client. + + This method is also used internally by start_response (to write + data from the iterable returned by the WSGI application). + """ + if not self.started_response: + raise AssertionError("WSGI write called before start_response.") + + chunklen = len(chunk) + rbo = self.remaining_bytes_out + if rbo is not None and chunklen > rbo: + if not self.req.sent_headers: + # Whew. We can send a 500 to the client. + self.req.simple_response("500 Internal Server Error", + "The requested resource returned " + "more bytes than the declared " + "Content-Length.") + else: + # Dang. We have probably already sent data. Truncate the chunk + # to fit (so the client doesn't hang) and raise an error later. + chunk = chunk[:rbo] + + if not self.req.sent_headers: + self.req.sent_headers = True + self.req.send_headers() + + self.req.write(chunk) + + if rbo is not None: + rbo -= chunklen + if rbo < 0: + raise ValueError( + "Response body exceeds the declared Content-Length.") + + +class WSGIGateway_10(WSGIGateway): + + """A Gateway class to interface HTTPServer with WSGI 1.0.x.""" + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + req = self.req + env = { + # set a non-standard environ entry so the WSGI app can know what + # the *real* server protocol is (and what features to support). + # See http://www.faqs.org/rfcs/rfc2145.html. + 'ACTUAL_SERVER_PROTOCOL': req.server.protocol, + 'PATH_INFO': req.path.decode('ISO-8859-1'), + 'QUERY_STRING': req.qs.decode('ISO-8859-1'), + 'REMOTE_ADDR': req.conn.remote_addr or '', + 'REMOTE_PORT': str(req.conn.remote_port or ''), + 'REQUEST_METHOD': req.method.decode('ISO-8859-1'), + 'REQUEST_URI': req.uri.decode('ISO-8859-1'), + 'SCRIPT_NAME': '', + 'SERVER_NAME': req.server.server_name, + # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol. + 'SERVER_PROTOCOL': req.request_protocol.decode('ISO-8859-1'), + 'SERVER_SOFTWARE': req.server.software, + 'wsgi.errors': sys.stderr, + 'wsgi.input': req.rfile, + 'wsgi.multiprocess': False, + 'wsgi.multithread': True, + 'wsgi.run_once': False, + 'wsgi.url_scheme': req.scheme.decode('ISO-8859-1'), + 'wsgi.version': (1, 0), + } + if isinstance(req.server.bind_addr, basestring): + # AF_UNIX. This isn't really allowed by WSGI, which doesn't + # address unix domain sockets. But it's better than nothing. + env["SERVER_PORT"] = "" + else: + env["SERVER_PORT"] = str(req.server.bind_addr[1]) + + # Request headers + for k, v in req.inheaders.items(): + k = k.decode('ISO-8859-1').upper().replace("-", "_") + env["HTTP_" + k] = v.decode('ISO-8859-1') + + # CONTENT_TYPE/CONTENT_LENGTH + ct = env.pop("HTTP_CONTENT_TYPE", None) + if ct is not None: + env["CONTENT_TYPE"] = ct + cl = env.pop("HTTP_CONTENT_LENGTH", None) + if cl is not None: + env["CONTENT_LENGTH"] = cl + + if req.conn.ssl_env: + env.update(req.conn.ssl_env) + + return env + + +class WSGIGateway_u0(WSGIGateway_10): + + """A Gateway class to interface HTTPServer with WSGI u.0. + + WSGI u.0 is an experimental protocol, which uses unicode for keys + and values in both Python 2 and Python 3. + """ + + def get_environ(self): + """Return a new environ dict targeting the given wsgi.version""" + req = self.req + env_10 = WSGIGateway_10.get_environ(self) + env = env_10.copy() + env['wsgi.version'] = ('u', 0) + + # Request-URI + env.setdefault('wsgi.url_encoding', 'utf-8') + try: + # SCRIPT_NAME is the empty string, who cares what encoding it is? + env["PATH_INFO"] = req.path.decode(env['wsgi.url_encoding']) + env["QUERY_STRING"] = req.qs.decode(env['wsgi.url_encoding']) + except UnicodeDecodeError: + # Fall back to latin 1 so apps can transcode if needed. + env['wsgi.url_encoding'] = 'ISO-8859-1' + env["PATH_INFO"] = env_10["PATH_INFO"] + env["QUERY_STRING"] = env_10["QUERY_STRING"] + + return env + +wsgi_gateways = { + (1, 0): WSGIGateway_10, + ('u', 0): WSGIGateway_u0, +} + + +class WSGIPathInfoDispatcher(object): + + """A WSGI dispatcher for dispatch based on the PATH_INFO. + + apps: a dict or list of (path_prefix, app) pairs. + """ + + def __init__(self, apps): + try: + apps = list(apps.items()) + except AttributeError: + pass + + # Sort the apps by len(path), descending + apps.sort() + apps.reverse() + + # The path_prefix strings must start, but not end, with a slash. + # Use "" instead of "/". + self.apps = [(p.rstrip("/"), a) for p, a in apps] + + def __call__(self, environ, start_response): + path = environ["PATH_INFO"] or "/" + for p, app in self.apps: + # The apps list should be sorted by length, descending. + if path.startswith(p + "/") or path == p: + environ = environ.copy() + environ["SCRIPT_NAME"] = environ["SCRIPT_NAME"] + p + environ["PATH_INFO"] = path[len(p):] + return app(environ, start_response) + + start_response('404 Not Found', [('Content-Type', 'text/plain'), + ('Content-Length', '0')]) + return [''] diff --git a/lib/concurrent/LICENSE b/lib/concurrent/LICENSE new file mode 100644 index 00000000..c430db0f --- /dev/null +++ b/lib/concurrent/LICENSE @@ -0,0 +1,21 @@ +Copyright 2009 Brian Quinlan. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY BRIAN QUINLAN "AS IS" AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +HALL THE FREEBSD PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/lib/concurrent/__init__.py b/lib/concurrent/__init__.py new file mode 100644 index 00000000..b36383a6 --- /dev/null +++ b/lib/concurrent/__init__.py @@ -0,0 +1,3 @@ +from pkgutil import extend_path + +__path__ = extend_path(__path__, __name__) diff --git a/lib/concurrent/futures/__init__.py b/lib/concurrent/futures/__init__.py new file mode 100644 index 00000000..fef52819 --- /dev/null +++ b/lib/concurrent/futures/__init__.py @@ -0,0 +1,23 @@ +# Copyright 2009 Brian Quinlan. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Execute computations asynchronously using threads or processes.""" + +__author__ = 'Brian Quinlan (brian@sweetapp.com)' + +from concurrent.futures._base import (FIRST_COMPLETED, + FIRST_EXCEPTION, + ALL_COMPLETED, + CancelledError, + TimeoutError, + Future, + Executor, + wait, + as_completed) +from concurrent.futures.thread import ThreadPoolExecutor + +# Jython doesn't have multiprocessing +try: + from concurrent.futures.process import ProcessPoolExecutor +except ImportError: + pass diff --git a/lib/concurrent/futures/_base.py b/lib/concurrent/futures/_base.py new file mode 100644 index 00000000..6f0c0f3b --- /dev/null +++ b/lib/concurrent/futures/_base.py @@ -0,0 +1,605 @@ +# Copyright 2009 Brian Quinlan. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +from __future__ import with_statement +import logging +import threading +import time + +from concurrent.futures._compat import reraise + +try: + from collections import namedtuple +except ImportError: + from concurrent.futures._compat import namedtuple + +__author__ = 'Brian Quinlan (brian@sweetapp.com)' + +FIRST_COMPLETED = 'FIRST_COMPLETED' +FIRST_EXCEPTION = 'FIRST_EXCEPTION' +ALL_COMPLETED = 'ALL_COMPLETED' +_AS_COMPLETED = '_AS_COMPLETED' + +# Possible future states (for internal use by the futures package). +PENDING = 'PENDING' +RUNNING = 'RUNNING' +# The future was cancelled by the user... +CANCELLED = 'CANCELLED' +# ...and _Waiter.add_cancelled() was called by a worker. +CANCELLED_AND_NOTIFIED = 'CANCELLED_AND_NOTIFIED' +FINISHED = 'FINISHED' + +_FUTURE_STATES = [ + PENDING, + RUNNING, + CANCELLED, + CANCELLED_AND_NOTIFIED, + FINISHED +] + +_STATE_TO_DESCRIPTION_MAP = { + PENDING: "pending", + RUNNING: "running", + CANCELLED: "cancelled", + CANCELLED_AND_NOTIFIED: "cancelled", + FINISHED: "finished" +} + +# Logger for internal use by the futures package. +LOGGER = logging.getLogger("concurrent.futures") + +class Error(Exception): + """Base class for all future-related exceptions.""" + pass + +class CancelledError(Error): + """The Future was cancelled.""" + pass + +class TimeoutError(Error): + """The operation exceeded the given deadline.""" + pass + +class _Waiter(object): + """Provides the event that wait() and as_completed() block on.""" + def __init__(self): + self.event = threading.Event() + self.finished_futures = [] + + def add_result(self, future): + self.finished_futures.append(future) + + def add_exception(self, future): + self.finished_futures.append(future) + + def add_cancelled(self, future): + self.finished_futures.append(future) + +class _AsCompletedWaiter(_Waiter): + """Used by as_completed().""" + + def __init__(self): + super(_AsCompletedWaiter, self).__init__() + self.lock = threading.Lock() + + def add_result(self, future): + with self.lock: + super(_AsCompletedWaiter, self).add_result(future) + self.event.set() + + def add_exception(self, future): + with self.lock: + super(_AsCompletedWaiter, self).add_exception(future) + self.event.set() + + def add_cancelled(self, future): + with self.lock: + super(_AsCompletedWaiter, self).add_cancelled(future) + self.event.set() + +class _FirstCompletedWaiter(_Waiter): + """Used by wait(return_when=FIRST_COMPLETED).""" + + def add_result(self, future): + super(_FirstCompletedWaiter, self).add_result(future) + self.event.set() + + def add_exception(self, future): + super(_FirstCompletedWaiter, self).add_exception(future) + self.event.set() + + def add_cancelled(self, future): + super(_FirstCompletedWaiter, self).add_cancelled(future) + self.event.set() + +class _AllCompletedWaiter(_Waiter): + """Used by wait(return_when=FIRST_EXCEPTION and ALL_COMPLETED).""" + + def __init__(self, num_pending_calls, stop_on_exception): + self.num_pending_calls = num_pending_calls + self.stop_on_exception = stop_on_exception + self.lock = threading.Lock() + super(_AllCompletedWaiter, self).__init__() + + def _decrement_pending_calls(self): + with self.lock: + self.num_pending_calls -= 1 + if not self.num_pending_calls: + self.event.set() + + def add_result(self, future): + super(_AllCompletedWaiter, self).add_result(future) + self._decrement_pending_calls() + + def add_exception(self, future): + super(_AllCompletedWaiter, self).add_exception(future) + if self.stop_on_exception: + self.event.set() + else: + self._decrement_pending_calls() + + def add_cancelled(self, future): + super(_AllCompletedWaiter, self).add_cancelled(future) + self._decrement_pending_calls() + +class _AcquireFutures(object): + """A context manager that does an ordered acquire of Future conditions.""" + + def __init__(self, futures): + self.futures = sorted(futures, key=id) + + def __enter__(self): + for future in self.futures: + future._condition.acquire() + + def __exit__(self, *args): + for future in self.futures: + future._condition.release() + +def _create_and_install_waiters(fs, return_when): + if return_when == _AS_COMPLETED: + waiter = _AsCompletedWaiter() + elif return_when == FIRST_COMPLETED: + waiter = _FirstCompletedWaiter() + else: + pending_count = sum( + f._state not in [CANCELLED_AND_NOTIFIED, FINISHED] for f in fs) + + if return_when == FIRST_EXCEPTION: + waiter = _AllCompletedWaiter(pending_count, stop_on_exception=True) + elif return_when == ALL_COMPLETED: + waiter = _AllCompletedWaiter(pending_count, stop_on_exception=False) + else: + raise ValueError("Invalid return condition: %r" % return_when) + + for f in fs: + f._waiters.append(waiter) + + return waiter + +def as_completed(fs, timeout=None): + """An iterator over the given futures that yields each as it completes. + + Args: + fs: The sequence of Futures (possibly created by different Executors) to + iterate over. + timeout: The maximum number of seconds to wait. If None, then there + is no limit on the wait time. + + Returns: + An iterator that yields the given Futures as they complete (finished or + cancelled). + + Raises: + TimeoutError: If the entire result iterator could not be generated + before the given timeout. + """ + if timeout is not None: + end_time = timeout + time.time() + + with _AcquireFutures(fs): + finished = set( + f for f in fs + if f._state in [CANCELLED_AND_NOTIFIED, FINISHED]) + pending = set(fs) - finished + waiter = _create_and_install_waiters(fs, _AS_COMPLETED) + + try: + for future in finished: + yield future + + while pending: + if timeout is None: + wait_timeout = None + else: + wait_timeout = end_time - time.time() + if wait_timeout < 0: + raise TimeoutError( + '%d (of %d) futures unfinished' % ( + len(pending), len(fs))) + + waiter.event.wait(wait_timeout) + + with waiter.lock: + finished = waiter.finished_futures + waiter.finished_futures = [] + waiter.event.clear() + + for future in finished: + yield future + pending.remove(future) + + finally: + for f in fs: + f._waiters.remove(waiter) + +DoneAndNotDoneFutures = namedtuple( + 'DoneAndNotDoneFutures', 'done not_done') +def wait(fs, timeout=None, return_when=ALL_COMPLETED): + """Wait for the futures in the given sequence to complete. + + Args: + fs: The sequence of Futures (possibly created by different Executors) to + wait upon. + timeout: The maximum number of seconds to wait. If None, then there + is no limit on the wait time. + return_when: Indicates when this function should return. The options + are: + + FIRST_COMPLETED - Return when any future finishes or is + cancelled. + FIRST_EXCEPTION - Return when any future finishes by raising an + exception. If no future raises an exception + then it is equivalent to ALL_COMPLETED. + ALL_COMPLETED - Return when all futures finish or are cancelled. + + Returns: + A named 2-tuple of sets. The first set, named 'done', contains the + futures that completed (is finished or cancelled) before the wait + completed. The second set, named 'not_done', contains uncompleted + futures. + """ + with _AcquireFutures(fs): + done = set(f for f in fs + if f._state in [CANCELLED_AND_NOTIFIED, FINISHED]) + not_done = set(fs) - done + + if (return_when == FIRST_COMPLETED) and done: + return DoneAndNotDoneFutures(done, not_done) + elif (return_when == FIRST_EXCEPTION) and done: + if any(f for f in done + if not f.cancelled() and f.exception() is not None): + return DoneAndNotDoneFutures(done, not_done) + + if len(done) == len(fs): + return DoneAndNotDoneFutures(done, not_done) + + waiter = _create_and_install_waiters(fs, return_when) + + waiter.event.wait(timeout) + for f in fs: + f._waiters.remove(waiter) + + done.update(waiter.finished_futures) + return DoneAndNotDoneFutures(done, set(fs) - done) + +class Future(object): + """Represents the result of an asynchronous computation.""" + + def __init__(self): + """Initializes the future. Should not be called by clients.""" + self._condition = threading.Condition() + self._state = PENDING + self._result = None + self._exception = None + self._traceback = None + self._waiters = [] + self._done_callbacks = [] + + def _invoke_callbacks(self): + for callback in self._done_callbacks: + try: + callback(self) + except Exception: + LOGGER.exception('exception calling callback for %r', self) + + def __repr__(self): + with self._condition: + if self._state == FINISHED: + if self._exception: + return '' % ( + hex(id(self)), + _STATE_TO_DESCRIPTION_MAP[self._state], + self._exception.__class__.__name__) + else: + return '' % ( + hex(id(self)), + _STATE_TO_DESCRIPTION_MAP[self._state], + self._result.__class__.__name__) + return '' % ( + hex(id(self)), + _STATE_TO_DESCRIPTION_MAP[self._state]) + + def cancel(self): + """Cancel the future if possible. + + Returns True if the future was cancelled, False otherwise. A future + cannot be cancelled if it is running or has already completed. + """ + with self._condition: + if self._state in [RUNNING, FINISHED]: + return False + + if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: + return True + + self._state = CANCELLED + self._condition.notify_all() + + self._invoke_callbacks() + return True + + def cancelled(self): + """Return True if the future has cancelled.""" + with self._condition: + return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED] + + def running(self): + """Return True if the future is currently executing.""" + with self._condition: + return self._state == RUNNING + + def done(self): + """Return True of the future was cancelled or finished executing.""" + with self._condition: + return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED] + + def __get_result(self): + if self._exception: + reraise(self._exception, self._traceback) + else: + return self._result + + def add_done_callback(self, fn): + """Attaches a callable that will be called when the future finishes. + + Args: + fn: A callable that will be called with this future as its only + argument when the future completes or is cancelled. The callable + will always be called by a thread in the same process in which + it was added. If the future has already completed or been + cancelled then the callable will be called immediately. These + callables are called in the order that they were added. + """ + with self._condition: + if self._state not in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED]: + self._done_callbacks.append(fn) + return + fn(self) + + def result(self, timeout=None): + """Return the result of the call that the future represents. + + Args: + timeout: The number of seconds to wait for the result if the future + isn't done. If None, then there is no limit on the wait time. + + Returns: + The result of the call that the future represents. + + Raises: + CancelledError: If the future was cancelled. + TimeoutError: If the future didn't finish executing before the given + timeout. + Exception: If the call raised then that exception will be raised. + """ + with self._condition: + if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: + raise CancelledError() + elif self._state == FINISHED: + return self.__get_result() + + self._condition.wait(timeout) + + if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: + raise CancelledError() + elif self._state == FINISHED: + return self.__get_result() + else: + raise TimeoutError() + + def exception_info(self, timeout=None): + """Return a tuple of (exception, traceback) raised by the call that the + future represents. + + Args: + timeout: The number of seconds to wait for the exception if the + future isn't done. If None, then there is no limit on the wait + time. + + Returns: + The exception raised by the call that the future represents or None + if the call completed without raising. + + Raises: + CancelledError: If the future was cancelled. + TimeoutError: If the future didn't finish executing before the given + timeout. + """ + with self._condition: + if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: + raise CancelledError() + elif self._state == FINISHED: + return self._exception, self._traceback + + self._condition.wait(timeout) + + if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: + raise CancelledError() + elif self._state == FINISHED: + return self._exception, self._traceback + else: + raise TimeoutError() + + def exception(self, timeout=None): + """Return the exception raised by the call that the future represents. + + Args: + timeout: The number of seconds to wait for the exception if the + future isn't done. If None, then there is no limit on the wait + time. + + Returns: + The exception raised by the call that the future represents or None + if the call completed without raising. + + Raises: + CancelledError: If the future was cancelled. + TimeoutError: If the future didn't finish executing before the given + timeout. + """ + return self.exception_info(timeout)[0] + + # The following methods should only be used by Executors and in tests. + def set_running_or_notify_cancel(self): + """Mark the future as running or process any cancel notifications. + + Should only be used by Executor implementations and unit tests. + + If the future has been cancelled (cancel() was called and returned + True) then any threads waiting on the future completing (though calls + to as_completed() or wait()) are notified and False is returned. + + If the future was not cancelled then it is put in the running state + (future calls to running() will return True) and True is returned. + + This method should be called by Executor implementations before + executing the work associated with this future. If this method returns + False then the work should not be executed. + + Returns: + False if the Future was cancelled, True otherwise. + + Raises: + RuntimeError: if this method was already called or if set_result() + or set_exception() was called. + """ + with self._condition: + if self._state == CANCELLED: + self._state = CANCELLED_AND_NOTIFIED + for waiter in self._waiters: + waiter.add_cancelled(self) + # self._condition.notify_all() is not necessary because + # self.cancel() triggers a notification. + return False + elif self._state == PENDING: + self._state = RUNNING + return True + else: + LOGGER.critical('Future %s in unexpected state: %s', + id(self.future), + self.future._state) + raise RuntimeError('Future in unexpected state') + + def set_result(self, result): + """Sets the return value of work associated with the future. + + Should only be used by Executor implementations and unit tests. + """ + with self._condition: + self._result = result + self._state = FINISHED + for waiter in self._waiters: + waiter.add_result(self) + self._condition.notify_all() + self._invoke_callbacks() + + def set_exception_info(self, exception, traceback): + """Sets the result of the future as being the given exception + and traceback. + + Should only be used by Executor implementations and unit tests. + """ + with self._condition: + self._exception = exception + self._traceback = traceback + self._state = FINISHED + for waiter in self._waiters: + waiter.add_exception(self) + self._condition.notify_all() + self._invoke_callbacks() + + def set_exception(self, exception): + """Sets the result of the future as being the given exception. + + Should only be used by Executor implementations and unit tests. + """ + self.set_exception_info(exception, None) + +class Executor(object): + """This is an abstract base class for concrete asynchronous executors.""" + + def submit(self, fn, *args, **kwargs): + """Submits a callable to be executed with the given arguments. + + Schedules the callable to be executed as fn(*args, **kwargs) and returns + a Future instance representing the execution of the callable. + + Returns: + A Future representing the given call. + """ + raise NotImplementedError() + + def map(self, fn, *iterables, **kwargs): + """Returns a iterator equivalent to map(fn, iter). + + Args: + fn: A callable that will take as many arguments as there are + passed iterables. + timeout: The maximum number of seconds to wait. If None, then there + is no limit on the wait time. + + Returns: + An iterator equivalent to: map(func, *iterables) but the calls may + be evaluated out-of-order. + + Raises: + TimeoutError: If the entire result iterator could not be generated + before the given timeout. + Exception: If fn(*args) raises for any values. + """ + timeout = kwargs.get('timeout') + if timeout is not None: + end_time = timeout + time.time() + + fs = [self.submit(fn, *args) for args in zip(*iterables)] + + try: + for future in fs: + if timeout is None: + yield future.result() + else: + yield future.result(end_time - time.time()) + finally: + for future in fs: + future.cancel() + + def shutdown(self, wait=True): + """Clean-up the resources associated with the Executor. + + It is safe to call this method several times. Otherwise, no other + methods can be called after this one. + + Args: + wait: If True then shutdown will not return until all running + futures have finished executing and the resources used by the + executor have been reclaimed. + """ + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.shutdown(wait=True) + return False diff --git a/lib/concurrent/futures/_compat.py b/lib/concurrent/futures/_compat.py new file mode 100644 index 00000000..e77cf0e5 --- /dev/null +++ b/lib/concurrent/futures/_compat.py @@ -0,0 +1,111 @@ +from keyword import iskeyword as _iskeyword +from operator import itemgetter as _itemgetter +import sys as _sys + + +def namedtuple(typename, field_names): + """Returns a new subclass of tuple with named fields. + + >>> Point = namedtuple('Point', 'x y') + >>> Point.__doc__ # docstring for the new class + 'Point(x, y)' + >>> p = Point(11, y=22) # instantiate with positional args or keywords + >>> p[0] + p[1] # indexable like a plain tuple + 33 + >>> x, y = p # unpack like a regular tuple + >>> x, y + (11, 22) + >>> p.x + p.y # fields also accessable by name + 33 + >>> d = p._asdict() # convert to a dictionary + >>> d['x'] + 11 + >>> Point(**d) # convert from a dictionary + Point(x=11, y=22) + >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields + Point(x=100, y=22) + + """ + + # Parse and validate the field names. Validation serves two purposes, + # generating informative error messages and preventing template injection attacks. + if isinstance(field_names, basestring): + field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas + field_names = tuple(map(str, field_names)) + for name in (typename,) + field_names: + if not all(c.isalnum() or c=='_' for c in name): + raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name) + if _iskeyword(name): + raise ValueError('Type names and field names cannot be a keyword: %r' % name) + if name[0].isdigit(): + raise ValueError('Type names and field names cannot start with a number: %r' % name) + seen_names = set() + for name in field_names: + if name.startswith('_'): + raise ValueError('Field names cannot start with an underscore: %r' % name) + if name in seen_names: + raise ValueError('Encountered duplicate field name: %r' % name) + seen_names.add(name) + + # Create and fill-in the class template + numfields = len(field_names) + argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes + reprtxt = ', '.join('%s=%%r' % name for name in field_names) + dicttxt = ', '.join('%r: t[%d]' % (name, pos) for pos, name in enumerate(field_names)) + template = '''class %(typename)s(tuple): + '%(typename)s(%(argtxt)s)' \n + __slots__ = () \n + _fields = %(field_names)r \n + def __new__(_cls, %(argtxt)s): + return _tuple.__new__(_cls, (%(argtxt)s)) \n + @classmethod + def _make(cls, iterable, new=tuple.__new__, len=len): + 'Make a new %(typename)s object from a sequence or iterable' + result = new(cls, iterable) + if len(result) != %(numfields)d: + raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result)) + return result \n + def __repr__(self): + return '%(typename)s(%(reprtxt)s)' %% self \n + def _asdict(t): + 'Return a new dict which maps field names to their values' + return {%(dicttxt)s} \n + def _replace(_self, **kwds): + 'Return a new %(typename)s object replacing specified fields with new values' + result = _self._make(map(kwds.pop, %(field_names)r, _self)) + if kwds: + raise ValueError('Got unexpected field names: %%r' %% kwds.keys()) + return result \n + def __getnewargs__(self): + return tuple(self) \n\n''' % locals() + for i, name in enumerate(field_names): + template += ' %s = _property(_itemgetter(%d))\n' % (name, i) + + # Execute the template string in a temporary namespace and + # support tracing utilities by setting a value for frame.f_globals['__name__'] + namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename, + _property=property, _tuple=tuple) + try: + exec(template, namespace) + except SyntaxError: + e = _sys.exc_info()[1] + raise SyntaxError(e.message + ':\n' + template) + result = namespace[typename] + + # For pickling to work, the __module__ variable needs to be set to the frame + # where the named tuple is created. Bypass this step in enviroments where + # sys._getframe is not defined (Jython for example). + if hasattr(_sys, '_getframe'): + result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__') + + return result + + +if _sys.version_info[0] < 3: + def reraise(exc, traceback): + locals_ = {'exc_type': type(exc), 'exc_value': exc, 'traceback': traceback} + exec('raise exc_type, exc_value, traceback', {}, locals_) +else: + def reraise(exc, traceback): + # Tracebacks are embedded in exceptions in Python 3 + raise exc diff --git a/lib/concurrent/futures/process.py b/lib/concurrent/futures/process.py new file mode 100644 index 00000000..98684f8e --- /dev/null +++ b/lib/concurrent/futures/process.py @@ -0,0 +1,363 @@ +# Copyright 2009 Brian Quinlan. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Implements ProcessPoolExecutor. + +The follow diagram and text describe the data-flow through the system: + +|======================= In-process =====================|== Out-of-process ==| + ++----------+ +----------+ +--------+ +-----------+ +---------+ +| | => | Work Ids | => | | => | Call Q | => | | +| | +----------+ | | +-----------+ | | +| | | ... | | | | ... | | | +| | | 6 | | | | 5, call() | | | +| | | 7 | | | | ... | | | +| Process | | ... | | Local | +-----------+ | Process | +| Pool | +----------+ | Worker | | #1..n | +| Executor | | Thread | | | +| | +----------- + | | +-----------+ | | +| | <=> | Work Items | <=> | | <= | Result Q | <= | | +| | +------------+ | | +-----------+ | | +| | | 6: call() | | | | ... | | | +| | | future | | | | 4, result | | | +| | | ... | | | | 3, except | | | ++----------+ +------------+ +--------+ +-----------+ +---------+ + +Executor.submit() called: +- creates a uniquely numbered _WorkItem and adds it to the "Work Items" dict +- adds the id of the _WorkItem to the "Work Ids" queue + +Local worker thread: +- reads work ids from the "Work Ids" queue and looks up the corresponding + WorkItem from the "Work Items" dict: if the work item has been cancelled then + it is simply removed from the dict, otherwise it is repackaged as a + _CallItem and put in the "Call Q". New _CallItems are put in the "Call Q" + until "Call Q" is full. NOTE: the size of the "Call Q" is kept small because + calls placed in the "Call Q" can no longer be cancelled with Future.cancel(). +- reads _ResultItems from "Result Q", updates the future stored in the + "Work Items" dict and deletes the dict entry + +Process #1..n: +- reads _CallItems from "Call Q", executes the calls, and puts the resulting + _ResultItems in "Request Q" +""" + +from __future__ import with_statement +import atexit +import multiprocessing +import threading +import weakref +import sys + +from concurrent.futures import _base + +try: + import queue +except ImportError: + import Queue as queue + +__author__ = 'Brian Quinlan (brian@sweetapp.com)' + +# Workers are created as daemon threads and processes. This is done to allow the +# interpreter to exit when there are still idle processes in a +# ProcessPoolExecutor's process pool (i.e. shutdown() was not called). However, +# allowing workers to die with the interpreter has two undesirable properties: +# - The workers would still be running during interpretor shutdown, +# meaning that they would fail in unpredictable ways. +# - The workers could be killed while evaluating a work item, which could +# be bad if the callable being evaluated has external side-effects e.g. +# writing to a file. +# +# To work around this problem, an exit handler is installed which tells the +# workers to exit when their work queues are empty and then waits until the +# threads/processes finish. + +_threads_queues = weakref.WeakKeyDictionary() +_shutdown = False + +def _python_exit(): + global _shutdown + _shutdown = True + items = list(_threads_queues.items()) + for t, q in items: + q.put(None) + for t, q in items: + t.join() + +# Controls how many more calls than processes will be queued in the call queue. +# A smaller number will mean that processes spend more time idle waiting for +# work while a larger number will make Future.cancel() succeed less frequently +# (Futures in the call queue cannot be cancelled). +EXTRA_QUEUED_CALLS = 1 + +class _WorkItem(object): + def __init__(self, future, fn, args, kwargs): + self.future = future + self.fn = fn + self.args = args + self.kwargs = kwargs + +class _ResultItem(object): + def __init__(self, work_id, exception=None, result=None): + self.work_id = work_id + self.exception = exception + self.result = result + +class _CallItem(object): + def __init__(self, work_id, fn, args, kwargs): + self.work_id = work_id + self.fn = fn + self.args = args + self.kwargs = kwargs + +def _process_worker(call_queue, result_queue): + """Evaluates calls from call_queue and places the results in result_queue. + + This worker is run in a separate process. + + Args: + call_queue: A multiprocessing.Queue of _CallItems that will be read and + evaluated by the worker. + result_queue: A multiprocessing.Queue of _ResultItems that will written + to by the worker. + shutdown: A multiprocessing.Event that will be set as a signal to the + worker that it should exit when call_queue is empty. + """ + while True: + call_item = call_queue.get(block=True) + if call_item is None: + # Wake up queue management thread + result_queue.put(None) + return + try: + r = call_item.fn(*call_item.args, **call_item.kwargs) + except BaseException: + e = sys.exc_info()[1] + result_queue.put(_ResultItem(call_item.work_id, + exception=e)) + else: + result_queue.put(_ResultItem(call_item.work_id, + result=r)) + +def _add_call_item_to_queue(pending_work_items, + work_ids, + call_queue): + """Fills call_queue with _WorkItems from pending_work_items. + + This function never blocks. + + Args: + pending_work_items: A dict mapping work ids to _WorkItems e.g. + {5: <_WorkItem...>, 6: <_WorkItem...>, ...} + work_ids: A queue.Queue of work ids e.g. Queue([5, 6, ...]). Work ids + are consumed and the corresponding _WorkItems from + pending_work_items are transformed into _CallItems and put in + call_queue. + call_queue: A multiprocessing.Queue that will be filled with _CallItems + derived from _WorkItems. + """ + while True: + if call_queue.full(): + return + try: + work_id = work_ids.get(block=False) + except queue.Empty: + return + else: + work_item = pending_work_items[work_id] + + if work_item.future.set_running_or_notify_cancel(): + call_queue.put(_CallItem(work_id, + work_item.fn, + work_item.args, + work_item.kwargs), + block=True) + else: + del pending_work_items[work_id] + continue + +def _queue_management_worker(executor_reference, + processes, + pending_work_items, + work_ids_queue, + call_queue, + result_queue): + """Manages the communication between this process and the worker processes. + + This function is run in a local thread. + + Args: + executor_reference: A weakref.ref to the ProcessPoolExecutor that owns + this thread. Used to determine if the ProcessPoolExecutor has been + garbage collected and that this function can exit. + process: A list of the multiprocessing.Process instances used as + workers. + pending_work_items: A dict mapping work ids to _WorkItems e.g. + {5: <_WorkItem...>, 6: <_WorkItem...>, ...} + work_ids_queue: A queue.Queue of work ids e.g. Queue([5, 6, ...]). + call_queue: A multiprocessing.Queue that will be filled with _CallItems + derived from _WorkItems for processing by the process workers. + result_queue: A multiprocessing.Queue of _ResultItems generated by the + process workers. + """ + nb_shutdown_processes = [0] + def shutdown_one_process(): + """Tell a worker to terminate, which will in turn wake us again""" + call_queue.put(None) + nb_shutdown_processes[0] += 1 + while True: + _add_call_item_to_queue(pending_work_items, + work_ids_queue, + call_queue) + + result_item = result_queue.get(block=True) + if result_item is not None: + work_item = pending_work_items[result_item.work_id] + del pending_work_items[result_item.work_id] + + if result_item.exception: + work_item.future.set_exception(result_item.exception) + else: + work_item.future.set_result(result_item.result) + # Check whether we should start shutting down. + executor = executor_reference() + # No more work items can be added if: + # - The interpreter is shutting down OR + # - The executor that owns this worker has been collected OR + # - The executor that owns this worker has been shutdown. + if _shutdown or executor is None or executor._shutdown_thread: + # Since no new work items can be added, it is safe to shutdown + # this thread if there are no pending work items. + if not pending_work_items: + while nb_shutdown_processes[0] < len(processes): + shutdown_one_process() + # If .join() is not called on the created processes then + # some multiprocessing.Queue methods may deadlock on Mac OS + # X. + for p in processes: + p.join() + call_queue.close() + return + del executor + +_system_limits_checked = False +_system_limited = None +def _check_system_limits(): + global _system_limits_checked, _system_limited + if _system_limits_checked: + if _system_limited: + raise NotImplementedError(_system_limited) + _system_limits_checked = True + try: + import os + nsems_max = os.sysconf("SC_SEM_NSEMS_MAX") + except (AttributeError, ValueError): + # sysconf not available or setting not available + return + if nsems_max == -1: + # indetermine limit, assume that limit is determined + # by available memory only + return + if nsems_max >= 256: + # minimum number of semaphores available + # according to POSIX + return + _system_limited = "system provides too few semaphores (%d available, 256 necessary)" % nsems_max + raise NotImplementedError(_system_limited) + +class ProcessPoolExecutor(_base.Executor): + def __init__(self, max_workers=None): + """Initializes a new ProcessPoolExecutor instance. + + Args: + max_workers: The maximum number of processes that can be used to + execute the given calls. If None or not given then as many + worker processes will be created as the machine has processors. + """ + _check_system_limits() + + if max_workers is None: + self._max_workers = multiprocessing.cpu_count() + else: + self._max_workers = max_workers + + # Make the call queue slightly larger than the number of processes to + # prevent the worker processes from idling. But don't make it too big + # because futures in the call queue cannot be cancelled. + self._call_queue = multiprocessing.Queue(self._max_workers + + EXTRA_QUEUED_CALLS) + self._result_queue = multiprocessing.Queue() + self._work_ids = queue.Queue() + self._queue_management_thread = None + self._processes = set() + + # Shutdown is a two-step process. + self._shutdown_thread = False + self._shutdown_lock = threading.Lock() + self._queue_count = 0 + self._pending_work_items = {} + + def _start_queue_management_thread(self): + # When the executor gets lost, the weakref callback will wake up + # the queue management thread. + def weakref_cb(_, q=self._result_queue): + q.put(None) + if self._queue_management_thread is None: + self._queue_management_thread = threading.Thread( + target=_queue_management_worker, + args=(weakref.ref(self, weakref_cb), + self._processes, + self._pending_work_items, + self._work_ids, + self._call_queue, + self._result_queue)) + self._queue_management_thread.daemon = True + self._queue_management_thread.start() + _threads_queues[self._queue_management_thread] = self._result_queue + + def _adjust_process_count(self): + for _ in range(len(self._processes), self._max_workers): + p = multiprocessing.Process( + target=_process_worker, + args=(self._call_queue, + self._result_queue)) + p.start() + self._processes.add(p) + + def submit(self, fn, *args, **kwargs): + with self._shutdown_lock: + if self._shutdown_thread: + raise RuntimeError('cannot schedule new futures after shutdown') + + f = _base.Future() + w = _WorkItem(f, fn, args, kwargs) + + self._pending_work_items[self._queue_count] = w + self._work_ids.put(self._queue_count) + self._queue_count += 1 + # Wake up queue management thread + self._result_queue.put(None) + + self._start_queue_management_thread() + self._adjust_process_count() + return f + submit.__doc__ = _base.Executor.submit.__doc__ + + def shutdown(self, wait=True): + with self._shutdown_lock: + self._shutdown_thread = True + if self._queue_management_thread: + # Wake up queue management thread + self._result_queue.put(None) + if wait: + self._queue_management_thread.join() + # To reduce the risk of openning too many files, remove references to + # objects that use file descriptors. + self._queue_management_thread = None + self._call_queue = None + self._result_queue = None + self._processes = None + shutdown.__doc__ = _base.Executor.shutdown.__doc__ + +atexit.register(_python_exit) diff --git a/lib/concurrent/futures/thread.py b/lib/concurrent/futures/thread.py new file mode 100644 index 00000000..930d1673 --- /dev/null +++ b/lib/concurrent/futures/thread.py @@ -0,0 +1,138 @@ +# Copyright 2009 Brian Quinlan. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Implements ThreadPoolExecutor.""" + +from __future__ import with_statement +import atexit +import threading +import weakref +import sys + +from concurrent.futures import _base + +try: + import queue +except ImportError: + import Queue as queue + +__author__ = 'Brian Quinlan (brian@sweetapp.com)' + +# Workers are created as daemon threads. This is done to allow the interpreter +# to exit when there are still idle threads in a ThreadPoolExecutor's thread +# pool (i.e. shutdown() was not called). However, allowing workers to die with +# the interpreter has two undesirable properties: +# - The workers would still be running during interpretor shutdown, +# meaning that they would fail in unpredictable ways. +# - The workers could be killed while evaluating a work item, which could +# be bad if the callable being evaluated has external side-effects e.g. +# writing to a file. +# +# To work around this problem, an exit handler is installed which tells the +# workers to exit when their work queues are empty and then waits until the +# threads finish. + +_threads_queues = weakref.WeakKeyDictionary() +_shutdown = False + +def _python_exit(): + global _shutdown + _shutdown = True + items = list(_threads_queues.items()) + for t, q in items: + q.put(None) + for t, q in items: + t.join() + +atexit.register(_python_exit) + +class _WorkItem(object): + def __init__(self, future, fn, args, kwargs): + self.future = future + self.fn = fn + self.args = args + self.kwargs = kwargs + + def run(self): + if not self.future.set_running_or_notify_cancel(): + return + + try: + result = self.fn(*self.args, **self.kwargs) + except BaseException: + e, tb = sys.exc_info()[1:] + self.future.set_exception_info(e, tb) + else: + self.future.set_result(result) + +def _worker(executor_reference, work_queue): + try: + while True: + work_item = work_queue.get(block=True) + if work_item is not None: + work_item.run() + continue + executor = executor_reference() + # Exit if: + # - The interpreter is shutting down OR + # - The executor that owns the worker has been collected OR + # - The executor that owns the worker has been shutdown. + if _shutdown or executor is None or executor._shutdown: + # Notice other workers + work_queue.put(None) + return + del executor + except BaseException: + _base.LOGGER.critical('Exception in worker', exc_info=True) + +class ThreadPoolExecutor(_base.Executor): + def __init__(self, max_workers): + """Initializes a new ThreadPoolExecutor instance. + + Args: + max_workers: The maximum number of threads that can be used to + execute the given calls. + """ + self._max_workers = max_workers + self._work_queue = queue.Queue() + self._threads = set() + self._shutdown = False + self._shutdown_lock = threading.Lock() + + def submit(self, fn, *args, **kwargs): + with self._shutdown_lock: + if self._shutdown: + raise RuntimeError('cannot schedule new futures after shutdown') + + f = _base.Future() + w = _WorkItem(f, fn, args, kwargs) + + self._work_queue.put(w) + self._adjust_thread_count() + return f + submit.__doc__ = _base.Executor.submit.__doc__ + + def _adjust_thread_count(self): + # When the executor gets lost, the weakref callback will wake up + # the worker threads. + def weakref_cb(_, q=self._work_queue): + q.put(None) + # TODO(bquinlan): Should avoid creating new threads if there are more + # idle threads than items in the work queue. + if len(self._threads) < self._max_workers: + t = threading.Thread(target=_worker, + args=(weakref.ref(self, weakref_cb), + self._work_queue)) + t.daemon = True + t.start() + self._threads.add(t) + _threads_queues[t] = self._work_queue + + def shutdown(self, wait=True): + with self._shutdown_lock: + self._shutdown = True + self._work_queue.put(None) + if wait: + for t in self._threads: + t.join() + shutdown.__doc__ = _base.Executor.shutdown.__doc__ diff --git a/lib/configobj.py b/lib/configobj.py new file mode 100644 index 00000000..eae556a7 --- /dev/null +++ b/lib/configobj.py @@ -0,0 +1,2468 @@ +# configobj.py +# A config file reader/writer that supports nested sections in config files. +# Copyright (C) 2005-2010 Michael Foord, Nicola Larosa +# E-mail: fuzzyman AT voidspace DOT org DOT uk +# nico AT tekNico DOT net + +# ConfigObj 4 +# http://www.voidspace.org.uk/python/configobj.html + +# Released subject to the BSD License +# Please see http://www.voidspace.org.uk/python/license.shtml + +# Scripts maintained at http://www.voidspace.org.uk/python/index.shtml +# For information about bugfixes, updates and support, please join the +# ConfigObj mailing list: +# http://lists.sourceforge.net/lists/listinfo/configobj-develop +# Comments, suggestions and bug reports welcome. + +from __future__ import generators + +import os +import re +import sys + +from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF16_BE, BOM_UTF16_LE + + +# imported lazily to avoid startup performance hit if it isn't used +compiler = None + +# A dictionary mapping BOM to +# the encoding to decode with, and what to set the +# encoding attribute to. +BOMS = { + BOM_UTF8: ('utf_8', None), + BOM_UTF16_BE: ('utf16_be', 'utf_16'), + BOM_UTF16_LE: ('utf16_le', 'utf_16'), + BOM_UTF16: ('utf_16', 'utf_16'), + } +# All legal variants of the BOM codecs. +# TODO: the list of aliases is not meant to be exhaustive, is there a +# better way ? +BOM_LIST = { + 'utf_16': 'utf_16', + 'u16': 'utf_16', + 'utf16': 'utf_16', + 'utf-16': 'utf_16', + 'utf16_be': 'utf16_be', + 'utf_16_be': 'utf16_be', + 'utf-16be': 'utf16_be', + 'utf16_le': 'utf16_le', + 'utf_16_le': 'utf16_le', + 'utf-16le': 'utf16_le', + 'utf_8': 'utf_8', + 'u8': 'utf_8', + 'utf': 'utf_8', + 'utf8': 'utf_8', + 'utf-8': 'utf_8', + } + +# Map of encodings to the BOM to write. +BOM_SET = { + 'utf_8': BOM_UTF8, + 'utf_16': BOM_UTF16, + 'utf16_be': BOM_UTF16_BE, + 'utf16_le': BOM_UTF16_LE, + None: BOM_UTF8 + } + + +def match_utf8(encoding): + return BOM_LIST.get(encoding.lower()) == 'utf_8' + + +# Quote strings used for writing values +squot = "'%s'" +dquot = '"%s"' +noquot = "%s" +wspace_plus = ' \r\n\v\t\'"' +tsquot = '"""%s"""' +tdquot = "'''%s'''" + +# Sentinel for use in getattr calls to replace hasattr +MISSING = object() + +__version__ = '4.7.2' + +try: + any +except NameError: + def any(iterable): + for entry in iterable: + if entry: + return True + return False + + +__all__ = ( + '__version__', + 'DEFAULT_INDENT_TYPE', + 'DEFAULT_INTERPOLATION', + 'ConfigObjError', + 'NestingError', + 'ParseError', + 'DuplicateError', + 'ConfigspecError', + 'ConfigObj', + 'SimpleVal', + 'InterpolationError', + 'InterpolationLoopError', + 'MissingInterpolationOption', + 'RepeatSectionError', + 'ReloadError', + 'UnreprError', + 'UnknownType', + 'flatten_errors', + 'get_extra_values' +) + +DEFAULT_INTERPOLATION = 'configparser' +DEFAULT_INDENT_TYPE = ' ' +MAX_INTERPOL_DEPTH = 10 + +OPTION_DEFAULTS = { + 'interpolation': True, + 'raise_errors': False, + 'list_values': True, + 'create_empty': False, + 'file_error': False, + 'configspec': None, + 'stringify': True, + # option may be set to one of ('', ' ', '\t') + 'indent_type': None, + 'encoding': None, + 'default_encoding': None, + 'unrepr': False, + 'write_empty_values': False, +} + + + +def getObj(s): + global compiler + if compiler is None: + import compiler + s = "a=" + s + p = compiler.parse(s) + return p.getChildren()[1].getChildren()[0].getChildren()[1] + + +class UnknownType(Exception): + pass + + +class Builder(object): + + def build(self, o): + m = getattr(self, 'build_' + o.__class__.__name__, None) + if m is None: + raise UnknownType(o.__class__.__name__) + return m(o) + + def build_List(self, o): + return map(self.build, o.getChildren()) + + def build_Const(self, o): + return o.value + + def build_Dict(self, o): + d = {} + i = iter(map(self.build, o.getChildren())) + for el in i: + d[el] = i.next() + return d + + def build_Tuple(self, o): + return tuple(self.build_List(o)) + + def build_Name(self, o): + if o.name == 'None': + return None + if o.name == 'True': + return True + if o.name == 'False': + return False + + # An undefined Name + raise UnknownType('Undefined Name') + + def build_Add(self, o): + real, imag = map(self.build_Const, o.getChildren()) + try: + real = float(real) + except TypeError: + raise UnknownType('Add') + if not isinstance(imag, complex) or imag.real != 0.0: + raise UnknownType('Add') + return real+imag + + def build_Getattr(self, o): + parent = self.build(o.expr) + return getattr(parent, o.attrname) + + def build_UnarySub(self, o): + return -self.build_Const(o.getChildren()[0]) + + def build_UnaryAdd(self, o): + return self.build_Const(o.getChildren()[0]) + + +_builder = Builder() + + +def unrepr(s): + if not s: + return s + return _builder.build(getObj(s)) + + + +class ConfigObjError(SyntaxError): + """ + This is the base class for all errors that ConfigObj raises. + It is a subclass of SyntaxError. + """ + def __init__(self, message='', line_number=None, line=''): + self.line = line + self.line_number = line_number + SyntaxError.__init__(self, message) + + +class NestingError(ConfigObjError): + """ + This error indicates a level of nesting that doesn't match. + """ + + +class ParseError(ConfigObjError): + """ + This error indicates that a line is badly written. + It is neither a valid ``key = value`` line, + nor a valid section marker line. + """ + + +class ReloadError(IOError): + """ + A 'reload' operation failed. + This exception is a subclass of ``IOError``. + """ + def __init__(self): + IOError.__init__(self, 'reload failed, filename is not set.') + + +class DuplicateError(ConfigObjError): + """ + The keyword or section specified already exists. + """ + + +class ConfigspecError(ConfigObjError): + """ + An error occured whilst parsing a configspec. + """ + + +class InterpolationError(ConfigObjError): + """Base class for the two interpolation errors.""" + + +class InterpolationLoopError(InterpolationError): + """Maximum interpolation depth exceeded in string interpolation.""" + + def __init__(self, option): + InterpolationError.__init__( + self, + 'interpolation loop detected in value "%s".' % option) + + +class RepeatSectionError(ConfigObjError): + """ + This error indicates additional sections in a section with a + ``__many__`` (repeated) section. + """ + + +class MissingInterpolationOption(InterpolationError): + """A value specified for interpolation was missing.""" + def __init__(self, option): + msg = 'missing option "%s" in interpolation.' % option + InterpolationError.__init__(self, msg) + + +class UnreprError(ConfigObjError): + """An error parsing in unrepr mode.""" + + + +class InterpolationEngine(object): + """ + A helper class to help perform string interpolation. + + This class is an abstract base class; its descendants perform + the actual work. + """ + + # compiled regexp to use in self.interpolate() + _KEYCRE = re.compile(r"%\(([^)]*)\)s") + _cookie = '%' + + def __init__(self, section): + # the Section instance that "owns" this engine + self.section = section + + + def interpolate(self, key, value): + # short-cut + if not self._cookie in value: + return value + + def recursive_interpolate(key, value, section, backtrail): + """The function that does the actual work. + + ``value``: the string we're trying to interpolate. + ``section``: the section in which that string was found + ``backtrail``: a dict to keep track of where we've been, + to detect and prevent infinite recursion loops + + This is similar to a depth-first-search algorithm. + """ + # Have we been here already? + if (key, section.name) in backtrail: + # Yes - infinite loop detected + raise InterpolationLoopError(key) + # Place a marker on our backtrail so we won't come back here again + backtrail[(key, section.name)] = 1 + + # Now start the actual work + match = self._KEYCRE.search(value) + while match: + # The actual parsing of the match is implementation-dependent, + # so delegate to our helper function + k, v, s = self._parse_match(match) + if k is None: + # That's the signal that no further interpolation is needed + replacement = v + else: + # Further interpolation may be needed to obtain final value + replacement = recursive_interpolate(k, v, s, backtrail) + # Replace the matched string with its final value + start, end = match.span() + value = ''.join((value[:start], replacement, value[end:])) + new_search_start = start + len(replacement) + # Pick up the next interpolation key, if any, for next time + # through the while loop + match = self._KEYCRE.search(value, new_search_start) + + # Now safe to come back here again; remove marker from backtrail + del backtrail[(key, section.name)] + + return value + + # Back in interpolate(), all we have to do is kick off the recursive + # function with appropriate starting values + value = recursive_interpolate(key, value, self.section, {}) + return value + + + def _fetch(self, key): + """Helper function to fetch values from owning section. + + Returns a 2-tuple: the value, and the section where it was found. + """ + # switch off interpolation before we try and fetch anything ! + save_interp = self.section.main.interpolation + self.section.main.interpolation = False + + # Start at section that "owns" this InterpolationEngine + current_section = self.section + while True: + # try the current section first + val = current_section.get(key) + if val is not None and not isinstance(val, Section): + break + # try "DEFAULT" next + val = current_section.get('DEFAULT', {}).get(key) + if val is not None and not isinstance(val, Section): + break + # move up to parent and try again + # top-level's parent is itself + if current_section.parent is current_section: + # reached top level, time to give up + break + current_section = current_section.parent + + # restore interpolation to previous value before returning + self.section.main.interpolation = save_interp + if val is None: + raise MissingInterpolationOption(key) + return val, current_section + + + def _parse_match(self, match): + """Implementation-dependent helper function. + + Will be passed a match object corresponding to the interpolation + key we just found (e.g., "%(foo)s" or "$foo"). Should look up that + key in the appropriate config file section (using the ``_fetch()`` + helper function) and return a 3-tuple: (key, value, section) + + ``key`` is the name of the key we're looking for + ``value`` is the value found for that key + ``section`` is a reference to the section where it was found + + ``key`` and ``section`` should be None if no further + interpolation should be performed on the resulting value + (e.g., if we interpolated "$$" and returned "$"). + """ + raise NotImplementedError() + + + +class ConfigParserInterpolation(InterpolationEngine): + """Behaves like ConfigParser.""" + _cookie = '%' + _KEYCRE = re.compile(r"%\(([^)]*)\)s") + + def _parse_match(self, match): + key = match.group(1) + value, section = self._fetch(key) + return key, value, section + + + +class TemplateInterpolation(InterpolationEngine): + """Behaves like string.Template.""" + _cookie = '$' + _delimiter = '$' + _KEYCRE = re.compile(r""" + \$(?: + (?P\$) | # Two $ signs + (?P[_a-z][_a-z0-9]*) | # $name format + {(?P[^}]*)} # ${name} format + ) + """, re.IGNORECASE | re.VERBOSE) + + def _parse_match(self, match): + # Valid name (in or out of braces): fetch value from section + key = match.group('named') or match.group('braced') + if key is not None: + value, section = self._fetch(key) + return key, value, section + # Escaped delimiter (e.g., $$): return single delimiter + if match.group('escaped') is not None: + # Return None for key and section to indicate it's time to stop + return None, self._delimiter, None + # Anything else: ignore completely, just return it unchanged + return None, match.group(), None + + +interpolation_engines = { + 'configparser': ConfigParserInterpolation, + 'template': TemplateInterpolation, +} + + +def __newobj__(cls, *args): + # Hack for pickle + return cls.__new__(cls, *args) + +class Section(dict): + """ + A dictionary-like object that represents a section in a config file. + + It does string interpolation if the 'interpolation' attribute + of the 'main' object is set to True. + + Interpolation is tried first from this object, then from the 'DEFAULT' + section of this object, next from the parent and its 'DEFAULT' section, + and so on until the main object is reached. + + A Section will behave like an ordered dictionary - following the + order of the ``scalars`` and ``sections`` attributes. + You can use this to change the order of members. + + Iteration follows the order: scalars, then sections. + """ + + + def __setstate__(self, state): + dict.update(self, state[0]) + self.__dict__.update(state[1]) + + def __reduce__(self): + state = (dict(self), self.__dict__) + return (__newobj__, (self.__class__,), state) + + + def __init__(self, parent, depth, main, indict=None, name=None): + """ + * parent is the section above + * depth is the depth level of this section + * main is the main ConfigObj + * indict is a dictionary to initialise the section with + """ + if indict is None: + indict = {} + dict.__init__(self) + # used for nesting level *and* interpolation + self.parent = parent + # used for the interpolation attribute + self.main = main + # level of nesting depth of this Section + self.depth = depth + # purely for information + self.name = name + # + self._initialise() + # we do this explicitly so that __setitem__ is used properly + # (rather than just passing to ``dict.__init__``) + for entry, value in indict.iteritems(): + self[entry] = value + + + def _initialise(self): + # the sequence of scalar values in this Section + self.scalars = [] + # the sequence of sections in this Section + self.sections = [] + # for comments :-) + self.comments = {} + self.inline_comments = {} + # the configspec + self.configspec = None + # for defaults + self.defaults = [] + self.default_values = {} + self.extra_values = [] + self._created = False + + + def _interpolate(self, key, value): + try: + # do we already have an interpolation engine? + engine = self._interpolation_engine + except AttributeError: + # not yet: first time running _interpolate(), so pick the engine + name = self.main.interpolation + if name == True: # note that "if name:" would be incorrect here + # backwards-compatibility: interpolation=True means use default + name = DEFAULT_INTERPOLATION + name = name.lower() # so that "Template", "template", etc. all work + class_ = interpolation_engines.get(name, None) + if class_ is None: + # invalid value for self.main.interpolation + self.main.interpolation = False + return value + else: + # save reference to engine so we don't have to do this again + engine = self._interpolation_engine = class_(self) + # let the engine do the actual work + return engine.interpolate(key, value) + + + def __getitem__(self, key): + """Fetch the item and do string interpolation.""" + val = dict.__getitem__(self, key) + if self.main.interpolation: + if isinstance(val, basestring): + return self._interpolate(key, val) + if isinstance(val, list): + def _check(entry): + if isinstance(entry, basestring): + return self._interpolate(key, entry) + return entry + new = [_check(entry) for entry in val] + if new != val: + return new + return val + + + def __setitem__(self, key, value, unrepr=False): + """ + Correctly set a value. + + Making dictionary values Section instances. + (We have to special case 'Section' instances - which are also dicts) + + Keys must be strings. + Values need only be strings (or lists of strings) if + ``main.stringify`` is set. + + ``unrepr`` must be set when setting a value to a dictionary, without + creating a new sub-section. + """ + if not isinstance(key, basestring): + raise ValueError('The key "%s" is not a string.' % key) + + # add the comment + if key not in self.comments: + self.comments[key] = [] + self.inline_comments[key] = '' + # remove the entry from defaults + if key in self.defaults: + self.defaults.remove(key) + # + if isinstance(value, Section): + if key not in self: + self.sections.append(key) + dict.__setitem__(self, key, value) + elif isinstance(value, dict) and not unrepr: + # First create the new depth level, + # then create the section + if key not in self: + self.sections.append(key) + new_depth = self.depth + 1 + dict.__setitem__( + self, + key, + Section( + self, + new_depth, + self.main, + indict=value, + name=key)) + else: + if key not in self: + self.scalars.append(key) + if not self.main.stringify: + if isinstance(value, basestring): + pass + elif isinstance(value, (list, tuple)): + for entry in value: + if not isinstance(entry, basestring): + raise TypeError('Value is not a string "%s".' % entry) + else: + raise TypeError('Value is not a string "%s".' % value) + dict.__setitem__(self, key, value) + + + def __delitem__(self, key): + """Remove items from the sequence when deleting.""" + dict. __delitem__(self, key) + if key in self.scalars: + self.scalars.remove(key) + else: + self.sections.remove(key) + del self.comments[key] + del self.inline_comments[key] + + + def get(self, key, default=None): + """A version of ``get`` that doesn't bypass string interpolation.""" + try: + return self[key] + except KeyError: + return default + + + def update(self, indict): + """ + A version of update that uses our ``__setitem__``. + """ + for entry in indict: + self[entry] = indict[entry] + + + def pop(self, key, default=MISSING): + """ + 'D.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised' + """ + try: + val = self[key] + except KeyError: + if default is MISSING: + raise + val = default + else: + del self[key] + return val + + + def popitem(self): + """Pops the first (key,val)""" + sequence = (self.scalars + self.sections) + if not sequence: + raise KeyError(": 'popitem(): dictionary is empty'") + key = sequence[0] + val = self[key] + del self[key] + return key, val + + + def clear(self): + """ + A version of clear that also affects scalars/sections + Also clears comments and configspec. + + Leaves other attributes alone : + depth/main/parent are not affected + """ + dict.clear(self) + self.scalars = [] + self.sections = [] + self.comments = {} + self.inline_comments = {} + self.configspec = None + self.defaults = [] + self.extra_values = [] + + + def setdefault(self, key, default=None): + """A version of setdefault that sets sequence if appropriate.""" + try: + return self[key] + except KeyError: + self[key] = default + return self[key] + + + def items(self): + """D.items() -> list of D's (key, value) pairs, as 2-tuples""" + return zip((self.scalars + self.sections), self.values()) + + + def keys(self): + """D.keys() -> list of D's keys""" + return (self.scalars + self.sections) + + + def values(self): + """D.values() -> list of D's values""" + return [self[key] for key in (self.scalars + self.sections)] + + + def iteritems(self): + """D.iteritems() -> an iterator over the (key, value) items of D""" + return iter(self.items()) + + + def iterkeys(self): + """D.iterkeys() -> an iterator over the keys of D""" + return iter((self.scalars + self.sections)) + + __iter__ = iterkeys + + + def itervalues(self): + """D.itervalues() -> an iterator over the values of D""" + return iter(self.values()) + + + def __repr__(self): + """x.__repr__() <==> repr(x)""" + def _getval(key): + try: + return self[key] + except MissingInterpolationOption: + return dict.__getitem__(self, key) + return '{%s}' % ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) + for key in (self.scalars + self.sections)]) + + __str__ = __repr__ + __str__.__doc__ = "x.__str__() <==> str(x)" + + + # Extra methods - not in a normal dictionary + + def dict(self): + """ + Return a deepcopy of self as a dictionary. + + All members that are ``Section`` instances are recursively turned to + ordinary dictionaries - by calling their ``dict`` method. + + >>> n = a.dict() + >>> n == a + 1 + >>> n is a + 0 + """ + newdict = {} + for entry in self: + this_entry = self[entry] + if isinstance(this_entry, Section): + this_entry = this_entry.dict() + elif isinstance(this_entry, list): + # create a copy rather than a reference + this_entry = list(this_entry) + elif isinstance(this_entry, tuple): + # create a copy rather than a reference + this_entry = tuple(this_entry) + newdict[entry] = this_entry + return newdict + + + def merge(self, indict): + """ + A recursive update - useful for merging config files. + + >>> a = '''[section1] + ... option1 = True + ... [[subsection]] + ... more_options = False + ... # end of file'''.splitlines() + >>> b = '''# File is user.ini + ... [section1] + ... option1 = False + ... # end of file'''.splitlines() + >>> c1 = ConfigObj(b) + >>> c2 = ConfigObj(a) + >>> c2.merge(c1) + >>> c2 + ConfigObj({'section1': {'option1': 'False', 'subsection': {'more_options': 'False'}}}) + """ + for key, val in indict.items(): + if (key in self and isinstance(self[key], dict) and + isinstance(val, dict)): + self[key].merge(val) + else: + self[key] = val + + + def rename(self, oldkey, newkey): + """ + Change a keyname to another, without changing position in sequence. + + Implemented so that transformations can be made on keys, + as well as on values. (used by encode and decode) + + Also renames comments. + """ + if oldkey in self.scalars: + the_list = self.scalars + elif oldkey in self.sections: + the_list = self.sections + else: + raise KeyError('Key "%s" not found.' % oldkey) + pos = the_list.index(oldkey) + # + val = self[oldkey] + dict.__delitem__(self, oldkey) + dict.__setitem__(self, newkey, val) + the_list.remove(oldkey) + the_list.insert(pos, newkey) + comm = self.comments[oldkey] + inline_comment = self.inline_comments[oldkey] + del self.comments[oldkey] + del self.inline_comments[oldkey] + self.comments[newkey] = comm + self.inline_comments[newkey] = inline_comment + + + def walk(self, function, raise_errors=True, + call_on_sections=False, **keywargs): + """ + Walk every member and call a function on the keyword and value. + + Return a dictionary of the return values + + If the function raises an exception, raise the errror + unless ``raise_errors=False``, in which case set the return value to + ``False``. + + Any unrecognised keyword arguments you pass to walk, will be pased on + to the function you pass in. + + Note: if ``call_on_sections`` is ``True`` then - on encountering a + subsection, *first* the function is called for the *whole* subsection, + and then recurses into it's members. This means your function must be + able to handle strings, dictionaries and lists. This allows you + to change the key of subsections as well as for ordinary members. The + return value when called on the whole subsection has to be discarded. + + See the encode and decode methods for examples, including functions. + + .. admonition:: caution + + You can use ``walk`` to transform the names of members of a section + but you mustn't add or delete members. + + >>> config = '''[XXXXsection] + ... XXXXkey = XXXXvalue'''.splitlines() + >>> cfg = ConfigObj(config) + >>> cfg + ConfigObj({'XXXXsection': {'XXXXkey': 'XXXXvalue'}}) + >>> def transform(section, key): + ... val = section[key] + ... newkey = key.replace('XXXX', 'CLIENT1') + ... section.rename(key, newkey) + ... if isinstance(val, (tuple, list, dict)): + ... pass + ... else: + ... val = val.replace('XXXX', 'CLIENT1') + ... section[newkey] = val + >>> cfg.walk(transform, call_on_sections=True) + {'CLIENT1section': {'CLIENT1key': None}} + >>> cfg + ConfigObj({'CLIENT1section': {'CLIENT1key': 'CLIENT1value'}}) + """ + out = {} + # scalars first + for i in range(len(self.scalars)): + entry = self.scalars[i] + try: + val = function(self, entry, **keywargs) + # bound again in case name has changed + entry = self.scalars[i] + out[entry] = val + except Exception: + if raise_errors: + raise + else: + entry = self.scalars[i] + out[entry] = False + # then sections + for i in range(len(self.sections)): + entry = self.sections[i] + if call_on_sections: + try: + function(self, entry, **keywargs) + except Exception: + if raise_errors: + raise + else: + entry = self.sections[i] + out[entry] = False + # bound again in case name has changed + entry = self.sections[i] + # previous result is discarded + out[entry] = self[entry].walk( + function, + raise_errors=raise_errors, + call_on_sections=call_on_sections, + **keywargs) + return out + + + def as_bool(self, key): + """ + Accepts a key as input. The corresponding value must be a string or + the objects (``True`` or 1) or (``False`` or 0). We allow 0 and 1 to + retain compatibility with Python 2.2. + + If the string is one of ``True``, ``On``, ``Yes``, or ``1`` it returns + ``True``. + + If the string is one of ``False``, ``Off``, ``No``, or ``0`` it returns + ``False``. + + ``as_bool`` is not case sensitive. + + Any other input will raise a ``ValueError``. + + >>> a = ConfigObj() + >>> a['a'] = 'fish' + >>> a.as_bool('a') + Traceback (most recent call last): + ValueError: Value "fish" is neither True nor False + >>> a['b'] = 'True' + >>> a.as_bool('b') + 1 + >>> a['b'] = 'off' + >>> a.as_bool('b') + 0 + """ + val = self[key] + if val == True: + return True + elif val == False: + return False + else: + try: + if not isinstance(val, basestring): + # TODO: Why do we raise a KeyError here? + raise KeyError() + else: + return self.main._bools[val.lower()] + except KeyError: + raise ValueError('Value "%s" is neither True nor False' % val) + + + def as_int(self, key): + """ + A convenience method which coerces the specified value to an integer. + + If the value is an invalid literal for ``int``, a ``ValueError`` will + be raised. + + >>> a = ConfigObj() + >>> a['a'] = 'fish' + >>> a.as_int('a') + Traceback (most recent call last): + ValueError: invalid literal for int() with base 10: 'fish' + >>> a['b'] = '1' + >>> a.as_int('b') + 1 + >>> a['b'] = '3.2' + >>> a.as_int('b') + Traceback (most recent call last): + ValueError: invalid literal for int() with base 10: '3.2' + """ + return int(self[key]) + + + def as_float(self, key): + """ + A convenience method which coerces the specified value to a float. + + If the value is an invalid literal for ``float``, a ``ValueError`` will + be raised. + + >>> a = ConfigObj() + >>> a['a'] = 'fish' + >>> a.as_float('a') + Traceback (most recent call last): + ValueError: invalid literal for float(): fish + >>> a['b'] = '1' + >>> a.as_float('b') + 1.0 + >>> a['b'] = '3.2' + >>> a.as_float('b') + 3.2000000000000002 + """ + return float(self[key]) + + + def as_list(self, key): + """ + A convenience method which fetches the specified value, guaranteeing + that it is a list. + + >>> a = ConfigObj() + >>> a['a'] = 1 + >>> a.as_list('a') + [1] + >>> a['a'] = (1,) + >>> a.as_list('a') + [1] + >>> a['a'] = [1] + >>> a.as_list('a') + [1] + """ + result = self[key] + if isinstance(result, (tuple, list)): + return list(result) + return [result] + + + def restore_default(self, key): + """ + Restore (and return) default value for the specified key. + + This method will only work for a ConfigObj that was created + with a configspec and has been validated. + + If there is no default value for this key, ``KeyError`` is raised. + """ + default = self.default_values[key] + dict.__setitem__(self, key, default) + if key not in self.defaults: + self.defaults.append(key) + return default + + + def restore_defaults(self): + """ + Recursively restore default values to all members + that have them. + + This method will only work for a ConfigObj that was created + with a configspec and has been validated. + + It doesn't delete or modify entries without default values. + """ + for key in self.default_values: + self.restore_default(key) + + for section in self.sections: + self[section].restore_defaults() + + +class ConfigObj(Section): + """An object to read, create, and write config files.""" + + _keyword = re.compile(r'''^ # line start + (\s*) # indentation + ( # keyword + (?:".*?")| # double quotes + (?:'.*?')| # single quotes + (?:[^'"=].*?) # no quotes + ) + \s*=\s* # divider + (.*) # value (including list values and comments) + $ # line end + ''', + re.VERBOSE) + + _sectionmarker = re.compile(r'''^ + (\s*) # 1: indentation + ((?:\[\s*)+) # 2: section marker open + ( # 3: section name open + (?:"\s*\S.*?\s*")| # at least one non-space with double quotes + (?:'\s*\S.*?\s*')| # at least one non-space with single quotes + (?:[^'"\s].*?) # at least one non-space unquoted + ) # section name close + ((?:\s*\])+) # 4: section marker close + \s*(\#.*)? # 5: optional comment + $''', + re.VERBOSE) + + # this regexp pulls list values out as a single string + # or single values and comments + # FIXME: this regex adds a '' to the end of comma terminated lists + # workaround in ``_handle_value`` + _valueexp = re.compile(r'''^ + (?: + (?: + ( + (?: + (?: + (?:".*?")| # double quotes + (?:'.*?')| # single quotes + (?:[^'",\#][^,\#]*?) # unquoted + ) + \s*,\s* # comma + )* # match all list items ending in a comma (if any) + ) + ( + (?:".*?")| # double quotes + (?:'.*?')| # single quotes + (?:[^'",\#\s][^,]*?)| # unquoted + (?:(? 1: + msg = "Parsing failed with several errors.\nFirst error %s" % info + error = ConfigObjError(msg) + else: + error = self._errors[0] + # set the errors attribute; it's a list of tuples: + # (error_type, message, line_number) + error.errors = self._errors + # set the config attribute + error.config = self + raise error + # delete private attributes + del self._errors + + if configspec is None: + self.configspec = None + else: + self._handle_configspec(configspec) + + + def _initialise(self, options=None): + if options is None: + options = OPTION_DEFAULTS + + # initialise a few variables + self.filename = None + self._errors = [] + self.raise_errors = options['raise_errors'] + self.interpolation = options['interpolation'] + self.list_values = options['list_values'] + self.create_empty = options['create_empty'] + self.file_error = options['file_error'] + self.stringify = options['stringify'] + self.indent_type = options['indent_type'] + self.encoding = options['encoding'] + self.default_encoding = options['default_encoding'] + self.BOM = False + self.newlines = None + self.write_empty_values = options['write_empty_values'] + self.unrepr = options['unrepr'] + + self.initial_comment = [] + self.final_comment = [] + self.configspec = None + + if self._inspec: + self.list_values = False + + # Clear section attributes as well + Section._initialise(self) + + + def __repr__(self): + def _getval(key): + try: + return self[key] + except MissingInterpolationOption: + return dict.__getitem__(self, key) + return ('ConfigObj({%s})' % + ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) + for key in (self.scalars + self.sections)])) + + + def _handle_bom(self, infile): + """ + Handle any BOM, and decode if necessary. + + If an encoding is specified, that *must* be used - but the BOM should + still be removed (and the BOM attribute set). + + (If the encoding is wrongly specified, then a BOM for an alternative + encoding won't be discovered or removed.) + + If an encoding is not specified, UTF8 or UTF16 BOM will be detected and + removed. The BOM attribute will be set. UTF16 will be decoded to + unicode. + + NOTE: This method must not be called with an empty ``infile``. + + Specifying the *wrong* encoding is likely to cause a + ``UnicodeDecodeError``. + + ``infile`` must always be returned as a list of lines, but may be + passed in as a single string. + """ + if ((self.encoding is not None) and + (self.encoding.lower() not in BOM_LIST)): + # No need to check for a BOM + # the encoding specified doesn't have one + # just decode + return self._decode(infile, self.encoding) + + if isinstance(infile, (list, tuple)): + line = infile[0] + else: + line = infile + if self.encoding is not None: + # encoding explicitly supplied + # And it could have an associated BOM + # TODO: if encoding is just UTF16 - we ought to check for both + # TODO: big endian and little endian versions. + enc = BOM_LIST[self.encoding.lower()] + if enc == 'utf_16': + # For UTF16 we try big endian and little endian + for BOM, (encoding, final_encoding) in BOMS.items(): + if not final_encoding: + # skip UTF8 + continue + if infile.startswith(BOM): + ### BOM discovered + ##self.BOM = True + # Don't need to remove BOM + return self._decode(infile, encoding) + + # If we get this far, will *probably* raise a DecodeError + # As it doesn't appear to start with a BOM + return self._decode(infile, self.encoding) + + # Must be UTF8 + BOM = BOM_SET[enc] + if not line.startswith(BOM): + return self._decode(infile, self.encoding) + + newline = line[len(BOM):] + + # BOM removed + if isinstance(infile, (list, tuple)): + infile[0] = newline + else: + infile = newline + self.BOM = True + return self._decode(infile, self.encoding) + + # No encoding specified - so we need to check for UTF8/UTF16 + for BOM, (encoding, final_encoding) in BOMS.items(): + if not line.startswith(BOM): + continue + else: + # BOM discovered + self.encoding = final_encoding + if not final_encoding: + self.BOM = True + # UTF8 + # remove BOM + newline = line[len(BOM):] + if isinstance(infile, (list, tuple)): + infile[0] = newline + else: + infile = newline + # UTF8 - don't decode + if isinstance(infile, basestring): + return infile.splitlines(True) + else: + return infile + # UTF16 - have to decode + return self._decode(infile, encoding) + + # No BOM discovered and no encoding specified, just return + if isinstance(infile, basestring): + # infile read from a file will be a single string + return infile.splitlines(True) + return infile + + + def _a_to_u(self, aString): + """Decode ASCII strings to unicode if a self.encoding is specified.""" + if self.encoding: + return aString.decode('ascii') + else: + return aString + + + def _decode(self, infile, encoding): + """ + Decode infile to unicode. Using the specified encoding. + + if is a string, it also needs converting to a list. + """ + if isinstance(infile, basestring): + # can't be unicode + # NOTE: Could raise a ``UnicodeDecodeError`` + return infile.decode(encoding).splitlines(True) + for i, line in enumerate(infile): + if not isinstance(line, unicode): + # NOTE: The isinstance test here handles mixed lists of unicode/string + # NOTE: But the decode will break on any non-string values + # NOTE: Or could raise a ``UnicodeDecodeError`` + infile[i] = line.decode(encoding) + return infile + + + def _decode_element(self, line): + """Decode element to unicode if necessary.""" + if not self.encoding: + return line + if isinstance(line, str) and self.default_encoding: + return line.decode(self.default_encoding) + return line + + + def _str(self, value): + """ + Used by ``stringify`` within validate, to turn non-string values + into strings. + """ + if not isinstance(value, basestring): + return str(value) + else: + return value + + + def _parse(self, infile): + """Actually parse the config file.""" + temp_list_values = self.list_values + if self.unrepr: + self.list_values = False + + comment_list = [] + done_start = False + this_section = self + maxline = len(infile) - 1 + cur_index = -1 + reset_comment = False + + while cur_index < maxline: + if reset_comment: + comment_list = [] + cur_index += 1 + line = infile[cur_index] + sline = line.strip() + # do we have anything on the line ? + if not sline or sline.startswith('#'): + reset_comment = False + comment_list.append(line) + continue + + if not done_start: + # preserve initial comment + self.initial_comment = comment_list + comment_list = [] + done_start = True + + reset_comment = True + # first we check if it's a section marker + mat = self._sectionmarker.match(line) + if mat is not None: + # is a section line + (indent, sect_open, sect_name, sect_close, comment) = mat.groups() + if indent and (self.indent_type is None): + self.indent_type = indent + cur_depth = sect_open.count('[') + if cur_depth != sect_close.count(']'): + self._handle_error("Cannot compute the section depth at line %s.", + NestingError, infile, cur_index) + continue + + if cur_depth < this_section.depth: + # the new section is dropping back to a previous level + try: + parent = self._match_depth(this_section, + cur_depth).parent + except SyntaxError: + self._handle_error("Cannot compute nesting level at line %s.", + NestingError, infile, cur_index) + continue + elif cur_depth == this_section.depth: + # the new section is a sibling of the current section + parent = this_section.parent + elif cur_depth == this_section.depth + 1: + # the new section is a child the current section + parent = this_section + else: + self._handle_error("Section too nested at line %s.", + NestingError, infile, cur_index) + + sect_name = self._unquote(sect_name) + if sect_name in parent: + self._handle_error('Duplicate section name at line %s.', + DuplicateError, infile, cur_index) + continue + + # create the new section + this_section = Section( + parent, + cur_depth, + self, + name=sect_name) + parent[sect_name] = this_section + parent.inline_comments[sect_name] = comment + parent.comments[sect_name] = comment_list + continue + # + # it's not a section marker, + # so it should be a valid ``key = value`` line + mat = self._keyword.match(line) + if mat is None: + # it neither matched as a keyword + # or a section marker + self._handle_error( + 'Invalid line at line "%s".', + ParseError, infile, cur_index) + else: + # is a keyword value + # value will include any inline comment + (indent, key, value) = mat.groups() + if indent and (self.indent_type is None): + self.indent_type = indent + # check for a multiline value + if value[:3] in ['"""', "'''"]: + try: + value, comment, cur_index = self._multiline( + value, infile, cur_index, maxline) + except SyntaxError: + self._handle_error( + 'Parse error in value at line %s.', + ParseError, infile, cur_index) + continue + else: + if self.unrepr: + comment = '' + try: + value = unrepr(value) + except Exception, e: + if type(e) == UnknownType: + msg = 'Unknown name or type in value at line %s.' + else: + msg = 'Parse error in value at line %s.' + self._handle_error(msg, UnreprError, infile, + cur_index) + continue + else: + if self.unrepr: + comment = '' + try: + value = unrepr(value) + except Exception, e: + if isinstance(e, UnknownType): + msg = 'Unknown name or type in value at line %s.' + else: + msg = 'Parse error in value at line %s.' + self._handle_error(msg, UnreprError, infile, + cur_index) + continue + else: + # extract comment and lists + try: + (value, comment) = self._handle_value(value) + except SyntaxError: + self._handle_error( + 'Parse error in value at line %s.', + ParseError, infile, cur_index) + continue + # + key = self._unquote(key) + if key in this_section: + self._handle_error( + 'Duplicate keyword name at line %s.', + DuplicateError, infile, cur_index) + continue + # add the key. + # we set unrepr because if we have got this far we will never + # be creating a new section + this_section.__setitem__(key, value, unrepr=True) + this_section.inline_comments[key] = comment + this_section.comments[key] = comment_list + continue + # + if self.indent_type is None: + # no indentation used, set the type accordingly + self.indent_type = '' + + # preserve the final comment + if not self and not self.initial_comment: + self.initial_comment = comment_list + elif not reset_comment: + self.final_comment = comment_list + self.list_values = temp_list_values + + + def _match_depth(self, sect, depth): + """ + Given a section and a depth level, walk back through the sections + parents to see if the depth level matches a previous section. + + Return a reference to the right section, + or raise a SyntaxError. + """ + while depth < sect.depth: + if sect is sect.parent: + # we've reached the top level already + raise SyntaxError() + sect = sect.parent + if sect.depth == depth: + return sect + # shouldn't get here + raise SyntaxError() + + + def _handle_error(self, text, ErrorClass, infile, cur_index): + """ + Handle an error according to the error settings. + + Either raise the error or store it. + The error will have occured at ``cur_index`` + """ + line = infile[cur_index] + cur_index += 1 + message = text % cur_index + error = ErrorClass(message, cur_index, line) + if self.raise_errors: + # raise the error - parsing stops here + raise error + # store the error + # reraise when parsing has finished + self._errors.append(error) + + + def _unquote(self, value): + """Return an unquoted version of a value""" + if not value: + # should only happen during parsing of lists + raise SyntaxError + if (value[0] == value[-1]) and (value[0] in ('"', "'")): + value = value[1:-1] + return value + + + def _quote(self, value, multiline=True): + """ + Return a safely quoted version of a value. + + Raise a ConfigObjError if the value cannot be safely quoted. + If multiline is ``True`` (default) then use triple quotes + if necessary. + + * Don't quote values that don't need it. + * Recursively quote members of a list and return a comma joined list. + * Multiline is ``False`` for lists. + * Obey list syntax for empty and single member lists. + + If ``list_values=False`` then the value is only quoted if it contains + a ``\\n`` (is multiline) or '#'. + + If ``write_empty_values`` is set, and the value is an empty string, it + won't be quoted. + """ + if multiline and self.write_empty_values and value == '': + # Only if multiline is set, so that it is used for values not + # keys, and not values that are part of a list + return '' + + if multiline and isinstance(value, (list, tuple)): + if not value: + return ',' + elif len(value) == 1: + return self._quote(value[0], multiline=False) + ',' + return ', '.join([self._quote(val, multiline=False) + for val in value]) + if not isinstance(value, basestring): + if self.stringify: + value = str(value) + else: + raise TypeError('Value "%s" is not a string.' % value) + + if not value: + return '""' + + no_lists_no_quotes = not self.list_values and '\n' not in value and '#' not in value + need_triple = multiline and ((("'" in value) and ('"' in value)) or ('\n' in value )) + hash_triple_quote = multiline and not need_triple and ("'" in value) and ('"' in value) and ('#' in value) + check_for_single = (no_lists_no_quotes or not need_triple) and not hash_triple_quote + + if check_for_single: + if not self.list_values: + # we don't quote if ``list_values=False`` + quot = noquot + # for normal values either single or double quotes will do + elif '\n' in value: + # will only happen if multiline is off - e.g. '\n' in key + raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) + elif ((value[0] not in wspace_plus) and + (value[-1] not in wspace_plus) and + (',' not in value)): + quot = noquot + else: + quot = self._get_single_quote(value) + else: + # if value has '\n' or "'" *and* '"', it will need triple quotes + quot = self._get_triple_quote(value) + + if quot == noquot and '#' in value and self.list_values: + quot = self._get_single_quote(value) + + return quot % value + + + def _get_single_quote(self, value): + if ("'" in value) and ('"' in value): + raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) + elif '"' in value: + quot = squot + else: + quot = dquot + return quot + + + def _get_triple_quote(self, value): + if (value.find('"""') != -1) and (value.find("'''") != -1): + raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) + if value.find('"""') == -1: + quot = tdquot + else: + quot = tsquot + return quot + + + def _handle_value(self, value): + """ + Given a value string, unquote, remove comment, + handle lists. (including empty and single member lists) + """ + if self._inspec: + # Parsing a configspec so don't handle comments + return (value, '') + # do we look for lists in values ? + if not self.list_values: + mat = self._nolistvalue.match(value) + if mat is None: + raise SyntaxError() + # NOTE: we don't unquote here + return mat.groups() + # + mat = self._valueexp.match(value) + if mat is None: + # the value is badly constructed, probably badly quoted, + # or an invalid list + raise SyntaxError() + (list_values, single, empty_list, comment) = mat.groups() + if (list_values == '') and (single is None): + # change this if you want to accept empty values + raise SyntaxError() + # NOTE: note there is no error handling from here if the regex + # is wrong: then incorrect values will slip through + if empty_list is not None: + # the single comma - meaning an empty list + return ([], comment) + if single is not None: + # handle empty values + if list_values and not single: + # FIXME: the '' is a workaround because our regex now matches + # '' at the end of a list if it has a trailing comma + single = None + else: + single = single or '""' + single = self._unquote(single) + if list_values == '': + # not a list value + return (single, comment) + the_list = self._listvalueexp.findall(list_values) + the_list = [self._unquote(val) for val in the_list] + if single is not None: + the_list += [single] + return (the_list, comment) + + + def _multiline(self, value, infile, cur_index, maxline): + """Extract the value, where we are in a multiline situation.""" + quot = value[:3] + newvalue = value[3:] + single_line = self._triple_quote[quot][0] + multi_line = self._triple_quote[quot][1] + mat = single_line.match(value) + if mat is not None: + retval = list(mat.groups()) + retval.append(cur_index) + return retval + elif newvalue.find(quot) != -1: + # somehow the triple quote is missing + raise SyntaxError() + # + while cur_index < maxline: + cur_index += 1 + newvalue += '\n' + line = infile[cur_index] + if line.find(quot) == -1: + newvalue += line + else: + # end of multiline, process it + break + else: + # we've got to the end of the config, oops... + raise SyntaxError() + mat = multi_line.match(line) + if mat is None: + # a badly formed line + raise SyntaxError() + (value, comment) = mat.groups() + return (newvalue + value, comment, cur_index) + + + def _handle_configspec(self, configspec): + """Parse the configspec.""" + # FIXME: Should we check that the configspec was created with the + # correct settings ? (i.e. ``list_values=False``) + if not isinstance(configspec, ConfigObj): + try: + configspec = ConfigObj(configspec, + raise_errors=True, + file_error=True, + _inspec=True) + except ConfigObjError, e: + # FIXME: Should these errors have a reference + # to the already parsed ConfigObj ? + raise ConfigspecError('Parsing configspec failed: %s' % e) + except IOError, e: + raise IOError('Reading configspec failed: %s' % e) + + self.configspec = configspec + + + + def _set_configspec(self, section, copy): + """ + Called by validate. Handles setting the configspec on subsections + including sections to be validated by __many__ + """ + configspec = section.configspec + many = configspec.get('__many__') + if isinstance(many, dict): + for entry in section.sections: + if entry not in configspec: + section[entry].configspec = many + + for entry in configspec.sections: + if entry == '__many__': + continue + if entry not in section: + section[entry] = {} + section[entry]._created = True + if copy: + # copy comments + section.comments[entry] = configspec.comments.get(entry, []) + section.inline_comments[entry] = configspec.inline_comments.get(entry, '') + + # Could be a scalar when we expect a section + if isinstance(section[entry], Section): + section[entry].configspec = configspec[entry] + + + def _write_line(self, indent_string, entry, this_entry, comment): + """Write an individual line, for the write method""" + # NOTE: the calls to self._quote here handles non-StringType values. + if not self.unrepr: + val = self._decode_element(self._quote(this_entry)) + else: + val = repr(this_entry) + return '%s%s%s%s%s' % (indent_string, + self._decode_element(self._quote(entry, multiline=False)), + self._a_to_u(' = '), + val, + self._decode_element(comment)) + + + def _write_marker(self, indent_string, depth, entry, comment): + """Write a section marker line""" + return '%s%s%s%s%s' % (indent_string, + self._a_to_u('[' * depth), + self._quote(self._decode_element(entry), multiline=False), + self._a_to_u(']' * depth), + self._decode_element(comment)) + + + def _handle_comment(self, comment): + """Deal with a comment.""" + if not comment: + return '' + start = self.indent_type + if not comment.startswith('#'): + start += self._a_to_u(' # ') + return (start + comment) + + + # Public methods + + def write(self, outfile=None, section=None): + """ + Write the current ConfigObj as a file + + tekNico: FIXME: use StringIO instead of real files + + >>> filename = a.filename + >>> a.filename = 'test.ini' + >>> a.write() + >>> a.filename = filename + >>> a == ConfigObj('test.ini', raise_errors=True) + 1 + >>> import os + >>> os.remove('test.ini') + """ + if self.indent_type is None: + # this can be true if initialised from a dictionary + self.indent_type = DEFAULT_INDENT_TYPE + + out = [] + cs = self._a_to_u('#') + csp = self._a_to_u('# ') + if section is None: + int_val = self.interpolation + self.interpolation = False + section = self + for line in self.initial_comment: + line = self._decode_element(line) + stripped_line = line.strip() + if stripped_line and not stripped_line.startswith(cs): + line = csp + line + out.append(line) + + indent_string = self.indent_type * section.depth + for entry in (section.scalars + section.sections): + if entry in section.defaults: + # don't write out default values + continue + for comment_line in section.comments[entry]: + comment_line = self._decode_element(comment_line.lstrip()) + if comment_line and not comment_line.startswith(cs): + comment_line = csp + comment_line + out.append(indent_string + comment_line) + this_entry = section[entry] + comment = self._handle_comment(section.inline_comments[entry]) + + if isinstance(this_entry, dict): + # a section + out.append(self._write_marker( + indent_string, + this_entry.depth, + entry, + comment)) + out.extend(self.write(section=this_entry)) + else: + out.append(self._write_line( + indent_string, + entry, + this_entry, + comment)) + + if section is self: + for line in self.final_comment: + line = self._decode_element(line) + stripped_line = line.strip() + if stripped_line and not stripped_line.startswith(cs): + line = csp + line + out.append(line) + self.interpolation = int_val + + if section is not self: + return out + + if (self.filename is None) and (outfile is None): + # output a list of lines + # might need to encode + # NOTE: This will *screw* UTF16, each line will start with the BOM + if self.encoding: + out = [l.encode(self.encoding) for l in out] + if (self.BOM and ((self.encoding is None) or + (BOM_LIST.get(self.encoding.lower()) == 'utf_8'))): + # Add the UTF8 BOM + if not out: + out.append('') + out[0] = BOM_UTF8 + out[0] + return out + + # Turn the list to a string, joined with correct newlines + newline = self.newlines or os.linesep + if (getattr(outfile, 'mode', None) is not None and outfile.mode == 'w' + and sys.platform == 'win32' and newline == '\r\n'): + # Windows specific hack to avoid writing '\r\r\n' + newline = '\n' + output = self._a_to_u(newline).join(out) + if self.encoding: + output = output.encode(self.encoding) + if self.BOM and ((self.encoding is None) or match_utf8(self.encoding)): + # Add the UTF8 BOM + output = BOM_UTF8 + output + + if not output.endswith(newline): + output += newline + if outfile is not None: + outfile.write(output) + else: + h = open(self.filename, 'wb') + h.write(output) + h.close() + + + def validate(self, validator, preserve_errors=False, copy=False, + section=None): + """ + Test the ConfigObj against a configspec. + + It uses the ``validator`` object from *validate.py*. + + To run ``validate`` on the current ConfigObj, call: :: + + test = config.validate(validator) + + (Normally having previously passed in the configspec when the ConfigObj + was created - you can dynamically assign a dictionary of checks to the + ``configspec`` attribute of a section though). + + It returns ``True`` if everything passes, or a dictionary of + pass/fails (True/False). If every member of a subsection passes, it + will just have the value ``True``. (It also returns ``False`` if all + members fail). + + In addition, it converts the values from strings to their native + types if their checks pass (and ``stringify`` is set). + + If ``preserve_errors`` is ``True`` (``False`` is default) then instead + of a marking a fail with a ``False``, it will preserve the actual + exception object. This can contain info about the reason for failure. + For example the ``VdtValueTooSmallError`` indicates that the value + supplied was too small. If a value (or section) is missing it will + still be marked as ``False``. + + You must have the validate module to use ``preserve_errors=True``. + + You can then use the ``flatten_errors`` function to turn your nested + results dictionary into a flattened list of failures - useful for + displaying meaningful error messages. + """ + if section is None: + if self.configspec is None: + raise ValueError('No configspec supplied.') + if preserve_errors: + # We do this once to remove a top level dependency on the validate module + # Which makes importing configobj faster + from validate import VdtMissingValue + self._vdtMissingValue = VdtMissingValue + + section = self + + if copy: + section.initial_comment = section.configspec.initial_comment + section.final_comment = section.configspec.final_comment + section.encoding = section.configspec.encoding + section.BOM = section.configspec.BOM + section.newlines = section.configspec.newlines + section.indent_type = section.configspec.indent_type + + # + # section.default_values.clear() #?? + configspec = section.configspec + self._set_configspec(section, copy) + + + def validate_entry(entry, spec, val, missing, ret_true, ret_false): + section.default_values.pop(entry, None) + + try: + section.default_values[entry] = validator.get_default_value(configspec[entry]) + except (KeyError, AttributeError, validator.baseErrorClass): + # No default, bad default or validator has no 'get_default_value' + # (e.g. SimpleVal) + pass + + try: + check = validator.check(spec, + val, + missing=missing + ) + except validator.baseErrorClass, e: + if not preserve_errors or isinstance(e, self._vdtMissingValue): + out[entry] = False + else: + # preserve the error + out[entry] = e + ret_false = False + ret_true = False + else: + ret_false = False + out[entry] = True + if self.stringify or missing: + # if we are doing type conversion + # or the value is a supplied default + if not self.stringify: + if isinstance(check, (list, tuple)): + # preserve lists + check = [self._str(item) for item in check] + elif missing and check is None: + # convert the None from a default to a '' + check = '' + else: + check = self._str(check) + if (check != val) or missing: + section[entry] = check + if not copy and missing and entry not in section.defaults: + section.defaults.append(entry) + return ret_true, ret_false + + # + out = {} + ret_true = True + ret_false = True + + unvalidated = [k for k in section.scalars if k not in configspec] + incorrect_sections = [k for k in configspec.sections if k in section.scalars] + incorrect_scalars = [k for k in configspec.scalars if k in section.sections] + + for entry in configspec.scalars: + if entry in ('__many__', '___many___'): + # reserved names + continue + if (not entry in section.scalars) or (entry in section.defaults): + # missing entries + # or entries from defaults + missing = True + val = None + if copy and entry not in section.scalars: + # copy comments + section.comments[entry] = ( + configspec.comments.get(entry, [])) + section.inline_comments[entry] = ( + configspec.inline_comments.get(entry, '')) + # + else: + missing = False + val = section[entry] + + ret_true, ret_false = validate_entry(entry, configspec[entry], val, + missing, ret_true, ret_false) + + many = None + if '__many__' in configspec.scalars: + many = configspec['__many__'] + elif '___many___' in configspec.scalars: + many = configspec['___many___'] + + if many is not None: + for entry in unvalidated: + val = section[entry] + ret_true, ret_false = validate_entry(entry, many, val, False, + ret_true, ret_false) + unvalidated = [] + + for entry in incorrect_scalars: + ret_true = False + if not preserve_errors: + out[entry] = False + else: + ret_false = False + msg = 'Value %r was provided as a section' % entry + out[entry] = validator.baseErrorClass(msg) + for entry in incorrect_sections: + ret_true = False + if not preserve_errors: + out[entry] = False + else: + ret_false = False + msg = 'Section %r was provided as a single value' % entry + out[entry] = validator.baseErrorClass(msg) + + # Missing sections will have been created as empty ones when the + # configspec was read. + for entry in section.sections: + # FIXME: this means DEFAULT is not copied in copy mode + if section is self and entry == 'DEFAULT': + continue + if section[entry].configspec is None: + unvalidated.append(entry) + continue + if copy: + section.comments[entry] = configspec.comments.get(entry, []) + section.inline_comments[entry] = configspec.inline_comments.get(entry, '') + check = self.validate(validator, preserve_errors=preserve_errors, copy=copy, section=section[entry]) + out[entry] = check + if check == False: + ret_true = False + elif check == True: + ret_false = False + else: + ret_true = False + + section.extra_values = unvalidated + if preserve_errors and not section._created: + # If the section wasn't created (i.e. it wasn't missing) + # then we can't return False, we need to preserve errors + ret_false = False + # + if ret_false and preserve_errors and out: + # If we are preserving errors, but all + # the failures are from missing sections / values + # then we can return False. Otherwise there is a + # real failure that we need to preserve. + ret_false = not any(out.values()) + if ret_true: + return True + elif ret_false: + return False + return out + + + def reset(self): + """Clear ConfigObj instance and restore to 'freshly created' state.""" + self.clear() + self._initialise() + # FIXME: Should be done by '_initialise', but ConfigObj constructor (and reload) + # requires an empty dictionary + self.configspec = None + # Just to be sure ;-) + self._original_configspec = None + + + def reload(self): + """ + Reload a ConfigObj from file. + + This method raises a ``ReloadError`` if the ConfigObj doesn't have + a filename attribute pointing to a file. + """ + if not isinstance(self.filename, basestring): + raise ReloadError() + + filename = self.filename + current_options = {} + for entry in OPTION_DEFAULTS: + if entry == 'configspec': + continue + current_options[entry] = getattr(self, entry) + + configspec = self._original_configspec + current_options['configspec'] = configspec + + self.clear() + self._initialise(current_options) + self._load(filename, configspec) + + + +class SimpleVal(object): + """ + A simple validator. + Can be used to check that all members expected are present. + + To use it, provide a configspec with all your members in (the value given + will be ignored). Pass an instance of ``SimpleVal`` to the ``validate`` + method of your ``ConfigObj``. ``validate`` will return ``True`` if all + members are present, or a dictionary with True/False meaning + present/missing. (Whole missing sections will be replaced with ``False``) + """ + + def __init__(self): + self.baseErrorClass = ConfigObjError + + def check(self, check, member, missing=False): + """A dummy check method, always returns the value unchanged.""" + if missing: + raise self.baseErrorClass() + return member + + +def flatten_errors(cfg, res, levels=None, results=None): + """ + An example function that will turn a nested dictionary of results + (as returned by ``ConfigObj.validate``) into a flat list. + + ``cfg`` is the ConfigObj instance being checked, ``res`` is the results + dictionary returned by ``validate``. + + (This is a recursive function, so you shouldn't use the ``levels`` or + ``results`` arguments - they are used by the function.) + + Returns a list of keys that failed. Each member of the list is a tuple:: + + ([list of sections...], key, result) + + If ``validate`` was called with ``preserve_errors=False`` (the default) + then ``result`` will always be ``False``. + + *list of sections* is a flattened list of sections that the key was found + in. + + If the section was missing (or a section was expected and a scalar provided + - or vice-versa) then key will be ``None``. + + If the value (or section) was missing then ``result`` will be ``False``. + + If ``validate`` was called with ``preserve_errors=True`` and a value + was present, but failed the check, then ``result`` will be the exception + object returned. You can use this as a string that describes the failure. + + For example *The value "3" is of the wrong type*. + """ + if levels is None: + # first time called + levels = [] + results = [] + if res == True: + return results + if res == False or isinstance(res, Exception): + results.append((levels[:], None, res)) + if levels: + levels.pop() + return results + for (key, val) in res.items(): + if val == True: + continue + if isinstance(cfg.get(key), dict): + # Go down one level + levels.append(key) + flatten_errors(cfg[key], val, levels, results) + continue + results.append((levels[:], key, val)) + # + # Go up one level + if levels: + levels.pop() + # + return results + + +def get_extra_values(conf, _prepend=()): + """ + Find all the values and sections not in the configspec from a validated + ConfigObj. + + ``get_extra_values`` returns a list of tuples where each tuple represents + either an extra section, or an extra value. + + The tuples contain two values, a tuple representing the section the value + is in and the name of the extra values. For extra values in the top level + section the first member will be an empty tuple. For values in the 'foo' + section the first member will be ``('foo',)``. For members in the 'bar' + subsection of the 'foo' section the first member will be ``('foo', 'bar')``. + + NOTE: If you call ``get_extra_values`` on a ConfigObj instance that hasn't + been validated it will return an empty list. + """ + out = [] + + out.extend([(_prepend, name) for name in conf.extra_values]) + for name in conf.sections: + if name not in conf.extra_values: + out.extend(get_extra_values(conf[name], _prepend + (name,))) + return out + + +"""*A programming language is a medium of expression.* - Paul Graham""" diff --git a/lib/enum/LICENSE b/lib/enum/LICENSE new file mode 100644 index 00000000..9003b885 --- /dev/null +++ b/lib/enum/LICENSE @@ -0,0 +1,32 @@ +Copyright (c) 2013, Ethan Furman. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + Redistributions of source code must retain the above + copyright notice, this list of conditions and the + following disclaimer. + + Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + + Neither the name Ethan Furman nor the names of any + contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/lib/enum/README b/lib/enum/README new file mode 100644 index 00000000..511af984 --- /dev/null +++ b/lib/enum/README @@ -0,0 +1,2 @@ +enum34 is the new Python stdlib enum module available in Python 3.4 +backported for previous versions of Python from 2.4 to 3.3. diff --git a/lib/enum/__init__.py b/lib/enum/__init__.py new file mode 100644 index 00000000..6a327a8a --- /dev/null +++ b/lib/enum/__init__.py @@ -0,0 +1,790 @@ +"""Python Enumerations""" + +import sys as _sys + +__all__ = ['Enum', 'IntEnum', 'unique'] + +version = 1, 0, 4 + +pyver = float('%s.%s' % _sys.version_info[:2]) + +try: + any +except NameError: + def any(iterable): + for element in iterable: + if element: + return True + return False + +try: + from collections import OrderedDict +except ImportError: + OrderedDict = None + +try: + basestring +except NameError: + # In Python 2 basestring is the ancestor of both str and unicode + # in Python 3 it's just str, but was missing in 3.1 + basestring = str + +try: + unicode +except NameError: + # In Python 3 unicode no longer exists (it's just str) + unicode = str + +class _RouteClassAttributeToGetattr(object): + """Route attribute access on a class to __getattr__. + + This is a descriptor, used to define attributes that act differently when + accessed through an instance and through a class. Instance access remains + normal, but access to an attribute through a class will be routed to the + class's __getattr__ method; this is done by raising AttributeError. + + """ + def __init__(self, fget=None): + self.fget = fget + + def __get__(self, instance, ownerclass=None): + if instance is None: + raise AttributeError() + return self.fget(instance) + + def __set__(self, instance, value): + raise AttributeError("can't set attribute") + + def __delete__(self, instance): + raise AttributeError("can't delete attribute") + + +def _is_descriptor(obj): + """Returns True if obj is a descriptor, False otherwise.""" + return ( + hasattr(obj, '__get__') or + hasattr(obj, '__set__') or + hasattr(obj, '__delete__')) + + +def _is_dunder(name): + """Returns True if a __dunder__ name, False otherwise.""" + return (name[:2] == name[-2:] == '__' and + name[2:3] != '_' and + name[-3:-2] != '_' and + len(name) > 4) + + +def _is_sunder(name): + """Returns True if a _sunder_ name, False otherwise.""" + return (name[0] == name[-1] == '_' and + name[1:2] != '_' and + name[-2:-1] != '_' and + len(name) > 2) + + +def _make_class_unpicklable(cls): + """Make the given class un-picklable.""" + def _break_on_call_reduce(self, protocol=None): + raise TypeError('%r cannot be pickled' % self) + cls.__reduce_ex__ = _break_on_call_reduce + cls.__module__ = '' + + +class _EnumDict(dict): + """Track enum member order and ensure member names are not reused. + + EnumMeta will use the names found in self._member_names as the + enumeration member names. + + """ + def __init__(self): + super(_EnumDict, self).__init__() + self._member_names = [] + + def __setitem__(self, key, value): + """Changes anything not dundered or not a descriptor. + + If a descriptor is added with the same name as an enum member, the name + is removed from _member_names (this may leave a hole in the numerical + sequence of values). + + If an enum member name is used twice, an error is raised; duplicate + values are not checked for. + + Single underscore (sunder) names are reserved. + + Note: in 3.x __order__ is simply discarded as a not necessary piece + leftover from 2.x + + """ + if pyver >= 3.0 and key == '__order__': + return + if _is_sunder(key): + raise ValueError('_names_ are reserved for future Enum use') + elif _is_dunder(key): + pass + elif key in self._member_names: + # descriptor overwriting an enum? + raise TypeError('Attempted to reuse key: %r' % key) + elif not _is_descriptor(value): + if key in self: + # enum overwriting a descriptor? + raise TypeError('Key already defined as: %r' % self[key]) + self._member_names.append(key) + super(_EnumDict, self).__setitem__(key, value) + + +# Dummy value for Enum as EnumMeta explicity checks for it, but of course until +# EnumMeta finishes running the first time the Enum class doesn't exist. This +# is also why there are checks in EnumMeta like `if Enum is not None` +Enum = None + + +class EnumMeta(type): + """Metaclass for Enum""" + @classmethod + def __prepare__(metacls, cls, bases): + return _EnumDict() + + def __new__(metacls, cls, bases, classdict): + # an Enum class is final once enumeration items have been defined; it + # cannot be mixed with other types (int, float, etc.) if it has an + # inherited __new__ unless a new __new__ is defined (or the resulting + # class will fail). + if type(classdict) is dict: + original_dict = classdict + classdict = _EnumDict() + for k, v in original_dict.items(): + classdict[k] = v + + member_type, first_enum = metacls._get_mixins_(bases) + __new__, save_new, use_args = metacls._find_new_(classdict, member_type, + first_enum) + # save enum items into separate mapping so they don't get baked into + # the new class + members = dict((k, classdict[k]) for k in classdict._member_names) + for name in classdict._member_names: + del classdict[name] + + # py2 support for definition order + __order__ = classdict.get('__order__') + if __order__ is None: + if pyver < 3.0: + try: + __order__ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])] + except TypeError: + __order__ = [name for name in sorted(members.keys())] + else: + __order__ = classdict._member_names + else: + del classdict['__order__'] + if pyver < 3.0: + __order__ = __order__.replace(',', ' ').split() + aliases = [name for name in members if name not in __order__] + __order__ += aliases + + # check for illegal enum names (any others?) + invalid_names = set(members) & set(['mro']) + if invalid_names: + raise ValueError('Invalid enum member name(s): %s' % ( + ', '.join(invalid_names), )) + + # create our new Enum type + enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict) + enum_class._member_names_ = [] # names in random order + if OrderedDict is not None: + enum_class._member_map_ = OrderedDict() + else: + enum_class._member_map_ = {} # name->value map + enum_class._member_type_ = member_type + + # Reverse value->name map for hashable values. + enum_class._value2member_map_ = {} + + # instantiate them, checking for duplicates as we go + # we instantiate first instead of checking for duplicates first in case + # a custom __new__ is doing something funky with the values -- such as + # auto-numbering ;) + if __new__ is None: + __new__ = enum_class.__new__ + for member_name in __order__: + value = members[member_name] + if not isinstance(value, tuple): + args = (value, ) + else: + args = value + if member_type is tuple: # special case for tuple enums + args = (args, ) # wrap it one more time + if not use_args or not args: + enum_member = __new__(enum_class) + if not hasattr(enum_member, '_value_'): + enum_member._value_ = value + else: + enum_member = __new__(enum_class, *args) + if not hasattr(enum_member, '_value_'): + enum_member._value_ = member_type(*args) + value = enum_member._value_ + enum_member._name_ = member_name + enum_member.__objclass__ = enum_class + enum_member.__init__(*args) + # If another member with the same value was already defined, the + # new member becomes an alias to the existing one. + for name, canonical_member in enum_class._member_map_.items(): + if canonical_member.value == enum_member._value_: + enum_member = canonical_member + break + else: + # Aliases don't appear in member names (only in __members__). + enum_class._member_names_.append(member_name) + enum_class._member_map_[member_name] = enum_member + try: + # This may fail if value is not hashable. We can't add the value + # to the map, and by-value lookups for this value will be + # linear. + enum_class._value2member_map_[value] = enum_member + except TypeError: + pass + + + # If a custom type is mixed into the Enum, and it does not know how + # to pickle itself, pickle.dumps will succeed but pickle.loads will + # fail. Rather than have the error show up later and possibly far + # from the source, sabotage the pickle protocol for this class so + # that pickle.dumps also fails. + # + # However, if the new class implements its own __reduce_ex__, do not + # sabotage -- it's on them to make sure it works correctly. We use + # __reduce_ex__ instead of any of the others as it is preferred by + # pickle over __reduce__, and it handles all pickle protocols. + unpicklable = False + if '__reduce_ex__' not in classdict: + if member_type is not object: + methods = ('__getnewargs_ex__', '__getnewargs__', + '__reduce_ex__', '__reduce__') + if not any(m in member_type.__dict__ for m in methods): + _make_class_unpicklable(enum_class) + unpicklable = True + + + # double check that repr and friends are not the mixin's or various + # things break (such as pickle) + for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'): + class_method = getattr(enum_class, name) + obj_method = getattr(member_type, name, None) + enum_method = getattr(first_enum, name, None) + if name not in classdict and class_method is not enum_method: + if name == '__reduce_ex__' and unpicklable: + continue + setattr(enum_class, name, enum_method) + + # method resolution and int's are not playing nice + # Python's less than 2.6 use __cmp__ + + if pyver < 2.6: + + if issubclass(enum_class, int): + setattr(enum_class, '__cmp__', getattr(int, '__cmp__')) + + elif pyver < 3.0: + + if issubclass(enum_class, int): + for method in ( + '__le__', + '__lt__', + '__gt__', + '__ge__', + '__eq__', + '__ne__', + '__hash__', + ): + setattr(enum_class, method, getattr(int, method)) + + # replace any other __new__ with our own (as long as Enum is not None, + # anyway) -- again, this is to support pickle + if Enum is not None: + # if the user defined their own __new__, save it before it gets + # clobbered in case they subclass later + if save_new: + setattr(enum_class, '__member_new__', enum_class.__dict__['__new__']) + setattr(enum_class, '__new__', Enum.__dict__['__new__']) + return enum_class + + def __call__(cls, value, names=None, module=None, type=None): + """Either returns an existing member, or creates a new enum class. + + This method is used both when an enum class is given a value to match + to an enumeration member (i.e. Color(3)) and for the functional API + (i.e. Color = Enum('Color', names='red green blue')). + + When used for the functional API: `module`, if set, will be stored in + the new class' __module__ attribute; `type`, if set, will be mixed in + as the first base class. + + Note: if `module` is not set this routine will attempt to discover the + calling module by walking the frame stack; if this is unsuccessful + the resulting class will not be pickleable. + + """ + if names is None: # simple value lookup + return cls.__new__(cls, value) + # otherwise, functional API: we're creating a new Enum type + return cls._create_(value, names, module=module, type=type) + + def __contains__(cls, member): + return isinstance(member, cls) and member.name in cls._member_map_ + + def __delattr__(cls, attr): + # nicer error message when someone tries to delete an attribute + # (see issue19025). + if attr in cls._member_map_: + raise AttributeError( + "%s: cannot delete Enum member." % cls.__name__) + super(EnumMeta, cls).__delattr__(attr) + + def __dir__(self): + return (['__class__', '__doc__', '__members__', '__module__'] + + self._member_names_) + + @property + def __members__(cls): + """Returns a mapping of member name->value. + + This mapping lists all enum members, including aliases. Note that this + is a copy of the internal mapping. + + """ + return cls._member_map_.copy() + + def __getattr__(cls, name): + """Return the enum member matching `name` + + We use __getattr__ instead of descriptors or inserting into the enum + class' __dict__ in order to support `name` and `value` being both + properties for enum members (which live in the class' __dict__) and + enum members themselves. + + """ + if _is_dunder(name): + raise AttributeError(name) + try: + return cls._member_map_[name] + except KeyError: + raise AttributeError(name) + + def __getitem__(cls, name): + return cls._member_map_[name] + + def __iter__(cls): + return (cls._member_map_[name] for name in cls._member_names_) + + def __reversed__(cls): + return (cls._member_map_[name] for name in reversed(cls._member_names_)) + + def __len__(cls): + return len(cls._member_names_) + + def __repr__(cls): + return "" % cls.__name__ + + def __setattr__(cls, name, value): + """Block attempts to reassign Enum members. + + A simple assignment to the class namespace only changes one of the + several possible ways to get an Enum member from the Enum class, + resulting in an inconsistent Enumeration. + + """ + member_map = cls.__dict__.get('_member_map_', {}) + if name in member_map: + raise AttributeError('Cannot reassign members.') + super(EnumMeta, cls).__setattr__(name, value) + + def _create_(cls, class_name, names=None, module=None, type=None): + """Convenience method to create a new Enum class. + + `names` can be: + + * A string containing member names, separated either with spaces or + commas. Values are auto-numbered from 1. + * An iterable of member names. Values are auto-numbered from 1. + * An iterable of (member name, value) pairs. + * A mapping of member name -> value. + + """ + if pyver < 3.0: + # if class_name is unicode, attempt a conversion to ASCII + if isinstance(class_name, unicode): + try: + class_name = class_name.encode('ascii') + except UnicodeEncodeError: + raise TypeError('%r is not representable in ASCII' % class_name) + metacls = cls.__class__ + if type is None: + bases = (cls, ) + else: + bases = (type, cls) + classdict = metacls.__prepare__(class_name, bases) + __order__ = [] + + # special processing needed for names? + if isinstance(names, basestring): + names = names.replace(',', ' ').split() + if isinstance(names, (tuple, list)) and isinstance(names[0], basestring): + names = [(e, i+1) for (i, e) in enumerate(names)] + + # Here, names is either an iterable of (name, value) or a mapping. + for item in names: + if isinstance(item, basestring): + member_name, member_value = item, names[item] + else: + member_name, member_value = item + classdict[member_name] = member_value + __order__.append(member_name) + # only set __order__ in classdict if name/value was not from a mapping + if not isinstance(item, basestring): + classdict['__order__'] = ' '.join(__order__) + enum_class = metacls.__new__(metacls, class_name, bases, classdict) + + # TODO: replace the frame hack if a blessed way to know the calling + # module is ever developed + if module is None: + try: + module = _sys._getframe(2).f_globals['__name__'] + except (AttributeError, ValueError): + pass + if module is None: + _make_class_unpicklable(enum_class) + else: + enum_class.__module__ = module + + return enum_class + + @staticmethod + def _get_mixins_(bases): + """Returns the type for creating enum members, and the first inherited + enum class. + + bases: the tuple of bases that was given to __new__ + + """ + if not bases or Enum is None: + return object, Enum + + + # double check that we are not subclassing a class with existing + # enumeration members; while we're at it, see if any other data + # type has been mixed in so we can use the correct __new__ + member_type = first_enum = None + for base in bases: + if (base is not Enum and + issubclass(base, Enum) and + base._member_names_): + raise TypeError("Cannot extend enumerations") + # base is now the last base in bases + if not issubclass(base, Enum): + raise TypeError("new enumerations must be created as " + "`ClassName([mixin_type,] enum_type)`") + + # get correct mix-in type (either mix-in type of Enum subclass, or + # first base if last base is Enum) + if not issubclass(bases[0], Enum): + member_type = bases[0] # first data type + first_enum = bases[-1] # enum type + else: + for base in bases[0].__mro__: + # most common: (IntEnum, int, Enum, object) + # possible: (, , + # , , + # ) + if issubclass(base, Enum): + if first_enum is None: + first_enum = base + else: + if member_type is None: + member_type = base + + return member_type, first_enum + + if pyver < 3.0: + @staticmethod + def _find_new_(classdict, member_type, first_enum): + """Returns the __new__ to be used for creating the enum members. + + classdict: the class dictionary given to __new__ + member_type: the data type whose __new__ will be used by default + first_enum: enumeration to check for an overriding __new__ + + """ + # now find the correct __new__, checking to see of one was defined + # by the user; also check earlier enum classes in case a __new__ was + # saved as __member_new__ + __new__ = classdict.get('__new__', None) + if __new__: + return None, True, True # __new__, save_new, use_args + + N__new__ = getattr(None, '__new__') + O__new__ = getattr(object, '__new__') + if Enum is None: + E__new__ = N__new__ + else: + E__new__ = Enum.__dict__['__new__'] + # check all possibles for __member_new__ before falling back to + # __new__ + for method in ('__member_new__', '__new__'): + for possible in (member_type, first_enum): + try: + target = possible.__dict__[method] + except (AttributeError, KeyError): + target = getattr(possible, method, None) + if target not in [ + None, + N__new__, + O__new__, + E__new__, + ]: + if method == '__member_new__': + classdict['__new__'] = target + return None, False, True + if isinstance(target, staticmethod): + target = target.__get__(member_type) + __new__ = target + break + if __new__ is not None: + break + else: + __new__ = object.__new__ + + # if a non-object.__new__ is used then whatever value/tuple was + # assigned to the enum member name will be passed to __new__ and to the + # new enum member's __init__ + if __new__ is object.__new__: + use_args = False + else: + use_args = True + + return __new__, False, use_args + else: + @staticmethod + def _find_new_(classdict, member_type, first_enum): + """Returns the __new__ to be used for creating the enum members. + + classdict: the class dictionary given to __new__ + member_type: the data type whose __new__ will be used by default + first_enum: enumeration to check for an overriding __new__ + + """ + # now find the correct __new__, checking to see of one was defined + # by the user; also check earlier enum classes in case a __new__ was + # saved as __member_new__ + __new__ = classdict.get('__new__', None) + + # should __new__ be saved as __member_new__ later? + save_new = __new__ is not None + + if __new__ is None: + # check all possibles for __member_new__ before falling back to + # __new__ + for method in ('__member_new__', '__new__'): + for possible in (member_type, first_enum): + target = getattr(possible, method, None) + if target not in ( + None, + None.__new__, + object.__new__, + Enum.__new__, + ): + __new__ = target + break + if __new__ is not None: + break + else: + __new__ = object.__new__ + + # if a non-object.__new__ is used then whatever value/tuple was + # assigned to the enum member name will be passed to __new__ and to the + # new enum member's __init__ + if __new__ is object.__new__: + use_args = False + else: + use_args = True + + return __new__, save_new, use_args + + +######################################################## +# In order to support Python 2 and 3 with a single +# codebase we have to create the Enum methods separately +# and then use the `type(name, bases, dict)` method to +# create the class. +######################################################## +temp_enum_dict = {} +temp_enum_dict['__doc__'] = "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n" + +def __new__(cls, value): + # all enum instances are actually created during class construction + # without calling this method; this method is called by the metaclass' + # __call__ (i.e. Color(3) ), and by pickle + if type(value) is cls: + # For lookups like Color(Color.red) + value = value.value + #return value + # by-value search for a matching enum member + # see if it's in the reverse mapping (for hashable values) + try: + if value in cls._value2member_map_: + return cls._value2member_map_[value] + except TypeError: + # not there, now do long search -- O(n) behavior + for member in cls._member_map_.values(): + if member.value == value: + return member + raise ValueError("%s is not a valid %s" % (value, cls.__name__)) +temp_enum_dict['__new__'] = __new__ +del __new__ + +def __repr__(self): + return "<%s.%s: %r>" % ( + self.__class__.__name__, self._name_, self._value_) +temp_enum_dict['__repr__'] = __repr__ +del __repr__ + +def __str__(self): + return "%s.%s" % (self.__class__.__name__, self._name_) +temp_enum_dict['__str__'] = __str__ +del __str__ + +def __dir__(self): + added_behavior = [ + m + for cls in self.__class__.mro() + for m in cls.__dict__ + if m[0] != '_' + ] + return (['__class__', '__doc__', '__module__', ] + added_behavior) +temp_enum_dict['__dir__'] = __dir__ +del __dir__ + +def __format__(self, format_spec): + # mixed-in Enums should use the mixed-in type's __format__, otherwise + # we can get strange results with the Enum name showing up instead of + # the value + + # pure Enum branch + if self._member_type_ is object: + cls = str + val = str(self) + # mix-in branch + else: + cls = self._member_type_ + val = self.value + return cls.__format__(val, format_spec) +temp_enum_dict['__format__'] = __format__ +del __format__ + + +#################################### +# Python's less than 2.6 use __cmp__ + +if pyver < 2.6: + + def __cmp__(self, other): + if type(other) is self.__class__: + if self is other: + return 0 + return -1 + return NotImplemented + raise TypeError("unorderable types: %s() and %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__cmp__'] = __cmp__ + del __cmp__ + +else: + + def __le__(self, other): + raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__le__'] = __le__ + del __le__ + + def __lt__(self, other): + raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__lt__'] = __lt__ + del __lt__ + + def __ge__(self, other): + raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__ge__'] = __ge__ + del __ge__ + + def __gt__(self, other): + raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__gt__'] = __gt__ + del __gt__ + + +def __eq__(self, other): + if type(other) is self.__class__: + return self is other + return NotImplemented +temp_enum_dict['__eq__'] = __eq__ +del __eq__ + +def __ne__(self, other): + if type(other) is self.__class__: + return self is not other + return NotImplemented +temp_enum_dict['__ne__'] = __ne__ +del __ne__ + +def __hash__(self): + return hash(self._name_) +temp_enum_dict['__hash__'] = __hash__ +del __hash__ + +def __reduce_ex__(self, proto): + return self.__class__, (self._value_, ) +temp_enum_dict['__reduce_ex__'] = __reduce_ex__ +del __reduce_ex__ + +# _RouteClassAttributeToGetattr is used to provide access to the `name` +# and `value` properties of enum members while keeping some measure of +# protection from modification, while still allowing for an enumeration +# to have members named `name` and `value`. This works because enumeration +# members are not set directly on the enum class -- __getattr__ is +# used to look them up. + +@_RouteClassAttributeToGetattr +def name(self): + return self._name_ +temp_enum_dict['name'] = name +del name + +@_RouteClassAttributeToGetattr +def value(self): + return self._value_ +temp_enum_dict['value'] = value +del value + +Enum = EnumMeta('Enum', (object, ), temp_enum_dict) +del temp_enum_dict + +# Enum has now been created +########################### + +class IntEnum(int, Enum): + """Enum where members are also (and must be) ints""" + + +def unique(enumeration): + """Class decorator that ensures only unique members exist in an enumeration.""" + duplicates = [] + for name, member in enumeration.__members__.items(): + if name != member.name: + duplicates.append((name, member.name)) + if duplicates: + duplicate_names = ', '.join( + ["%s -> %s" % (alias, name) for (alias, name) in duplicates] + ) + raise ValueError('duplicate names found in %r: %s' % + (enumeration, duplicate_names) + ) + return enumeration diff --git a/lib/enum/doc/enum.rst b/lib/enum/doc/enum.rst new file mode 100644 index 00000000..0d429bfc --- /dev/null +++ b/lib/enum/doc/enum.rst @@ -0,0 +1,725 @@ +``enum`` --- support for enumerations +======================================== + +.. :synopsis: enumerations are sets of symbolic names bound to unique, constant + values. +.. :moduleauthor:: Ethan Furman +.. :sectionauthor:: Barry Warsaw , +.. :sectionauthor:: Eli Bendersky , +.. :sectionauthor:: Ethan Furman + +---------------- + +An enumeration is a set of symbolic names (members) bound to unique, constant +values. Within an enumeration, the members can be compared by identity, and +the enumeration itself can be iterated over. + + +Module Contents +--------------- + +This module defines two enumeration classes that can be used to define unique +sets of names and values: ``Enum`` and ``IntEnum``. It also defines +one decorator, ``unique``. + +``Enum`` + +Base class for creating enumerated constants. See section `Functional API`_ +for an alternate construction syntax. + +``IntEnum`` + +Base class for creating enumerated constants that are also subclasses of ``int``. + +``unique`` + +Enum class decorator that ensures only one name is bound to any one value. + + +Creating an Enum +---------------- + +Enumerations are created using the ``class`` syntax, which makes them +easy to read and write. An alternative creation method is described in +`Functional API`_. To define an enumeration, subclass ``Enum`` as +follows:: + + >>> from enum import Enum + >>> class Color(Enum): + ... red = 1 + ... green = 2 + ... blue = 3 + +Note: Nomenclature + + - The class ``Color`` is an *enumeration* (or *enum*) + - The attributes ``Color.red``, ``Color.green``, etc., are + *enumeration members* (or *enum members*). + - The enum members have *names* and *values* (the name of + ``Color.red`` is ``red``, the value of ``Color.blue`` is + ``3``, etc.) + +Note: + + Even though we use the ``class`` syntax to create Enums, Enums + are not normal Python classes. See `How are Enums different?`_ for + more details. + +Enumeration members have human readable string representations:: + + >>> print(Color.red) + Color.red + +...while their ``repr`` has more information:: + + >>> print(repr(Color.red)) + + +The *type* of an enumeration member is the enumeration it belongs to:: + + >>> type(Color.red) + + >>> isinstance(Color.green, Color) + True + >>> + +Enum members also have a property that contains just their item name:: + + >>> print(Color.red.name) + red + +Enumerations support iteration. In Python 3.x definition order is used; in +Python 2.x the definition order is not available, but class attribute +``__order__`` is supported; otherwise, value order is used:: + + >>> class Shake(Enum): + ... __order__ = 'vanilla chocolate cookies mint' # only needed in 2.x + ... vanilla = 7 + ... chocolate = 4 + ... cookies = 9 + ... mint = 3 + ... + >>> for shake in Shake: + ... print(shake) + ... + Shake.vanilla + Shake.chocolate + Shake.cookies + Shake.mint + +The ``__order__`` attribute is always removed, and in 3.x it is also ignored +(order is definition order); however, in the stdlib version it will be ignored +but not removed. + +Enumeration members are hashable, so they can be used in dictionaries and sets:: + + >>> apples = {} + >>> apples[Color.red] = 'red delicious' + >>> apples[Color.green] = 'granny smith' + >>> apples == {Color.red: 'red delicious', Color.green: 'granny smith'} + True + + +Programmatic access to enumeration members and their attributes +--------------------------------------------------------------- + +Sometimes it's useful to access members in enumerations programmatically (i.e. +situations where ``Color.red`` won't do because the exact color is not known +at program-writing time). ``Enum`` allows such access:: + + >>> Color(1) + + >>> Color(3) + + +If you want to access enum members by *name*, use item access:: + + >>> Color['red'] + + >>> Color['green'] + + +If have an enum member and need its ``name`` or ``value``:: + + >>> member = Color.red + >>> member.name + 'red' + >>> member.value + 1 + + +Duplicating enum members and values +----------------------------------- + +Having two enum members (or any other attribute) with the same name is invalid; +in Python 3.x this would raise an error, but in Python 2.x the second member +simply overwrites the first:: + + >>> # python 2.x + >>> class Shape(Enum): + ... square = 2 + ... square = 3 + ... + >>> Shape.square + + + >>> # python 3.x + >>> class Shape(Enum): + ... square = 2 + ... square = 3 + Traceback (most recent call last): + ... + TypeError: Attempted to reuse key: 'square' + +However, two enum members are allowed to have the same value. Given two members +A and B with the same value (and A defined first), B is an alias to A. By-value +lookup of the value of A and B will return A. By-name lookup of B will also +return A:: + + >>> class Shape(Enum): + ... __order__ = 'square diamond circle alias_for_square' # only needed in 2.x + ... square = 2 + ... diamond = 1 + ... circle = 3 + ... alias_for_square = 2 + ... + >>> Shape.square + + >>> Shape.alias_for_square + + >>> Shape(2) + + + +Allowing aliases is not always desirable. ``unique`` can be used to ensure +that none exist in a particular enumeration:: + + >>> from enum import unique + >>> @unique + ... class Mistake(Enum): + ... __order__ = 'one two three four' # only needed in 2.x + ... one = 1 + ... two = 2 + ... three = 3 + ... four = 3 + Traceback (most recent call last): + ... + ValueError: duplicate names found in : four -> three + +Iterating over the members of an enum does not provide the aliases:: + + >>> list(Shape) + [, , ] + +The special attribute ``__members__`` is a dictionary mapping names to members. +It includes all names defined in the enumeration, including the aliases:: + + >>> for name, member in sorted(Shape.__members__.items()): + ... name, member + ... + ('alias_for_square', ) + ('circle', ) + ('diamond', ) + ('square', ) + +The ``__members__`` attribute can be used for detailed programmatic access to +the enumeration members. For example, finding all the aliases:: + + >>> [name for name, member in Shape.__members__.items() if member.name != name] + ['alias_for_square'] + +Comparisons +----------- + +Enumeration members are compared by identity:: + + >>> Color.red is Color.red + True + >>> Color.red is Color.blue + False + >>> Color.red is not Color.blue + True + +Ordered comparisons between enumeration values are *not* supported. Enum +members are not integers (but see `IntEnum`_ below):: + + >>> Color.red < Color.blue + Traceback (most recent call last): + File "", line 1, in + TypeError: unorderable types: Color() < Color() + +.. warning:: + + In Python 2 *everything* is ordered, even though the ordering may not + make sense. If you want your enumerations to have a sensible ordering + check out the `OrderedEnum`_ recipe below. + + +Equality comparisons are defined though:: + + >>> Color.blue == Color.red + False + >>> Color.blue != Color.red + True + >>> Color.blue == Color.blue + True + +Comparisons against non-enumeration values will always compare not equal +(again, ``IntEnum`` was explicitly designed to behave differently, see +below):: + + >>> Color.blue == 2 + False + + +Allowed members and attributes of enumerations +---------------------------------------------- + +The examples above use integers for enumeration values. Using integers is +short and handy (and provided by default by the `Functional API`_), but not +strictly enforced. In the vast majority of use-cases, one doesn't care what +the actual value of an enumeration is. But if the value *is* important, +enumerations can have arbitrary values. + +Enumerations are Python classes, and can have methods and special methods as +usual. If we have this enumeration:: + + >>> class Mood(Enum): + ... funky = 1 + ... happy = 3 + ... + ... def describe(self): + ... # self is the member here + ... return self.name, self.value + ... + ... def __str__(self): + ... return 'my custom str! {0}'.format(self.value) + ... + ... @classmethod + ... def favorite_mood(cls): + ... # cls here is the enumeration + ... return cls.happy + +Then:: + + >>> Mood.favorite_mood() + + >>> Mood.happy.describe() + ('happy', 3) + >>> str(Mood.funky) + 'my custom str! 1' + +The rules for what is allowed are as follows: _sunder_ names (starting and +ending with a single underscore) are reserved by enum and cannot be used; +all other attributes defined within an enumeration will become members of this +enumeration, with the exception of *__dunder__* names and descriptors (methods +are also descriptors). + +Note: + + If your enumeration defines ``__new__`` and/or ``__init__`` then + whatever value(s) were given to the enum member will be passed into + those methods. See `Planet`_ for an example. + + +Restricted subclassing of enumerations +-------------------------------------- + +Subclassing an enumeration is allowed only if the enumeration does not define +any members. So this is forbidden:: + + >>> class MoreColor(Color): + ... pink = 17 + Traceback (most recent call last): + ... + TypeError: Cannot extend enumerations + +But this is allowed:: + + >>> class Foo(Enum): + ... def some_behavior(self): + ... pass + ... + >>> class Bar(Foo): + ... happy = 1 + ... sad = 2 + ... + +Allowing subclassing of enums that define members would lead to a violation of +some important invariants of types and instances. On the other hand, it makes +sense to allow sharing some common behavior between a group of enumerations. +(See `OrderedEnum`_ for an example.) + + +Pickling +-------- + +Enumerations can be pickled and unpickled:: + + >>> from enum.test_enum import Fruit + >>> from pickle import dumps, loads + >>> Fruit.tomato is loads(dumps(Fruit.tomato, 2)) + True + +The usual restrictions for pickling apply: picklable enums must be defined in +the top level of a module, since unpickling requires them to be importable +from that module. + +Note: + + With pickle protocol version 4 (introduced in Python 3.4) it is possible + to easily pickle enums nested in other classes. + + + +Functional API +-------------- + +The ``Enum`` class is callable, providing the following functional API:: + + >>> Animal = Enum('Animal', 'ant bee cat dog') + >>> Animal + + >>> Animal.ant + + >>> Animal.ant.value + 1 + >>> list(Animal) + [, , , ] + +The semantics of this API resemble ``namedtuple``. The first argument +of the call to ``Enum`` is the name of the enumeration. + +The second argument is the *source* of enumeration member names. It can be a +whitespace-separated string of names, a sequence of names, a sequence of +2-tuples with key/value pairs, or a mapping (e.g. dictionary) of names to +values. The last two options enable assigning arbitrary values to +enumerations; the others auto-assign increasing integers starting with 1. A +new class derived from ``Enum`` is returned. In other words, the above +assignment to ``Animal`` is equivalent to:: + + >>> class Animals(Enum): + ... ant = 1 + ... bee = 2 + ... cat = 3 + ... dog = 4 + +Pickling enums created with the functional API can be tricky as frame stack +implementation details are used to try and figure out which module the +enumeration is being created in (e.g. it will fail if you use a utility +function in separate module, and also may not work on IronPython or Jython). +The solution is to specify the module name explicitly as follows:: + + >>> Animals = Enum('Animals', 'ant bee cat dog', module=__name__) + +Derived Enumerations +-------------------- + +IntEnum +^^^^^^^ + +A variation of ``Enum`` is provided which is also a subclass of +``int``. Members of an ``IntEnum`` can be compared to integers; +by extension, integer enumerations of different types can also be compared +to each other:: + + >>> from enum import IntEnum + >>> class Shape(IntEnum): + ... circle = 1 + ... square = 2 + ... + >>> class Request(IntEnum): + ... post = 1 + ... get = 2 + ... + >>> Shape == 1 + False + >>> Shape.circle == 1 + True + >>> Shape.circle == Request.post + True + +However, they still can't be compared to standard ``Enum`` enumerations:: + + >>> class Shape(IntEnum): + ... circle = 1 + ... square = 2 + ... + >>> class Color(Enum): + ... red = 1 + ... green = 2 + ... + >>> Shape.circle == Color.red + False + +``IntEnum`` values behave like integers in other ways you'd expect:: + + >>> int(Shape.circle) + 1 + >>> ['a', 'b', 'c'][Shape.circle] + 'b' + >>> [i for i in range(Shape.square)] + [0, 1] + +For the vast majority of code, ``Enum`` is strongly recommended, +since ``IntEnum`` breaks some semantic promises of an enumeration (by +being comparable to integers, and thus by transitivity to other +unrelated enumerations). It should be used only in special cases where +there's no other choice; for example, when integer constants are +replaced with enumerations and backwards compatibility is required with code +that still expects integers. + + +Others +^^^^^^ + +While ``IntEnum`` is part of the ``enum`` module, it would be very +simple to implement independently:: + + class IntEnum(int, Enum): + pass + +This demonstrates how similar derived enumerations can be defined; for example +a ``StrEnum`` that mixes in ``str`` instead of ``int``. + +Some rules: + +1. When subclassing ``Enum``, mix-in types must appear before + ``Enum`` itself in the sequence of bases, as in the ``IntEnum`` + example above. +2. While ``Enum`` can have members of any type, once you mix in an + additional type, all the members must have values of that type, e.g. + ``int`` above. This restriction does not apply to mix-ins which only + add methods and don't specify another data type such as ``int`` or + ``str``. +3. When another data type is mixed in, the ``value`` attribute is *not the + same* as the enum member itself, although it is equivalant and will compare + equal. +4. %-style formatting: ``%s`` and ``%r`` call ``Enum``'s ``__str__`` and + ``__repr__`` respectively; other codes (such as ``%i`` or ``%h`` for + IntEnum) treat the enum member as its mixed-in type. + + Note: Prior to Python 3.4 there is a bug in ``str``'s %-formatting: ``int`` + subclasses are printed as strings and not numbers when the ``%d``, ``%i``, + or ``%u`` codes are used. +5. ``str.__format__`` (or ``format``) will use the mixed-in + type's ``__format__``. If the ``Enum``'s ``str`` or + ``repr`` is desired use the ``!s`` or ``!r`` ``str`` format codes. + + +Decorators +---------- + +unique +^^^^^^ + +A ``class`` decorator specifically for enumerations. It searches an +enumeration's ``__members__`` gathering any aliases it finds; if any are +found ``ValueError`` is raised with the details:: + + >>> @unique + ... class NoDupes(Enum): + ... first = 'one' + ... second = 'two' + ... third = 'two' + Traceback (most recent call last): + ... + ValueError: duplicate names found in : third -> second + + +Interesting examples +-------------------- + +While ``Enum`` and ``IntEnum`` are expected to cover the majority of +use-cases, they cannot cover them all. Here are recipes for some different +types of enumerations that can be used directly, or as examples for creating +one's own. + + +AutoNumber +^^^^^^^^^^ + +Avoids having to specify the value for each enumeration member:: + + >>> class AutoNumber(Enum): + ... def __new__(cls): + ... value = len(cls.__members__) + 1 + ... obj = object.__new__(cls) + ... obj._value_ = value + ... return obj + ... + >>> class Color(AutoNumber): + ... __order__ = "red green blue" # only needed in 2.x + ... red = () + ... green = () + ... blue = () + ... + >>> Color.green.value == 2 + True + +Note: + + The `__new__` method, if defined, is used during creation of the Enum + members; it is then replaced by Enum's `__new__` which is used after + class creation for lookup of existing members. Due to the way Enums are + supposed to behave, there is no way to customize Enum's `__new__`. + + +UniqueEnum +^^^^^^^^^^ + +Raises an error if a duplicate member name is found instead of creating an +alias:: + + >>> class UniqueEnum(Enum): + ... def __init__(self, *args): + ... cls = self.__class__ + ... if any(self.value == e.value for e in cls): + ... a = self.name + ... e = cls(self.value).name + ... raise ValueError( + ... "aliases not allowed in UniqueEnum: %r --> %r" + ... % (a, e)) + ... + >>> class Color(UniqueEnum): + ... red = 1 + ... green = 2 + ... blue = 3 + ... grene = 2 + Traceback (most recent call last): + ... + ValueError: aliases not allowed in UniqueEnum: 'grene' --> 'green' + + +OrderedEnum +^^^^^^^^^^^ + +An ordered enumeration that is not based on ``IntEnum`` and so maintains +the normal ``Enum`` invariants (such as not being comparable to other +enumerations):: + + >>> class OrderedEnum(Enum): + ... def __ge__(self, other): + ... if self.__class__ is other.__class__: + ... return self._value_ >= other._value_ + ... return NotImplemented + ... def __gt__(self, other): + ... if self.__class__ is other.__class__: + ... return self._value_ > other._value_ + ... return NotImplemented + ... def __le__(self, other): + ... if self.__class__ is other.__class__: + ... return self._value_ <= other._value_ + ... return NotImplemented + ... def __lt__(self, other): + ... if self.__class__ is other.__class__: + ... return self._value_ < other._value_ + ... return NotImplemented + ... + >>> class Grade(OrderedEnum): + ... __ordered__ = 'A B C D F' + ... A = 5 + ... B = 4 + ... C = 3 + ... D = 2 + ... F = 1 + ... + >>> Grade.C < Grade.A + True + + +Planet +^^^^^^ + +If ``__new__`` or ``__init__`` is defined the value of the enum member +will be passed to those methods:: + + >>> class Planet(Enum): + ... MERCURY = (3.303e+23, 2.4397e6) + ... VENUS = (4.869e+24, 6.0518e6) + ... EARTH = (5.976e+24, 6.37814e6) + ... MARS = (6.421e+23, 3.3972e6) + ... JUPITER = (1.9e+27, 7.1492e7) + ... SATURN = (5.688e+26, 6.0268e7) + ... URANUS = (8.686e+25, 2.5559e7) + ... NEPTUNE = (1.024e+26, 2.4746e7) + ... def __init__(self, mass, radius): + ... self.mass = mass # in kilograms + ... self.radius = radius # in meters + ... @property + ... def surface_gravity(self): + ... # universal gravitational constant (m3 kg-1 s-2) + ... G = 6.67300E-11 + ... return G * self.mass / (self.radius * self.radius) + ... + >>> Planet.EARTH.value + (5.976e+24, 6378140.0) + >>> Planet.EARTH.surface_gravity + 9.802652743337129 + + +How are Enums different? +------------------------ + +Enums have a custom metaclass that affects many aspects of both derived Enum +classes and their instances (members). + + +Enum Classes +^^^^^^^^^^^^ + +The ``EnumMeta`` metaclass is responsible for providing the +``__contains__``, ``__dir__``, ``__iter__`` and other methods that +allow one to do things with an ``Enum`` class that fail on a typical +class, such as ``list(Color)`` or ``some_var in Color``. ``EnumMeta`` is +responsible for ensuring that various other methods on the final ``Enum`` +class are correct (such as ``__new__``, ``__getnewargs__``, +``__str__`` and ``__repr__``) + + +Enum Members (aka instances) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The most interesting thing about Enum members is that they are singletons. +``EnumMeta`` creates them all while it is creating the ``Enum`` +class itself, and then puts a custom ``__new__`` in place to ensure +that no new ones are ever instantiated by returning only the existing +member instances. + + +Finer Points +^^^^^^^^^^^^ + +Enum members are instances of an Enum class, and even though they are +accessible as ``EnumClass.member``, they are not accessible directly from +the member:: + + >>> Color.red + + >>> Color.red.blue + Traceback (most recent call last): + ... + AttributeError: 'Color' object has no attribute 'blue' + +Likewise, ``__members__`` is only available on the class. + +In Python 3.x ``__members__`` is always an ``OrderedDict``, with the order being +the definition order. In Python 2.7 ``__members__`` is an ``OrderedDict`` if +``__order__`` was specified, and a plain ``dict`` otherwise. In all other Python +2.x versions ``__members__`` is a plain ``dict`` even if ``__order__`` was specified +as the ``OrderedDict`` type didn't exist yet. + +If you give your ``Enum`` subclass extra methods, like the `Planet`_ +class above, those methods will show up in a `dir` of the member, +but not of the class:: + + >>> dir(Planet) + ['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS', + 'VENUS', '__class__', '__doc__', '__members__', '__module__'] + >>> dir(Planet.EARTH) + ['__class__', '__doc__', '__module__', 'name', 'surface_gravity', 'value'] + +A ``__new__`` method will only be used for the creation of the +``Enum`` members -- after that it is replaced. This means if you wish to +change how ``Enum`` members are looked up you either have to write a +helper function or a ``classmethod``. diff --git a/lib/enum/enum.py b/lib/enum/enum.py new file mode 100644 index 00000000..6a327a8a --- /dev/null +++ b/lib/enum/enum.py @@ -0,0 +1,790 @@ +"""Python Enumerations""" + +import sys as _sys + +__all__ = ['Enum', 'IntEnum', 'unique'] + +version = 1, 0, 4 + +pyver = float('%s.%s' % _sys.version_info[:2]) + +try: + any +except NameError: + def any(iterable): + for element in iterable: + if element: + return True + return False + +try: + from collections import OrderedDict +except ImportError: + OrderedDict = None + +try: + basestring +except NameError: + # In Python 2 basestring is the ancestor of both str and unicode + # in Python 3 it's just str, but was missing in 3.1 + basestring = str + +try: + unicode +except NameError: + # In Python 3 unicode no longer exists (it's just str) + unicode = str + +class _RouteClassAttributeToGetattr(object): + """Route attribute access on a class to __getattr__. + + This is a descriptor, used to define attributes that act differently when + accessed through an instance and through a class. Instance access remains + normal, but access to an attribute through a class will be routed to the + class's __getattr__ method; this is done by raising AttributeError. + + """ + def __init__(self, fget=None): + self.fget = fget + + def __get__(self, instance, ownerclass=None): + if instance is None: + raise AttributeError() + return self.fget(instance) + + def __set__(self, instance, value): + raise AttributeError("can't set attribute") + + def __delete__(self, instance): + raise AttributeError("can't delete attribute") + + +def _is_descriptor(obj): + """Returns True if obj is a descriptor, False otherwise.""" + return ( + hasattr(obj, '__get__') or + hasattr(obj, '__set__') or + hasattr(obj, '__delete__')) + + +def _is_dunder(name): + """Returns True if a __dunder__ name, False otherwise.""" + return (name[:2] == name[-2:] == '__' and + name[2:3] != '_' and + name[-3:-2] != '_' and + len(name) > 4) + + +def _is_sunder(name): + """Returns True if a _sunder_ name, False otherwise.""" + return (name[0] == name[-1] == '_' and + name[1:2] != '_' and + name[-2:-1] != '_' and + len(name) > 2) + + +def _make_class_unpicklable(cls): + """Make the given class un-picklable.""" + def _break_on_call_reduce(self, protocol=None): + raise TypeError('%r cannot be pickled' % self) + cls.__reduce_ex__ = _break_on_call_reduce + cls.__module__ = '' + + +class _EnumDict(dict): + """Track enum member order and ensure member names are not reused. + + EnumMeta will use the names found in self._member_names as the + enumeration member names. + + """ + def __init__(self): + super(_EnumDict, self).__init__() + self._member_names = [] + + def __setitem__(self, key, value): + """Changes anything not dundered or not a descriptor. + + If a descriptor is added with the same name as an enum member, the name + is removed from _member_names (this may leave a hole in the numerical + sequence of values). + + If an enum member name is used twice, an error is raised; duplicate + values are not checked for. + + Single underscore (sunder) names are reserved. + + Note: in 3.x __order__ is simply discarded as a not necessary piece + leftover from 2.x + + """ + if pyver >= 3.0 and key == '__order__': + return + if _is_sunder(key): + raise ValueError('_names_ are reserved for future Enum use') + elif _is_dunder(key): + pass + elif key in self._member_names: + # descriptor overwriting an enum? + raise TypeError('Attempted to reuse key: %r' % key) + elif not _is_descriptor(value): + if key in self: + # enum overwriting a descriptor? + raise TypeError('Key already defined as: %r' % self[key]) + self._member_names.append(key) + super(_EnumDict, self).__setitem__(key, value) + + +# Dummy value for Enum as EnumMeta explicity checks for it, but of course until +# EnumMeta finishes running the first time the Enum class doesn't exist. This +# is also why there are checks in EnumMeta like `if Enum is not None` +Enum = None + + +class EnumMeta(type): + """Metaclass for Enum""" + @classmethod + def __prepare__(metacls, cls, bases): + return _EnumDict() + + def __new__(metacls, cls, bases, classdict): + # an Enum class is final once enumeration items have been defined; it + # cannot be mixed with other types (int, float, etc.) if it has an + # inherited __new__ unless a new __new__ is defined (or the resulting + # class will fail). + if type(classdict) is dict: + original_dict = classdict + classdict = _EnumDict() + for k, v in original_dict.items(): + classdict[k] = v + + member_type, first_enum = metacls._get_mixins_(bases) + __new__, save_new, use_args = metacls._find_new_(classdict, member_type, + first_enum) + # save enum items into separate mapping so they don't get baked into + # the new class + members = dict((k, classdict[k]) for k in classdict._member_names) + for name in classdict._member_names: + del classdict[name] + + # py2 support for definition order + __order__ = classdict.get('__order__') + if __order__ is None: + if pyver < 3.0: + try: + __order__ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])] + except TypeError: + __order__ = [name for name in sorted(members.keys())] + else: + __order__ = classdict._member_names + else: + del classdict['__order__'] + if pyver < 3.0: + __order__ = __order__.replace(',', ' ').split() + aliases = [name for name in members if name not in __order__] + __order__ += aliases + + # check for illegal enum names (any others?) + invalid_names = set(members) & set(['mro']) + if invalid_names: + raise ValueError('Invalid enum member name(s): %s' % ( + ', '.join(invalid_names), )) + + # create our new Enum type + enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict) + enum_class._member_names_ = [] # names in random order + if OrderedDict is not None: + enum_class._member_map_ = OrderedDict() + else: + enum_class._member_map_ = {} # name->value map + enum_class._member_type_ = member_type + + # Reverse value->name map for hashable values. + enum_class._value2member_map_ = {} + + # instantiate them, checking for duplicates as we go + # we instantiate first instead of checking for duplicates first in case + # a custom __new__ is doing something funky with the values -- such as + # auto-numbering ;) + if __new__ is None: + __new__ = enum_class.__new__ + for member_name in __order__: + value = members[member_name] + if not isinstance(value, tuple): + args = (value, ) + else: + args = value + if member_type is tuple: # special case for tuple enums + args = (args, ) # wrap it one more time + if not use_args or not args: + enum_member = __new__(enum_class) + if not hasattr(enum_member, '_value_'): + enum_member._value_ = value + else: + enum_member = __new__(enum_class, *args) + if not hasattr(enum_member, '_value_'): + enum_member._value_ = member_type(*args) + value = enum_member._value_ + enum_member._name_ = member_name + enum_member.__objclass__ = enum_class + enum_member.__init__(*args) + # If another member with the same value was already defined, the + # new member becomes an alias to the existing one. + for name, canonical_member in enum_class._member_map_.items(): + if canonical_member.value == enum_member._value_: + enum_member = canonical_member + break + else: + # Aliases don't appear in member names (only in __members__). + enum_class._member_names_.append(member_name) + enum_class._member_map_[member_name] = enum_member + try: + # This may fail if value is not hashable. We can't add the value + # to the map, and by-value lookups for this value will be + # linear. + enum_class._value2member_map_[value] = enum_member + except TypeError: + pass + + + # If a custom type is mixed into the Enum, and it does not know how + # to pickle itself, pickle.dumps will succeed but pickle.loads will + # fail. Rather than have the error show up later and possibly far + # from the source, sabotage the pickle protocol for this class so + # that pickle.dumps also fails. + # + # However, if the new class implements its own __reduce_ex__, do not + # sabotage -- it's on them to make sure it works correctly. We use + # __reduce_ex__ instead of any of the others as it is preferred by + # pickle over __reduce__, and it handles all pickle protocols. + unpicklable = False + if '__reduce_ex__' not in classdict: + if member_type is not object: + methods = ('__getnewargs_ex__', '__getnewargs__', + '__reduce_ex__', '__reduce__') + if not any(m in member_type.__dict__ for m in methods): + _make_class_unpicklable(enum_class) + unpicklable = True + + + # double check that repr and friends are not the mixin's or various + # things break (such as pickle) + for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'): + class_method = getattr(enum_class, name) + obj_method = getattr(member_type, name, None) + enum_method = getattr(first_enum, name, None) + if name not in classdict and class_method is not enum_method: + if name == '__reduce_ex__' and unpicklable: + continue + setattr(enum_class, name, enum_method) + + # method resolution and int's are not playing nice + # Python's less than 2.6 use __cmp__ + + if pyver < 2.6: + + if issubclass(enum_class, int): + setattr(enum_class, '__cmp__', getattr(int, '__cmp__')) + + elif pyver < 3.0: + + if issubclass(enum_class, int): + for method in ( + '__le__', + '__lt__', + '__gt__', + '__ge__', + '__eq__', + '__ne__', + '__hash__', + ): + setattr(enum_class, method, getattr(int, method)) + + # replace any other __new__ with our own (as long as Enum is not None, + # anyway) -- again, this is to support pickle + if Enum is not None: + # if the user defined their own __new__, save it before it gets + # clobbered in case they subclass later + if save_new: + setattr(enum_class, '__member_new__', enum_class.__dict__['__new__']) + setattr(enum_class, '__new__', Enum.__dict__['__new__']) + return enum_class + + def __call__(cls, value, names=None, module=None, type=None): + """Either returns an existing member, or creates a new enum class. + + This method is used both when an enum class is given a value to match + to an enumeration member (i.e. Color(3)) and for the functional API + (i.e. Color = Enum('Color', names='red green blue')). + + When used for the functional API: `module`, if set, will be stored in + the new class' __module__ attribute; `type`, if set, will be mixed in + as the first base class. + + Note: if `module` is not set this routine will attempt to discover the + calling module by walking the frame stack; if this is unsuccessful + the resulting class will not be pickleable. + + """ + if names is None: # simple value lookup + return cls.__new__(cls, value) + # otherwise, functional API: we're creating a new Enum type + return cls._create_(value, names, module=module, type=type) + + def __contains__(cls, member): + return isinstance(member, cls) and member.name in cls._member_map_ + + def __delattr__(cls, attr): + # nicer error message when someone tries to delete an attribute + # (see issue19025). + if attr in cls._member_map_: + raise AttributeError( + "%s: cannot delete Enum member." % cls.__name__) + super(EnumMeta, cls).__delattr__(attr) + + def __dir__(self): + return (['__class__', '__doc__', '__members__', '__module__'] + + self._member_names_) + + @property + def __members__(cls): + """Returns a mapping of member name->value. + + This mapping lists all enum members, including aliases. Note that this + is a copy of the internal mapping. + + """ + return cls._member_map_.copy() + + def __getattr__(cls, name): + """Return the enum member matching `name` + + We use __getattr__ instead of descriptors or inserting into the enum + class' __dict__ in order to support `name` and `value` being both + properties for enum members (which live in the class' __dict__) and + enum members themselves. + + """ + if _is_dunder(name): + raise AttributeError(name) + try: + return cls._member_map_[name] + except KeyError: + raise AttributeError(name) + + def __getitem__(cls, name): + return cls._member_map_[name] + + def __iter__(cls): + return (cls._member_map_[name] for name in cls._member_names_) + + def __reversed__(cls): + return (cls._member_map_[name] for name in reversed(cls._member_names_)) + + def __len__(cls): + return len(cls._member_names_) + + def __repr__(cls): + return "" % cls.__name__ + + def __setattr__(cls, name, value): + """Block attempts to reassign Enum members. + + A simple assignment to the class namespace only changes one of the + several possible ways to get an Enum member from the Enum class, + resulting in an inconsistent Enumeration. + + """ + member_map = cls.__dict__.get('_member_map_', {}) + if name in member_map: + raise AttributeError('Cannot reassign members.') + super(EnumMeta, cls).__setattr__(name, value) + + def _create_(cls, class_name, names=None, module=None, type=None): + """Convenience method to create a new Enum class. + + `names` can be: + + * A string containing member names, separated either with spaces or + commas. Values are auto-numbered from 1. + * An iterable of member names. Values are auto-numbered from 1. + * An iterable of (member name, value) pairs. + * A mapping of member name -> value. + + """ + if pyver < 3.0: + # if class_name is unicode, attempt a conversion to ASCII + if isinstance(class_name, unicode): + try: + class_name = class_name.encode('ascii') + except UnicodeEncodeError: + raise TypeError('%r is not representable in ASCII' % class_name) + metacls = cls.__class__ + if type is None: + bases = (cls, ) + else: + bases = (type, cls) + classdict = metacls.__prepare__(class_name, bases) + __order__ = [] + + # special processing needed for names? + if isinstance(names, basestring): + names = names.replace(',', ' ').split() + if isinstance(names, (tuple, list)) and isinstance(names[0], basestring): + names = [(e, i+1) for (i, e) in enumerate(names)] + + # Here, names is either an iterable of (name, value) or a mapping. + for item in names: + if isinstance(item, basestring): + member_name, member_value = item, names[item] + else: + member_name, member_value = item + classdict[member_name] = member_value + __order__.append(member_name) + # only set __order__ in classdict if name/value was not from a mapping + if not isinstance(item, basestring): + classdict['__order__'] = ' '.join(__order__) + enum_class = metacls.__new__(metacls, class_name, bases, classdict) + + # TODO: replace the frame hack if a blessed way to know the calling + # module is ever developed + if module is None: + try: + module = _sys._getframe(2).f_globals['__name__'] + except (AttributeError, ValueError): + pass + if module is None: + _make_class_unpicklable(enum_class) + else: + enum_class.__module__ = module + + return enum_class + + @staticmethod + def _get_mixins_(bases): + """Returns the type for creating enum members, and the first inherited + enum class. + + bases: the tuple of bases that was given to __new__ + + """ + if not bases or Enum is None: + return object, Enum + + + # double check that we are not subclassing a class with existing + # enumeration members; while we're at it, see if any other data + # type has been mixed in so we can use the correct __new__ + member_type = first_enum = None + for base in bases: + if (base is not Enum and + issubclass(base, Enum) and + base._member_names_): + raise TypeError("Cannot extend enumerations") + # base is now the last base in bases + if not issubclass(base, Enum): + raise TypeError("new enumerations must be created as " + "`ClassName([mixin_type,] enum_type)`") + + # get correct mix-in type (either mix-in type of Enum subclass, or + # first base if last base is Enum) + if not issubclass(bases[0], Enum): + member_type = bases[0] # first data type + first_enum = bases[-1] # enum type + else: + for base in bases[0].__mro__: + # most common: (IntEnum, int, Enum, object) + # possible: (, , + # , , + # ) + if issubclass(base, Enum): + if first_enum is None: + first_enum = base + else: + if member_type is None: + member_type = base + + return member_type, first_enum + + if pyver < 3.0: + @staticmethod + def _find_new_(classdict, member_type, first_enum): + """Returns the __new__ to be used for creating the enum members. + + classdict: the class dictionary given to __new__ + member_type: the data type whose __new__ will be used by default + first_enum: enumeration to check for an overriding __new__ + + """ + # now find the correct __new__, checking to see of one was defined + # by the user; also check earlier enum classes in case a __new__ was + # saved as __member_new__ + __new__ = classdict.get('__new__', None) + if __new__: + return None, True, True # __new__, save_new, use_args + + N__new__ = getattr(None, '__new__') + O__new__ = getattr(object, '__new__') + if Enum is None: + E__new__ = N__new__ + else: + E__new__ = Enum.__dict__['__new__'] + # check all possibles for __member_new__ before falling back to + # __new__ + for method in ('__member_new__', '__new__'): + for possible in (member_type, first_enum): + try: + target = possible.__dict__[method] + except (AttributeError, KeyError): + target = getattr(possible, method, None) + if target not in [ + None, + N__new__, + O__new__, + E__new__, + ]: + if method == '__member_new__': + classdict['__new__'] = target + return None, False, True + if isinstance(target, staticmethod): + target = target.__get__(member_type) + __new__ = target + break + if __new__ is not None: + break + else: + __new__ = object.__new__ + + # if a non-object.__new__ is used then whatever value/tuple was + # assigned to the enum member name will be passed to __new__ and to the + # new enum member's __init__ + if __new__ is object.__new__: + use_args = False + else: + use_args = True + + return __new__, False, use_args + else: + @staticmethod + def _find_new_(classdict, member_type, first_enum): + """Returns the __new__ to be used for creating the enum members. + + classdict: the class dictionary given to __new__ + member_type: the data type whose __new__ will be used by default + first_enum: enumeration to check for an overriding __new__ + + """ + # now find the correct __new__, checking to see of one was defined + # by the user; also check earlier enum classes in case a __new__ was + # saved as __member_new__ + __new__ = classdict.get('__new__', None) + + # should __new__ be saved as __member_new__ later? + save_new = __new__ is not None + + if __new__ is None: + # check all possibles for __member_new__ before falling back to + # __new__ + for method in ('__member_new__', '__new__'): + for possible in (member_type, first_enum): + target = getattr(possible, method, None) + if target not in ( + None, + None.__new__, + object.__new__, + Enum.__new__, + ): + __new__ = target + break + if __new__ is not None: + break + else: + __new__ = object.__new__ + + # if a non-object.__new__ is used then whatever value/tuple was + # assigned to the enum member name will be passed to __new__ and to the + # new enum member's __init__ + if __new__ is object.__new__: + use_args = False + else: + use_args = True + + return __new__, save_new, use_args + + +######################################################## +# In order to support Python 2 and 3 with a single +# codebase we have to create the Enum methods separately +# and then use the `type(name, bases, dict)` method to +# create the class. +######################################################## +temp_enum_dict = {} +temp_enum_dict['__doc__'] = "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n" + +def __new__(cls, value): + # all enum instances are actually created during class construction + # without calling this method; this method is called by the metaclass' + # __call__ (i.e. Color(3) ), and by pickle + if type(value) is cls: + # For lookups like Color(Color.red) + value = value.value + #return value + # by-value search for a matching enum member + # see if it's in the reverse mapping (for hashable values) + try: + if value in cls._value2member_map_: + return cls._value2member_map_[value] + except TypeError: + # not there, now do long search -- O(n) behavior + for member in cls._member_map_.values(): + if member.value == value: + return member + raise ValueError("%s is not a valid %s" % (value, cls.__name__)) +temp_enum_dict['__new__'] = __new__ +del __new__ + +def __repr__(self): + return "<%s.%s: %r>" % ( + self.__class__.__name__, self._name_, self._value_) +temp_enum_dict['__repr__'] = __repr__ +del __repr__ + +def __str__(self): + return "%s.%s" % (self.__class__.__name__, self._name_) +temp_enum_dict['__str__'] = __str__ +del __str__ + +def __dir__(self): + added_behavior = [ + m + for cls in self.__class__.mro() + for m in cls.__dict__ + if m[0] != '_' + ] + return (['__class__', '__doc__', '__module__', ] + added_behavior) +temp_enum_dict['__dir__'] = __dir__ +del __dir__ + +def __format__(self, format_spec): + # mixed-in Enums should use the mixed-in type's __format__, otherwise + # we can get strange results with the Enum name showing up instead of + # the value + + # pure Enum branch + if self._member_type_ is object: + cls = str + val = str(self) + # mix-in branch + else: + cls = self._member_type_ + val = self.value + return cls.__format__(val, format_spec) +temp_enum_dict['__format__'] = __format__ +del __format__ + + +#################################### +# Python's less than 2.6 use __cmp__ + +if pyver < 2.6: + + def __cmp__(self, other): + if type(other) is self.__class__: + if self is other: + return 0 + return -1 + return NotImplemented + raise TypeError("unorderable types: %s() and %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__cmp__'] = __cmp__ + del __cmp__ + +else: + + def __le__(self, other): + raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__le__'] = __le__ + del __le__ + + def __lt__(self, other): + raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__lt__'] = __lt__ + del __lt__ + + def __ge__(self, other): + raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__ge__'] = __ge__ + del __ge__ + + def __gt__(self, other): + raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__)) + temp_enum_dict['__gt__'] = __gt__ + del __gt__ + + +def __eq__(self, other): + if type(other) is self.__class__: + return self is other + return NotImplemented +temp_enum_dict['__eq__'] = __eq__ +del __eq__ + +def __ne__(self, other): + if type(other) is self.__class__: + return self is not other + return NotImplemented +temp_enum_dict['__ne__'] = __ne__ +del __ne__ + +def __hash__(self): + return hash(self._name_) +temp_enum_dict['__hash__'] = __hash__ +del __hash__ + +def __reduce_ex__(self, proto): + return self.__class__, (self._value_, ) +temp_enum_dict['__reduce_ex__'] = __reduce_ex__ +del __reduce_ex__ + +# _RouteClassAttributeToGetattr is used to provide access to the `name` +# and `value` properties of enum members while keeping some measure of +# protection from modification, while still allowing for an enumeration +# to have members named `name` and `value`. This works because enumeration +# members are not set directly on the enum class -- __getattr__ is +# used to look them up. + +@_RouteClassAttributeToGetattr +def name(self): + return self._name_ +temp_enum_dict['name'] = name +del name + +@_RouteClassAttributeToGetattr +def value(self): + return self._value_ +temp_enum_dict['value'] = value +del value + +Enum = EnumMeta('Enum', (object, ), temp_enum_dict) +del temp_enum_dict + +# Enum has now been created +########################### + +class IntEnum(int, Enum): + """Enum where members are also (and must be) ints""" + + +def unique(enumeration): + """Class decorator that ensures only unique members exist in an enumeration.""" + duplicates = [] + for name, member in enumeration.__members__.items(): + if name != member.name: + duplicates.append((name, member.name)) + if duplicates: + duplicate_names = ', '.join( + ["%s -> %s" % (alias, name) for (alias, name) in duplicates] + ) + raise ValueError('duplicate names found in %r: %s' % + (enumeration, duplicate_names) + ) + return enumeration diff --git a/lib/enum/test_enum.py b/lib/enum/test_enum.py new file mode 100644 index 00000000..d7a97942 --- /dev/null +++ b/lib/enum/test_enum.py @@ -0,0 +1,1690 @@ +import enum +import sys +import unittest +from enum import Enum, IntEnum, unique, EnumMeta +from pickle import dumps, loads, PicklingError, HIGHEST_PROTOCOL + +pyver = float('%s.%s' % sys.version_info[:2]) + +try: + any +except NameError: + def any(iterable): + for element in iterable: + if element: + return True + return False + +try: + unicode +except NameError: + unicode = str + +try: + from collections import OrderedDict +except ImportError: + OrderedDict = None + +# for pickle tests +try: + class Stooges(Enum): + LARRY = 1 + CURLY = 2 + MOE = 3 +except Exception: + Stooges = sys.exc_info()[1] + +try: + class IntStooges(int, Enum): + LARRY = 1 + CURLY = 2 + MOE = 3 +except Exception: + IntStooges = sys.exc_info()[1] + +try: + class FloatStooges(float, Enum): + LARRY = 1.39 + CURLY = 2.72 + MOE = 3.142596 +except Exception: + FloatStooges = sys.exc_info()[1] + +# for pickle test and subclass tests +try: + class StrEnum(str, Enum): + 'accepts only string values' + class Name(StrEnum): + BDFL = 'Guido van Rossum' + FLUFL = 'Barry Warsaw' +except Exception: + Name = sys.exc_info()[1] + +try: + Question = Enum('Question', 'who what when where why', module=__name__) +except Exception: + Question = sys.exc_info()[1] + +try: + Answer = Enum('Answer', 'him this then there because') +except Exception: + Answer = sys.exc_info()[1] + +try: + Theory = Enum('Theory', 'rule law supposition', qualname='spanish_inquisition') +except Exception: + Theory = sys.exc_info()[1] + +# for doctests +try: + class Fruit(Enum): + tomato = 1 + banana = 2 + cherry = 3 +except Exception: + pass + +def test_pickle_dump_load(assertion, source, target=None, + protocol=(0, HIGHEST_PROTOCOL)): + start, stop = protocol + failures = [] + for protocol in range(start, stop+1): + try: + if target is None: + assertion(loads(dumps(source, protocol=protocol)) is source) + else: + assertion(loads(dumps(source, protocol=protocol)), target) + except Exception: + exc, tb = sys.exc_info()[1:] + failures.append('%2d: %s' %(protocol, exc)) + if failures: + raise ValueError('Failed with protocols: %s' % ', '.join(failures)) + +def test_pickle_exception(assertion, exception, obj, + protocol=(0, HIGHEST_PROTOCOL)): + start, stop = protocol + failures = [] + for protocol in range(start, stop+1): + try: + assertion(exception, dumps, obj, protocol=protocol) + except Exception: + exc = sys.exc_info()[1] + failures.append('%d: %s %s' % (protocol, exc.__class__.__name__, exc)) + if failures: + raise ValueError('Failed with protocols: %s' % ', '.join(failures)) + + +class TestHelpers(unittest.TestCase): + # _is_descriptor, _is_sunder, _is_dunder + + def test_is_descriptor(self): + class foo: + pass + for attr in ('__get__','__set__','__delete__'): + obj = foo() + self.assertFalse(enum._is_descriptor(obj)) + setattr(obj, attr, 1) + self.assertTrue(enum._is_descriptor(obj)) + + def test_is_sunder(self): + for s in ('_a_', '_aa_'): + self.assertTrue(enum._is_sunder(s)) + + for s in ('a', 'a_', '_a', '__a', 'a__', '__a__', '_a__', '__a_', '_', + '__', '___', '____', '_____',): + self.assertFalse(enum._is_sunder(s)) + + def test_is_dunder(self): + for s in ('__a__', '__aa__'): + self.assertTrue(enum._is_dunder(s)) + for s in ('a', 'a_', '_a', '__a', 'a__', '_a_', '_a__', '__a_', '_', + '__', '___', '____', '_____',): + self.assertFalse(enum._is_dunder(s)) + + +class TestEnum(unittest.TestCase): + def setUp(self): + class Season(Enum): + SPRING = 1 + SUMMER = 2 + AUTUMN = 3 + WINTER = 4 + self.Season = Season + + class Konstants(float, Enum): + E = 2.7182818 + PI = 3.1415926 + TAU = 2 * PI + self.Konstants = Konstants + + class Grades(IntEnum): + A = 5 + B = 4 + C = 3 + D = 2 + F = 0 + self.Grades = Grades + + class Directional(str, Enum): + EAST = 'east' + WEST = 'west' + NORTH = 'north' + SOUTH = 'south' + self.Directional = Directional + + from datetime import date + class Holiday(date, Enum): + NEW_YEAR = 2013, 1, 1 + IDES_OF_MARCH = 2013, 3, 15 + self.Holiday = Holiday + + if pyver >= 2.6: # cannot specify custom `dir` on previous versions + def test_dir_on_class(self): + Season = self.Season + self.assertEqual( + set(dir(Season)), + set(['__class__', '__doc__', '__members__', '__module__', + 'SPRING', 'SUMMER', 'AUTUMN', 'WINTER']), + ) + + def test_dir_on_item(self): + Season = self.Season + self.assertEqual( + set(dir(Season.WINTER)), + set(['__class__', '__doc__', '__module__', 'name', 'value']), + ) + + def test_dir_on_sub_with_behavior_on_super(self): + # see issue22506 + class SuperEnum(Enum): + def invisible(self): + return "did you see me?" + class SubEnum(SuperEnum): + sample = 5 + self.assertEqual( + set(dir(SubEnum.sample)), + set(['__class__', '__doc__', '__module__', 'name', 'value', 'invisible']), + ) + + if pyver >= 2.7: # OrderedDict first available here + def test_members_is_ordereddict_if_ordered(self): + class Ordered(Enum): + __order__ = 'first second third' + first = 'bippity' + second = 'boppity' + third = 'boo' + self.assertTrue(type(Ordered.__members__) is OrderedDict) + + def test_members_is_ordereddict_if_not_ordered(self): + class Unordered(Enum): + this = 'that' + these = 'those' + self.assertTrue(type(Unordered.__members__) is OrderedDict) + + if pyver >= 3.0: # all objects are ordered in Python 2.x + def test_members_is_always_ordered(self): + class AlwaysOrdered(Enum): + first = 1 + second = 2 + third = 3 + self.assertTrue(type(AlwaysOrdered.__members__) is OrderedDict) + + def test_comparisons(self): + def bad_compare(): + Season.SPRING > 4 + Season = self.Season + self.assertNotEqual(Season.SPRING, 1) + self.assertRaises(TypeError, bad_compare) + + class Part(Enum): + SPRING = 1 + CLIP = 2 + BARREL = 3 + + self.assertNotEqual(Season.SPRING, Part.SPRING) + def bad_compare(): + Season.SPRING < Part.CLIP + self.assertRaises(TypeError, bad_compare) + + def test_enum_in_enum_out(self): + Season = self.Season + self.assertTrue(Season(Season.WINTER) is Season.WINTER) + + def test_enum_value(self): + Season = self.Season + self.assertEqual(Season.SPRING.value, 1) + + def test_intenum_value(self): + self.assertEqual(IntStooges.CURLY.value, 2) + + def test_enum(self): + Season = self.Season + lst = list(Season) + self.assertEqual(len(lst), len(Season)) + self.assertEqual(len(Season), 4, Season) + self.assertEqual( + [Season.SPRING, Season.SUMMER, Season.AUTUMN, Season.WINTER], lst) + + for i, season in enumerate('SPRING SUMMER AUTUMN WINTER'.split()): + i += 1 + e = Season(i) + self.assertEqual(e, getattr(Season, season)) + self.assertEqual(e.value, i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, season) + self.assertTrue(e in Season) + self.assertTrue(type(e) is Season) + self.assertTrue(isinstance(e, Season)) + self.assertEqual(str(e), 'Season.' + season) + self.assertEqual( + repr(e), + '' % (season, i), + ) + + def test_value_name(self): + Season = self.Season + self.assertEqual(Season.SPRING.name, 'SPRING') + self.assertEqual(Season.SPRING.value, 1) + def set_name(obj, new_value): + obj.name = new_value + def set_value(obj, new_value): + obj.value = new_value + self.assertRaises(AttributeError, set_name, Season.SPRING, 'invierno', ) + self.assertRaises(AttributeError, set_value, Season.SPRING, 2) + + def test_attribute_deletion(self): + class Season(Enum): + SPRING = 1 + SUMMER = 2 + AUTUMN = 3 + WINTER = 4 + + def spam(cls): + pass + + self.assertTrue(hasattr(Season, 'spam')) + del Season.spam + self.assertFalse(hasattr(Season, 'spam')) + + self.assertRaises(AttributeError, delattr, Season, 'SPRING') + self.assertRaises(AttributeError, delattr, Season, 'DRY') + self.assertRaises(AttributeError, delattr, Season.SPRING, 'name') + + def test_invalid_names(self): + def create_bad_class_1(): + class Wrong(Enum): + mro = 9 + def create_bad_class_2(): + class Wrong(Enum): + _reserved_ = 3 + self.assertRaises(ValueError, create_bad_class_1) + self.assertRaises(ValueError, create_bad_class_2) + + def test_contains(self): + Season = self.Season + self.assertTrue(Season.AUTUMN in Season) + self.assertTrue(3 not in Season) + + val = Season(3) + self.assertTrue(val in Season) + + class OtherEnum(Enum): + one = 1; two = 2 + self.assertTrue(OtherEnum.two not in Season) + + if pyver >= 2.6: # when `format` came into being + + def test_format_enum(self): + Season = self.Season + self.assertEqual('{0}'.format(Season.SPRING), + '{0}'.format(str(Season.SPRING))) + self.assertEqual( '{0:}'.format(Season.SPRING), + '{0:}'.format(str(Season.SPRING))) + self.assertEqual('{0:20}'.format(Season.SPRING), + '{0:20}'.format(str(Season.SPRING))) + self.assertEqual('{0:^20}'.format(Season.SPRING), + '{0:^20}'.format(str(Season.SPRING))) + self.assertEqual('{0:>20}'.format(Season.SPRING), + '{0:>20}'.format(str(Season.SPRING))) + self.assertEqual('{0:<20}'.format(Season.SPRING), + '{0:<20}'.format(str(Season.SPRING))) + + def test_format_enum_custom(self): + class TestFloat(float, Enum): + one = 1.0 + two = 2.0 + def __format__(self, spec): + return 'TestFloat success!' + self.assertEqual('{0}'.format(TestFloat.one), 'TestFloat success!') + + def assertFormatIsValue(self, spec, member): + self.assertEqual(spec.format(member), spec.format(member.value)) + + def test_format_enum_date(self): + Holiday = self.Holiday + self.assertFormatIsValue('{0}', Holiday.IDES_OF_MARCH) + self.assertFormatIsValue('{0:}', Holiday.IDES_OF_MARCH) + self.assertFormatIsValue('{0:20}', Holiday.IDES_OF_MARCH) + self.assertFormatIsValue('{0:^20}', Holiday.IDES_OF_MARCH) + self.assertFormatIsValue('{0:>20}', Holiday.IDES_OF_MARCH) + self.assertFormatIsValue('{0:<20}', Holiday.IDES_OF_MARCH) + self.assertFormatIsValue('{0:%Y %m}', Holiday.IDES_OF_MARCH) + self.assertFormatIsValue('{0:%Y %m %M:00}', Holiday.IDES_OF_MARCH) + + def test_format_enum_float(self): + Konstants = self.Konstants + self.assertFormatIsValue('{0}', Konstants.TAU) + self.assertFormatIsValue('{0:}', Konstants.TAU) + self.assertFormatIsValue('{0:20}', Konstants.TAU) + self.assertFormatIsValue('{0:^20}', Konstants.TAU) + self.assertFormatIsValue('{0:>20}', Konstants.TAU) + self.assertFormatIsValue('{0:<20}', Konstants.TAU) + self.assertFormatIsValue('{0:n}', Konstants.TAU) + self.assertFormatIsValue('{0:5.2}', Konstants.TAU) + self.assertFormatIsValue('{0:f}', Konstants.TAU) + + def test_format_enum_int(self): + Grades = self.Grades + self.assertFormatIsValue('{0}', Grades.C) + self.assertFormatIsValue('{0:}', Grades.C) + self.assertFormatIsValue('{0:20}', Grades.C) + self.assertFormatIsValue('{0:^20}', Grades.C) + self.assertFormatIsValue('{0:>20}', Grades.C) + self.assertFormatIsValue('{0:<20}', Grades.C) + self.assertFormatIsValue('{0:+}', Grades.C) + self.assertFormatIsValue('{0:08X}', Grades.C) + self.assertFormatIsValue('{0:b}', Grades.C) + + def test_format_enum_str(self): + Directional = self.Directional + self.assertFormatIsValue('{0}', Directional.WEST) + self.assertFormatIsValue('{0:}', Directional.WEST) + self.assertFormatIsValue('{0:20}', Directional.WEST) + self.assertFormatIsValue('{0:^20}', Directional.WEST) + self.assertFormatIsValue('{0:>20}', Directional.WEST) + self.assertFormatIsValue('{0:<20}', Directional.WEST) + + def test_hash(self): + Season = self.Season + dates = {} + dates[Season.WINTER] = '1225' + dates[Season.SPRING] = '0315' + dates[Season.SUMMER] = '0704' + dates[Season.AUTUMN] = '1031' + self.assertEqual(dates[Season.AUTUMN], '1031') + + def test_enum_duplicates(self): + __order__ = "SPRING SUMMER AUTUMN WINTER" + class Season(Enum): + SPRING = 1 + SUMMER = 2 + AUTUMN = FALL = 3 + WINTER = 4 + ANOTHER_SPRING = 1 + lst = list(Season) + self.assertEqual( + lst, + [Season.SPRING, Season.SUMMER, + Season.AUTUMN, Season.WINTER, + ]) + self.assertTrue(Season.FALL is Season.AUTUMN) + self.assertEqual(Season.FALL.value, 3) + self.assertEqual(Season.AUTUMN.value, 3) + self.assertTrue(Season(3) is Season.AUTUMN) + self.assertTrue(Season(1) is Season.SPRING) + self.assertEqual(Season.FALL.name, 'AUTUMN') + self.assertEqual( + set([k for k,v in Season.__members__.items() if v.name != k]), + set(['FALL', 'ANOTHER_SPRING']), + ) + + if pyver >= 3.0: + cls = vars() + result = {'Enum':Enum} + exec("""def test_duplicate_name(self): + with self.assertRaises(TypeError): + class Color(Enum): + red = 1 + green = 2 + blue = 3 + red = 4 + + with self.assertRaises(TypeError): + class Color(Enum): + red = 1 + green = 2 + blue = 3 + def red(self): + return 'red' + + with self.assertRaises(TypeError): + class Color(Enum): + @property + + def red(self): + return 'redder' + red = 1 + green = 2 + blue = 3""", + result) + cls['test_duplicate_name'] = result['test_duplicate_name'] + + def test_enum_with_value_name(self): + class Huh(Enum): + name = 1 + value = 2 + self.assertEqual( + list(Huh), + [Huh.name, Huh.value], + ) + self.assertTrue(type(Huh.name) is Huh) + self.assertEqual(Huh.name.name, 'name') + self.assertEqual(Huh.name.value, 1) + + def test_intenum_from_scratch(self): + class phy(int, Enum): + pi = 3 + tau = 2 * pi + self.assertTrue(phy.pi < phy.tau) + + def test_intenum_inherited(self): + class IntEnum(int, Enum): + pass + class phy(IntEnum): + pi = 3 + tau = 2 * pi + self.assertTrue(phy.pi < phy.tau) + + def test_floatenum_from_scratch(self): + class phy(float, Enum): + pi = 3.1415926 + tau = 2 * pi + self.assertTrue(phy.pi < phy.tau) + + def test_floatenum_inherited(self): + class FloatEnum(float, Enum): + pass + class phy(FloatEnum): + pi = 3.1415926 + tau = 2 * pi + self.assertTrue(phy.pi < phy.tau) + + def test_strenum_from_scratch(self): + class phy(str, Enum): + pi = 'Pi' + tau = 'Tau' + self.assertTrue(phy.pi < phy.tau) + + def test_strenum_inherited(self): + class StrEnum(str, Enum): + pass + class phy(StrEnum): + pi = 'Pi' + tau = 'Tau' + self.assertTrue(phy.pi < phy.tau) + + def test_intenum(self): + class WeekDay(IntEnum): + SUNDAY = 1 + MONDAY = 2 + TUESDAY = 3 + WEDNESDAY = 4 + THURSDAY = 5 + FRIDAY = 6 + SATURDAY = 7 + + self.assertEqual(['a', 'b', 'c'][WeekDay.MONDAY], 'c') + self.assertEqual([i for i in range(WeekDay.TUESDAY)], [0, 1, 2]) + + lst = list(WeekDay) + self.assertEqual(len(lst), len(WeekDay)) + self.assertEqual(len(WeekDay), 7) + target = 'SUNDAY MONDAY TUESDAY WEDNESDAY THURSDAY FRIDAY SATURDAY' + target = target.split() + for i, weekday in enumerate(target): + i += 1 + e = WeekDay(i) + self.assertEqual(e, i) + self.assertEqual(int(e), i) + self.assertEqual(e.name, weekday) + self.assertTrue(e in WeekDay) + self.assertEqual(lst.index(e)+1, i) + self.assertTrue(0 < e < 8) + self.assertTrue(type(e) is WeekDay) + self.assertTrue(isinstance(e, int)) + self.assertTrue(isinstance(e, Enum)) + + def test_intenum_duplicates(self): + class WeekDay(IntEnum): + __order__ = 'SUNDAY MONDAY TUESDAY WEDNESDAY THURSDAY FRIDAY SATURDAY' + SUNDAY = 1 + MONDAY = 2 + TUESDAY = TEUSDAY = 3 + WEDNESDAY = 4 + THURSDAY = 5 + FRIDAY = 6 + SATURDAY = 7 + self.assertTrue(WeekDay.TEUSDAY is WeekDay.TUESDAY) + self.assertEqual(WeekDay(3).name, 'TUESDAY') + self.assertEqual([k for k,v in WeekDay.__members__.items() + if v.name != k], ['TEUSDAY', ]) + + def test_pickle_enum(self): + if isinstance(Stooges, Exception): + raise Stooges + test_pickle_dump_load(self.assertTrue, Stooges.CURLY) + test_pickle_dump_load(self.assertTrue, Stooges) + + def test_pickle_int(self): + if isinstance(IntStooges, Exception): + raise IntStooges + test_pickle_dump_load(self.assertTrue, IntStooges.CURLY) + test_pickle_dump_load(self.assertTrue, IntStooges) + + def test_pickle_float(self): + if isinstance(FloatStooges, Exception): + raise FloatStooges + test_pickle_dump_load(self.assertTrue, FloatStooges.CURLY) + test_pickle_dump_load(self.assertTrue, FloatStooges) + + def test_pickle_enum_function(self): + if isinstance(Answer, Exception): + raise Answer + test_pickle_dump_load(self.assertTrue, Answer.him) + test_pickle_dump_load(self.assertTrue, Answer) + + def test_pickle_enum_function_with_module(self): + if isinstance(Question, Exception): + raise Question + test_pickle_dump_load(self.assertTrue, Question.who) + test_pickle_dump_load(self.assertTrue, Question) + + if pyver >= 3.4: + def test_class_nested_enum_and_pickle_protocol_four(self): + # would normally just have this directly in the class namespace + class NestedEnum(Enum): + twigs = 'common' + shiny = 'rare' + + self.__class__.NestedEnum = NestedEnum + self.NestedEnum.__qualname__ = '%s.NestedEnum' % self.__class__.__name__ + test_pickle_exception( + self.assertRaises, PicklingError, self.NestedEnum.twigs, + protocol=(0, 3)) + test_pickle_dump_load(self.assertTrue, self.NestedEnum.twigs, + protocol=(4, HIGHEST_PROTOCOL)) + + def test_exploding_pickle(self): + BadPickle = Enum('BadPickle', 'dill sweet bread-n-butter') + enum._make_class_unpicklable(BadPickle) + globals()['BadPickle'] = BadPickle + test_pickle_exception(self.assertRaises, TypeError, BadPickle.dill) + test_pickle_exception(self.assertRaises, PicklingError, BadPickle) + + def test_string_enum(self): + class SkillLevel(str, Enum): + master = 'what is the sound of one hand clapping?' + journeyman = 'why did the chicken cross the road?' + apprentice = 'knock, knock!' + self.assertEqual(SkillLevel.apprentice, 'knock, knock!') + + def test_getattr_getitem(self): + class Period(Enum): + morning = 1 + noon = 2 + evening = 3 + night = 4 + self.assertTrue(Period(2) is Period.noon) + self.assertTrue(getattr(Period, 'night') is Period.night) + self.assertTrue(Period['morning'] is Period.morning) + + def test_getattr_dunder(self): + Season = self.Season + self.assertTrue(getattr(Season, '__hash__')) + + def test_iteration_order(self): + class Season(Enum): + __order__ = 'SUMMER WINTER AUTUMN SPRING' + SUMMER = 2 + WINTER = 4 + AUTUMN = 3 + SPRING = 1 + self.assertEqual( + list(Season), + [Season.SUMMER, Season.WINTER, Season.AUTUMN, Season.SPRING], + ) + + def test_iteration_order_with_unorderable_values(self): + class Complex(Enum): + a = complex(7, 9) + b = complex(3.14, 2) + c = complex(1, -1) + d = complex(-77, 32) + self.assertEqual( + list(Complex), + [Complex.a, Complex.b, Complex.c, Complex.d], + ) + + def test_programatic_function_string(self): + SummerMonth = Enum('SummerMonth', 'june july august') + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate('june july august'.split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_string_list(self): + SummerMonth = Enum('SummerMonth', ['june', 'july', 'august']) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate('june july august'.split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_iterable(self): + SummerMonth = Enum( + 'SummerMonth', + (('june', 1), ('july', 2), ('august', 3)) + ) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate('june july august'.split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_from_dict(self): + SummerMonth = Enum( + 'SummerMonth', + dict((('june', 1), ('july', 2), ('august', 3))) + ) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + if pyver < 3.0: + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate('june july august'.split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_type(self): + SummerMonth = Enum('SummerMonth', 'june july august', type=int) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate('june july august'.split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_type_from_subclass(self): + SummerMonth = IntEnum('SummerMonth', 'june july august') + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate('june july august'.split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_unicode(self): + SummerMonth = Enum('SummerMonth', unicode('june july august')) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate(unicode('june july august').split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_unicode_list(self): + SummerMonth = Enum('SummerMonth', [unicode('june'), unicode('july'), unicode('august')]) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate(unicode('june july august').split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_unicode_iterable(self): + SummerMonth = Enum( + 'SummerMonth', + ((unicode('june'), 1), (unicode('july'), 2), (unicode('august'), 3)) + ) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate(unicode('june july august').split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_from_unicode_dict(self): + SummerMonth = Enum( + 'SummerMonth', + dict(((unicode('june'), 1), (unicode('july'), 2), (unicode('august'), 3))) + ) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + if pyver < 3.0: + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate(unicode('june july august').split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(int(e.value), i) + self.assertNotEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_unicode_type(self): + SummerMonth = Enum('SummerMonth', unicode('june july august'), type=int) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate(unicode('june july august').split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programatic_function_unicode_type_from_subclass(self): + SummerMonth = IntEnum('SummerMonth', unicode('june july august')) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate(unicode('june july august').split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(e, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_programmatic_function_unicode_class(self): + if pyver < 3.0: + class_names = unicode('SummerMonth'), 'S\xfcmm\xe9rM\xf6nth'.decode('latin1') + else: + class_names = 'SummerMonth', 'S\xfcmm\xe9rM\xf6nth' + for i, class_name in enumerate(class_names): + if pyver < 3.0 and i == 1: + self.assertRaises(TypeError, Enum, class_name, unicode('june july august')) + else: + SummerMonth = Enum(class_name, unicode('june july august')) + lst = list(SummerMonth) + self.assertEqual(len(lst), len(SummerMonth)) + self.assertEqual(len(SummerMonth), 3, SummerMonth) + self.assertEqual( + [SummerMonth.june, SummerMonth.july, SummerMonth.august], + lst, + ) + for i, month in enumerate(unicode('june july august').split()): + i += 1 + e = SummerMonth(i) + self.assertEqual(e.value, i) + self.assertEqual(e.name, month) + self.assertTrue(e in SummerMonth) + self.assertTrue(type(e) is SummerMonth) + + def test_subclassing(self): + if isinstance(Name, Exception): + raise Name + self.assertEqual(Name.BDFL, 'Guido van Rossum') + self.assertTrue(Name.BDFL, Name('Guido van Rossum')) + self.assertTrue(Name.BDFL is getattr(Name, 'BDFL')) + test_pickle_dump_load(self.assertTrue, Name.BDFL) + + def test_extending(self): + def bad_extension(): + class Color(Enum): + red = 1 + green = 2 + blue = 3 + class MoreColor(Color): + cyan = 4 + magenta = 5 + yellow = 6 + self.assertRaises(TypeError, bad_extension) + + def test_exclude_methods(self): + class whatever(Enum): + this = 'that' + these = 'those' + def really(self): + return 'no, not %s' % self.value + self.assertFalse(type(whatever.really) is whatever) + self.assertEqual(whatever.this.really(), 'no, not that') + + def test_wrong_inheritance_order(self): + def wrong_inherit(): + class Wrong(Enum, str): + NotHere = 'error before this point' + self.assertRaises(TypeError, wrong_inherit) + + def test_intenum_transitivity(self): + class number(IntEnum): + one = 1 + two = 2 + three = 3 + class numero(IntEnum): + uno = 1 + dos = 2 + tres = 3 + self.assertEqual(number.one, numero.uno) + self.assertEqual(number.two, numero.dos) + self.assertEqual(number.three, numero.tres) + + def test_introspection(self): + class Number(IntEnum): + one = 100 + two = 200 + self.assertTrue(Number.one._member_type_ is int) + self.assertTrue(Number._member_type_ is int) + class String(str, Enum): + yarn = 'soft' + rope = 'rough' + wire = 'hard' + self.assertTrue(String.yarn._member_type_ is str) + self.assertTrue(String._member_type_ is str) + class Plain(Enum): + vanilla = 'white' + one = 1 + self.assertTrue(Plain.vanilla._member_type_ is object) + self.assertTrue(Plain._member_type_ is object) + + def test_wrong_enum_in_call(self): + class Monochrome(Enum): + black = 0 + white = 1 + class Gender(Enum): + male = 0 + female = 1 + self.assertRaises(ValueError, Monochrome, Gender.male) + + def test_wrong_enum_in_mixed_call(self): + class Monochrome(IntEnum): + black = 0 + white = 1 + class Gender(Enum): + male = 0 + female = 1 + self.assertRaises(ValueError, Monochrome, Gender.male) + + def test_mixed_enum_in_call_1(self): + class Monochrome(IntEnum): + black = 0 + white = 1 + class Gender(IntEnum): + male = 0 + female = 1 + self.assertTrue(Monochrome(Gender.female) is Monochrome.white) + + def test_mixed_enum_in_call_2(self): + class Monochrome(Enum): + black = 0 + white = 1 + class Gender(IntEnum): + male = 0 + female = 1 + self.assertTrue(Monochrome(Gender.male) is Monochrome.black) + + def test_flufl_enum(self): + class Fluflnum(Enum): + def __int__(self): + return int(self.value) + class MailManOptions(Fluflnum): + option1 = 1 + option2 = 2 + option3 = 3 + self.assertEqual(int(MailManOptions.option1), 1) + + def test_no_such_enum_member(self): + class Color(Enum): + red = 1 + green = 2 + blue = 3 + self.assertRaises(ValueError, Color, 4) + self.assertRaises(KeyError, Color.__getitem__, 'chartreuse') + + def test_new_repr(self): + class Color(Enum): + red = 1 + green = 2 + blue = 3 + def __repr__(self): + return "don't you just love shades of %s?" % self.name + self.assertEqual( + repr(Color.blue), + "don't you just love shades of blue?", + ) + + def test_inherited_repr(self): + class MyEnum(Enum): + def __repr__(self): + return "My name is %s." % self.name + class MyIntEnum(int, MyEnum): + this = 1 + that = 2 + theother = 3 + self.assertEqual(repr(MyIntEnum.that), "My name is that.") + + def test_multiple_mixin_mro(self): + class auto_enum(EnumMeta): + def __new__(metacls, cls, bases, classdict): + original_dict = classdict + classdict = enum._EnumDict() + for k, v in original_dict.items(): + classdict[k] = v + temp = type(classdict)() + names = set(classdict._member_names) + i = 0 + for k in classdict._member_names: + v = classdict[k] + if v == (): + v = i + else: + i = v + i += 1 + temp[k] = v + for k, v in classdict.items(): + if k not in names: + temp[k] = v + return super(auto_enum, metacls).__new__( + metacls, cls, bases, temp) + + AutoNumberedEnum = auto_enum('AutoNumberedEnum', (Enum,), {}) + + AutoIntEnum = auto_enum('AutoIntEnum', (IntEnum,), {}) + + class TestAutoNumber(AutoNumberedEnum): + a = () + b = 3 + c = () + + class TestAutoInt(AutoIntEnum): + a = () + b = 3 + c = () + + def test_subclasses_with_getnewargs(self): + class NamedInt(int): + __qualname__ = 'NamedInt' # needed for pickle protocol 4 + def __new__(cls, *args): + _args = args + if len(args) < 1: + raise TypeError("name and value must be specified") + name, args = args[0], args[1:] + self = int.__new__(cls, *args) + self._intname = name + self._args = _args + return self + def __getnewargs__(self): + return self._args + @property + def __name__(self): + return self._intname + def __repr__(self): + # repr() is updated to include the name and type info + return "%s(%r, %s)" % (type(self).__name__, + self.__name__, + int.__repr__(self)) + def __str__(self): + # str() is unchanged, even if it relies on the repr() fallback + base = int + base_str = base.__str__ + if base_str.__objclass__ is object: + return base.__repr__(self) + return base_str(self) + # for simplicity, we only define one operator that + # propagates expressions + def __add__(self, other): + temp = int(self) + int( other) + if isinstance(self, NamedInt) and isinstance(other, NamedInt): + return NamedInt( + '(%s + %s)' % (self.__name__, other.__name__), + temp ) + else: + return temp + + class NEI(NamedInt, Enum): + __qualname__ = 'NEI' # needed for pickle protocol 4 + x = ('the-x', 1) + y = ('the-y', 2) + + self.assertTrue(NEI.__new__ is Enum.__new__) + self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") + globals()['NamedInt'] = NamedInt + globals()['NEI'] = NEI + NI5 = NamedInt('test', 5) + self.assertEqual(NI5, 5) + test_pickle_dump_load(self.assertTrue, NI5, 5) + self.assertEqual(NEI.y.value, 2) + test_pickle_dump_load(self.assertTrue, NEI.y) + + if pyver >= 3.4: + def test_subclasses_with_getnewargs_ex(self): + class NamedInt(int): + __qualname__ = 'NamedInt' # needed for pickle protocol 4 + def __new__(cls, *args): + _args = args + if len(args) < 2: + raise TypeError("name and value must be specified") + name, args = args[0], args[1:] + self = int.__new__(cls, *args) + self._intname = name + self._args = _args + return self + def __getnewargs_ex__(self): + return self._args, {} + @property + def __name__(self): + return self._intname + def __repr__(self): + # repr() is updated to include the name and type info + return "{}({!r}, {})".format(type(self).__name__, + self.__name__, + int.__repr__(self)) + def __str__(self): + # str() is unchanged, even if it relies on the repr() fallback + base = int + base_str = base.__str__ + if base_str.__objclass__ is object: + return base.__repr__(self) + return base_str(self) + # for simplicity, we only define one operator that + # propagates expressions + def __add__(self, other): + temp = int(self) + int( other) + if isinstance(self, NamedInt) and isinstance(other, NamedInt): + return NamedInt( + '({0} + {1})'.format(self.__name__, other.__name__), + temp ) + else: + return temp + + class NEI(NamedInt, Enum): + __qualname__ = 'NEI' # needed for pickle protocol 4 + x = ('the-x', 1) + y = ('the-y', 2) + + + self.assertIs(NEI.__new__, Enum.__new__) + self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") + globals()['NamedInt'] = NamedInt + globals()['NEI'] = NEI + NI5 = NamedInt('test', 5) + self.assertEqual(NI5, 5) + test_pickle_dump_load(self.assertEqual, NI5, 5, protocol=(4, HIGHEST_PROTOCOL)) + self.assertEqual(NEI.y.value, 2) + test_pickle_dump_load(self.assertTrue, NEI.y, protocol=(4, HIGHEST_PROTOCOL)) + + def test_subclasses_with_reduce(self): + class NamedInt(int): + __qualname__ = 'NamedInt' # needed for pickle protocol 4 + def __new__(cls, *args): + _args = args + if len(args) < 1: + raise TypeError("name and value must be specified") + name, args = args[0], args[1:] + self = int.__new__(cls, *args) + self._intname = name + self._args = _args + return self + def __reduce__(self): + return self.__class__, self._args + @property + def __name__(self): + return self._intname + def __repr__(self): + # repr() is updated to include the name and type info + return "%s(%r, %s)" % (type(self).__name__, + self.__name__, + int.__repr__(self)) + def __str__(self): + # str() is unchanged, even if it relies on the repr() fallback + base = int + base_str = base.__str__ + if base_str.__objclass__ is object: + return base.__repr__(self) + return base_str(self) + # for simplicity, we only define one operator that + # propagates expressions + def __add__(self, other): + temp = int(self) + int( other) + if isinstance(self, NamedInt) and isinstance(other, NamedInt): + return NamedInt( + '(%s + %s)' % (self.__name__, other.__name__), + temp ) + else: + return temp + + class NEI(NamedInt, Enum): + __qualname__ = 'NEI' # needed for pickle protocol 4 + x = ('the-x', 1) + y = ('the-y', 2) + + + self.assertTrue(NEI.__new__ is Enum.__new__) + self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") + globals()['NamedInt'] = NamedInt + globals()['NEI'] = NEI + NI5 = NamedInt('test', 5) + self.assertEqual(NI5, 5) + test_pickle_dump_load(self.assertEqual, NI5, 5) + self.assertEqual(NEI.y.value, 2) + test_pickle_dump_load(self.assertTrue, NEI.y) + + def test_subclasses_with_reduce_ex(self): + class NamedInt(int): + __qualname__ = 'NamedInt' # needed for pickle protocol 4 + def __new__(cls, *args): + _args = args + if len(args) < 1: + raise TypeError("name and value must be specified") + name, args = args[0], args[1:] + self = int.__new__(cls, *args) + self._intname = name + self._args = _args + return self + def __reduce_ex__(self, proto): + return self.__class__, self._args + @property + def __name__(self): + return self._intname + def __repr__(self): + # repr() is updated to include the name and type info + return "%s(%r, %s)" % (type(self).__name__, + self.__name__, + int.__repr__(self)) + def __str__(self): + # str() is unchanged, even if it relies on the repr() fallback + base = int + base_str = base.__str__ + if base_str.__objclass__ is object: + return base.__repr__(self) + return base_str(self) + # for simplicity, we only define one operator that + # propagates expressions + def __add__(self, other): + temp = int(self) + int( other) + if isinstance(self, NamedInt) and isinstance(other, NamedInt): + return NamedInt( + '(%s + %s)' % (self.__name__, other.__name__), + temp ) + else: + return temp + + class NEI(NamedInt, Enum): + __qualname__ = 'NEI' # needed for pickle protocol 4 + x = ('the-x', 1) + y = ('the-y', 2) + + + self.assertTrue(NEI.__new__ is Enum.__new__) + self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") + globals()['NamedInt'] = NamedInt + globals()['NEI'] = NEI + NI5 = NamedInt('test', 5) + self.assertEqual(NI5, 5) + test_pickle_dump_load(self.assertEqual, NI5, 5) + self.assertEqual(NEI.y.value, 2) + test_pickle_dump_load(self.assertTrue, NEI.y) + + def test_subclasses_without_direct_pickle_support(self): + class NamedInt(int): + __qualname__ = 'NamedInt' + def __new__(cls, *args): + _args = args + name, args = args[0], args[1:] + if len(args) == 0: + raise TypeError("name and value must be specified") + self = int.__new__(cls, *args) + self._intname = name + self._args = _args + return self + @property + def __name__(self): + return self._intname + def __repr__(self): + # repr() is updated to include the name and type info + return "%s(%r, %s)" % (type(self).__name__, + self.__name__, + int.__repr__(self)) + def __str__(self): + # str() is unchanged, even if it relies on the repr() fallback + base = int + base_str = base.__str__ + if base_str.__objclass__ is object: + return base.__repr__(self) + return base_str(self) + # for simplicity, we only define one operator that + # propagates expressions + def __add__(self, other): + temp = int(self) + int( other) + if isinstance(self, NamedInt) and isinstance(other, NamedInt): + return NamedInt( + '(%s + %s)' % (self.__name__, other.__name__), + temp ) + else: + return temp + + class NEI(NamedInt, Enum): + __qualname__ = 'NEI' + x = ('the-x', 1) + y = ('the-y', 2) + + self.assertTrue(NEI.__new__ is Enum.__new__) + self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") + globals()['NamedInt'] = NamedInt + globals()['NEI'] = NEI + NI5 = NamedInt('test', 5) + self.assertEqual(NI5, 5) + self.assertEqual(NEI.y.value, 2) + test_pickle_exception(self.assertRaises, TypeError, NEI.x) + test_pickle_exception(self.assertRaises, PicklingError, NEI) + + def test_subclasses_without_direct_pickle_support_using_name(self): + class NamedInt(int): + __qualname__ = 'NamedInt' + def __new__(cls, *args): + _args = args + name, args = args[0], args[1:] + if len(args) == 0: + raise TypeError("name and value must be specified") + self = int.__new__(cls, *args) + self._intname = name + self._args = _args + return self + @property + def __name__(self): + return self._intname + def __repr__(self): + # repr() is updated to include the name and type info + return "%s(%r, %s)" % (type(self).__name__, + self.__name__, + int.__repr__(self)) + def __str__(self): + # str() is unchanged, even if it relies on the repr() fallback + base = int + base_str = base.__str__ + if base_str.__objclass__ is object: + return base.__repr__(self) + return base_str(self) + # for simplicity, we only define one operator that + # propagates expressions + def __add__(self, other): + temp = int(self) + int( other) + if isinstance(self, NamedInt) and isinstance(other, NamedInt): + return NamedInt( + '(%s + %s)' % (self.__name__, other.__name__), + temp ) + else: + return temp + + class NEI(NamedInt, Enum): + __qualname__ = 'NEI' + x = ('the-x', 1) + y = ('the-y', 2) + def __reduce_ex__(self, proto): + return getattr, (self.__class__, self._name_) + + self.assertTrue(NEI.__new__ is Enum.__new__) + self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") + globals()['NamedInt'] = NamedInt + globals()['NEI'] = NEI + NI5 = NamedInt('test', 5) + self.assertEqual(NI5, 5) + self.assertEqual(NEI.y.value, 2) + test_pickle_dump_load(self.assertTrue, NEI.y) + test_pickle_dump_load(self.assertTrue, NEI) + + def test_tuple_subclass(self): + class SomeTuple(tuple, Enum): + __qualname__ = 'SomeTuple' + first = (1, 'for the money') + second = (2, 'for the show') + third = (3, 'for the music') + self.assertTrue(type(SomeTuple.first) is SomeTuple) + self.assertTrue(isinstance(SomeTuple.second, tuple)) + self.assertEqual(SomeTuple.third, (3, 'for the music')) + globals()['SomeTuple'] = SomeTuple + test_pickle_dump_load(self.assertTrue, SomeTuple.first) + + def test_duplicate_values_give_unique_enum_items(self): + class AutoNumber(Enum): + __order__ = 'enum_m enum_d enum_y' + enum_m = () + enum_d = () + enum_y = () + def __new__(cls): + value = len(cls.__members__) + 1 + obj = object.__new__(cls) + obj._value_ = value + return obj + def __int__(self): + return int(self._value_) + self.assertEqual(int(AutoNumber.enum_d), 2) + self.assertEqual(AutoNumber.enum_y.value, 3) + self.assertTrue(AutoNumber(1) is AutoNumber.enum_m) + self.assertEqual( + list(AutoNumber), + [AutoNumber.enum_m, AutoNumber.enum_d, AutoNumber.enum_y], + ) + + def test_inherited_new_from_enhanced_enum(self): + class AutoNumber2(Enum): + def __new__(cls): + value = len(cls.__members__) + 1 + obj = object.__new__(cls) + obj._value_ = value + return obj + def __int__(self): + return int(self._value_) + class Color(AutoNumber2): + __order__ = 'red green blue' + red = () + green = () + blue = () + self.assertEqual(len(Color), 3, "wrong number of elements: %d (should be %d)" % (len(Color), 3)) + self.assertEqual(list(Color), [Color.red, Color.green, Color.blue]) + if pyver >= 3.0: + self.assertEqual(list(map(int, Color)), [1, 2, 3]) + + def test_inherited_new_from_mixed_enum(self): + class AutoNumber3(IntEnum): + def __new__(cls): + value = len(cls.__members__) + 1 + obj = int.__new__(cls, value) + obj._value_ = value + return obj + class Color(AutoNumber3): + red = () + green = () + blue = () + self.assertEqual(len(Color), 3, "wrong number of elements: %d (should be %d)" % (len(Color), 3)) + Color.red + Color.green + Color.blue + + def test_ordered_mixin(self): + class OrderedEnum(Enum): + def __ge__(self, other): + if self.__class__ is other.__class__: + return self._value_ >= other._value_ + return NotImplemented + def __gt__(self, other): + if self.__class__ is other.__class__: + return self._value_ > other._value_ + return NotImplemented + def __le__(self, other): + if self.__class__ is other.__class__: + return self._value_ <= other._value_ + return NotImplemented + def __lt__(self, other): + if self.__class__ is other.__class__: + return self._value_ < other._value_ + return NotImplemented + class Grade(OrderedEnum): + __order__ = 'A B C D F' + A = 5 + B = 4 + C = 3 + D = 2 + F = 1 + self.assertEqual(list(Grade), [Grade.A, Grade.B, Grade.C, Grade.D, Grade.F]) + self.assertTrue(Grade.A > Grade.B) + self.assertTrue(Grade.F <= Grade.C) + self.assertTrue(Grade.D < Grade.A) + self.assertTrue(Grade.B >= Grade.B) + + def test_extending2(self): + def bad_extension(): + class Shade(Enum): + def shade(self): + print(self.name) + class Color(Shade): + red = 1 + green = 2 + blue = 3 + class MoreColor(Color): + cyan = 4 + magenta = 5 + yellow = 6 + self.assertRaises(TypeError, bad_extension) + + def test_extending3(self): + class Shade(Enum): + def shade(self): + return self.name + class Color(Shade): + def hex(self): + return '%s hexlified!' % self.value + class MoreColor(Color): + cyan = 4 + magenta = 5 + yellow = 6 + self.assertEqual(MoreColor.magenta.hex(), '5 hexlified!') + + def test_no_duplicates(self): + def bad_duplicates(): + class UniqueEnum(Enum): + def __init__(self, *args): + cls = self.__class__ + if any(self.value == e.value for e in cls): + a = self.name + e = cls(self.value).name + raise ValueError( + "aliases not allowed in UniqueEnum: %r --> %r" + % (a, e) + ) + class Color(UniqueEnum): + red = 1 + green = 2 + blue = 3 + class Color(UniqueEnum): + red = 1 + green = 2 + blue = 3 + grene = 2 + self.assertRaises(ValueError, bad_duplicates) + + def test_reversed(self): + self.assertEqual( + list(reversed(self.Season)), + [self.Season.WINTER, self.Season.AUTUMN, self.Season.SUMMER, + self.Season.SPRING] + ) + + def test_init(self): + class Planet(Enum): + MERCURY = (3.303e+23, 2.4397e6) + VENUS = (4.869e+24, 6.0518e6) + EARTH = (5.976e+24, 6.37814e6) + MARS = (6.421e+23, 3.3972e6) + JUPITER = (1.9e+27, 7.1492e7) + SATURN = (5.688e+26, 6.0268e7) + URANUS = (8.686e+25, 2.5559e7) + NEPTUNE = (1.024e+26, 2.4746e7) + def __init__(self, mass, radius): + self.mass = mass # in kilograms + self.radius = radius # in meters + @property + def surface_gravity(self): + # universal gravitational constant (m3 kg-1 s-2) + G = 6.67300E-11 + return G * self.mass / (self.radius * self.radius) + self.assertEqual(round(Planet.EARTH.surface_gravity, 2), 9.80) + self.assertEqual(Planet.EARTH.value, (5.976e+24, 6.37814e6)) + + def test_nonhash_value(self): + class AutoNumberInAList(Enum): + def __new__(cls): + value = [len(cls.__members__) + 1] + obj = object.__new__(cls) + obj._value_ = value + return obj + class ColorInAList(AutoNumberInAList): + __order__ = 'red green blue' + red = () + green = () + blue = () + self.assertEqual(list(ColorInAList), [ColorInAList.red, ColorInAList.green, ColorInAList.blue]) + self.assertEqual(ColorInAList.red.value, [1]) + self.assertEqual(ColorInAList([1]), ColorInAList.red) + + def test_conflicting_types_resolved_in_new(self): + class LabelledIntEnum(int, Enum): + def __new__(cls, *args): + value, label = args + obj = int.__new__(cls, value) + obj.label = label + obj._value_ = value + return obj + + class LabelledList(LabelledIntEnum): + unprocessed = (1, "Unprocessed") + payment_complete = (2, "Payment Complete") + + self.assertEqual(list(LabelledList), [LabelledList.unprocessed, LabelledList.payment_complete]) + self.assertEqual(LabelledList.unprocessed, 1) + self.assertEqual(LabelledList(1), LabelledList.unprocessed) + +class TestUnique(unittest.TestCase): + """2.4 doesn't allow class decorators, use function syntax.""" + + def test_unique_clean(self): + class Clean(Enum): + one = 1 + two = 'dos' + tres = 4.0 + unique(Clean) + class Cleaner(IntEnum): + single = 1 + double = 2 + triple = 3 + unique(Cleaner) + + def test_unique_dirty(self): + try: + class Dirty(Enum): + __order__ = 'one two tres' + one = 1 + two = 'dos' + tres = 1 + unique(Dirty) + except ValueError: + exc = sys.exc_info()[1] + message = exc.args[0] + self.assertTrue('tres -> one' in message) + + try: + class Dirtier(IntEnum): + __order__ = 'single double triple turkey' + single = 1 + double = 1 + triple = 3 + turkey = 3 + unique(Dirtier) + except ValueError: + exc = sys.exc_info()[1] + message = exc.args[0] + self.assertTrue('double -> single' in message) + self.assertTrue('turkey -> triple' in message) + + +class TestMe(unittest.TestCase): + + pass + +if __name__ == '__main__': + unittest.main() diff --git a/lib/feedparser.py b/lib/feedparser.py new file mode 100644 index 00000000..15fdc95b --- /dev/null +++ b/lib/feedparser.py @@ -0,0 +1,3909 @@ +#!/usr/bin/env python +"""Universal feed parser + +Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds + +Visit http://feedparser.org/ for the latest version +Visit http://feedparser.org/docs/ for the latest documentation + +Required: Python 2.4 or later +Recommended: CJKCodecs and iconv_codec +""" + +__version__ = "5.0.1" +__license__ = """Copyright (c) 2002-2008, Mark Pilgrim, All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS' +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE.""" +__author__ = "Mark Pilgrim " +__contributors__ = ["Jason Diamond ", + "John Beimler ", + "Fazal Majid ", + "Aaron Swartz ", + "Kevin Marks ", + "Sam Ruby ", + "Ade Oshineye ", + "Martin Pool ", + "Kurt McKee "] +_debug = 0 + +# HTTP "User-Agent" header to send to servers when downloading feeds. +# If you are embedding feedparser in a larger application, you should +# change this to your application name and URL. +USER_AGENT = "UniversalFeedParser/%s +http://feedparser.org/" % __version__ + +# HTTP "Accept" header to send to servers when downloading feeds. If you don't +# want to send an Accept header, set this to None. +ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1" + +# List of preferred XML parsers, by SAX driver name. These will be tried first, +# but if they're not installed, Python will keep searching through its own list +# of pre-installed parsers until it finds one that supports everything we need. +PREFERRED_XML_PARSERS = ["drv_libxml2"] + +# If you want feedparser to automatically run HTML markup through HTML Tidy, set +# this to 1. Requires mxTidy +# or utidylib . +TIDY_MARKUP = 0 + +# List of Python interfaces for HTML Tidy, in order of preference. Only useful +# if TIDY_MARKUP = 1 +PREFERRED_TIDY_INTERFACES = ["uTidy", "mxTidy"] + +# If you want feedparser to automatically resolve all relative URIs, set this +# to 1. +RESOLVE_RELATIVE_URIS = 1 + +# If you want feedparser to automatically sanitize all potentially unsafe +# HTML content, set this to 1. +SANITIZE_HTML = 1 + +# ---------- Python 3 modules (make it work if possible) ---------- +try: + import rfc822 +except ImportError: + from email import _parseaddr as rfc822 + +try: + # Python 3.1 introduces bytes.maketrans and simultaneously + # deprecates string.maketrans; use bytes.maketrans if possible + _maketrans = bytes.maketrans +except (NameError, AttributeError): + import string + _maketrans = string.maketrans + +# base64 support for Atom feeds that contain embedded binary data +try: + import base64, binascii + # Python 3.1 deprecates decodestring in favor of decodebytes + _base64decode = getattr(base64, 'decodebytes', base64.decodestring) +except: + base64 = binascii = None + +def _s2bytes(s): + # Convert a UTF-8 str to bytes if the interpreter is Python 3 + try: + return bytes(s, 'utf8') + except (NameError, TypeError): + # In Python 2.5 and below, bytes doesn't exist (NameError) + # In Python 2.6 and above, bytes and str are the same (TypeError) + return s + +def _l2bytes(l): + # Convert a list of ints to bytes if the interpreter is Python 3 + try: + if bytes is not str: + # In Python 2.6 and above, this call won't raise an exception + # but it will return bytes([65]) as '[65]' instead of 'A' + return bytes(l) + raise NameError + except NameError: + return ''.join(map(chr, l)) + +# If you want feedparser to allow all URL schemes, set this to () +# List culled from Python's urlparse documentation at: +# http://docs.python.org/library/urlparse.html +# as well as from "URI scheme" at Wikipedia: +# https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme +# Many more will likely need to be added! +ACCEPTABLE_URI_SCHEMES = ( + 'file', 'ftp', 'gopher', 'h323', 'hdl', 'http', 'https', 'imap', 'mailto', + 'mms', 'news', 'nntp', 'prospero', 'rsync', 'rtsp', 'rtspu', 'sftp', + 'shttp', 'sip', 'sips', 'snews', 'svn', 'svn+ssh', 'telnet', 'wais', + # Additional common-but-unofficial schemes + 'aim', 'callto', 'cvs', 'facetime', 'feed', 'git', 'gtalk', 'irc', 'ircs', + 'irc6', 'itms', 'mms', 'msnim', 'skype', 'ssh', 'smb', 'svn', 'ymsg', +) +#ACCEPTABLE_URI_SCHEMES = () + +# ---------- required modules (should come with any Python distribution) ---------- +import sgmllib, re, sys, copy, urlparse, time, types, cgi, urllib, urllib2, datetime +try: + from io import BytesIO as _StringIO +except ImportError: + try: + from cStringIO import StringIO as _StringIO + except: + from StringIO import StringIO as _StringIO + +# ---------- optional modules (feedparser will work without these, but with reduced functionality) ---------- + +# gzip is included with most Python distributions, but may not be available if you compiled your own +try: + import gzip +except: + gzip = None +try: + import zlib +except: + zlib = None + +# If a real XML parser is available, feedparser will attempt to use it. feedparser has +# been tested with the built-in SAX parser, PyXML, and libxml2. On platforms where the +# Python distribution does not come with an XML parser (such as Mac OS X 10.2 and some +# versions of FreeBSD), feedparser will quietly fall back on regex-based parsing. +try: + import xml.sax + xml.sax.make_parser(PREFERRED_XML_PARSERS) # test for valid parsers + from xml.sax.saxutils import escape as _xmlescape + _XML_AVAILABLE = 1 +except: + _XML_AVAILABLE = 0 + def _xmlescape(data,entities={}): + data = data.replace('&', '&') + data = data.replace('>', '>') + data = data.replace('<', '<') + for char, entity in entities: + data = data.replace(char, entity) + return data + +# cjkcodecs and iconv_codec provide support for more character encodings. +# Both are available from http://cjkpython.i18n.org/ +try: + import cjkcodecs.aliases +except: + pass +try: + import iconv_codec +except: + pass + +# chardet library auto-detects character encodings +# Download from http://chardet.feedparser.org/ +try: + import chardet + if _debug: + import chardet.constants + chardet.constants._debug = 1 +except: + chardet = None + +# reversable htmlentitydefs mappings for Python 2.2 +try: + from htmlentitydefs import name2codepoint, codepoint2name +except: + import htmlentitydefs + name2codepoint={} + codepoint2name={} + for (name,codepoint) in htmlentitydefs.entitydefs.iteritems(): + if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1])) + name2codepoint[name]=ord(codepoint) + codepoint2name[ord(codepoint)]=name + +# BeautifulSoup parser used for parsing microformats from embedded HTML content +# http://www.crummy.com/software/BeautifulSoup/ +# feedparser is tested with BeautifulSoup 3.0.x, but it might work with the +# older 2.x series. If it doesn't, and you can figure out why, I'll accept a +# patch and modify the compatibility statement accordingly. +try: + import BeautifulSoup +except: + BeautifulSoup = None + +# ---------- don't touch these ---------- +class ThingsNobodyCaresAboutButMe(Exception): pass +class CharacterEncodingOverride(ThingsNobodyCaresAboutButMe): pass +class CharacterEncodingUnknown(ThingsNobodyCaresAboutButMe): pass +class NonXMLContentType(ThingsNobodyCaresAboutButMe): pass +class UndeclaredNamespace(Exception): pass + +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +sgmllib.special = re.compile(']|"[^"]*"(?=>|/|\s|\w+=)|'[^']*'(?=>|/|\s|\w+=))*(?=[<>])|.*?(?=[<>])''') + def search(self,string,index=0): + match = self.endbracket.match(string,index) + if match is not None: + # Returning a new object in the calling thread's context + # resolves a thread-safety. + return EndBracketMatch(match) + return None + class EndBracketMatch: + def __init__(self, match): + self.match = match + def start(self, n): + return self.match.end(n) + sgmllib.endbracket = EndBracketRegEx() + +SUPPORTED_VERSIONS = {'': 'unknown', + 'rss090': 'RSS 0.90', + 'rss091n': 'RSS 0.91 (Netscape)', + 'rss091u': 'RSS 0.91 (Userland)', + 'rss092': 'RSS 0.92', + 'rss093': 'RSS 0.93', + 'rss094': 'RSS 0.94', + 'rss20': 'RSS 2.0', + 'rss10': 'RSS 1.0', + 'rss': 'RSS (unknown version)', + 'atom01': 'Atom 0.1', + 'atom02': 'Atom 0.2', + 'atom03': 'Atom 0.3', + 'atom10': 'Atom 1.0', + 'atom': 'Atom (unknown version)', + 'cdf': 'CDF', + 'hotrss': 'Hot RSS' + } + +try: + UserDict = dict +except NameError: + # Python 2.1 does not have dict + from UserDict import UserDict + def dict(aList): + rc = {} + for k, v in aList: + rc[k] = v + return rc + +class FeedParserDict(UserDict): + keymap = {'channel': 'feed', + 'items': 'entries', + 'guid': 'id', + 'date': 'updated', + 'date_parsed': 'updated_parsed', + 'description': ['summary', 'subtitle'], + 'url': ['href'], + 'modified': 'updated', + 'modified_parsed': 'updated_parsed', + 'issued': 'published', + 'issued_parsed': 'published_parsed', + 'copyright': 'rights', + 'copyright_detail': 'rights_detail', + 'tagline': 'subtitle', + 'tagline_detail': 'subtitle_detail'} + def __getitem__(self, key): + if key == 'category': + return UserDict.__getitem__(self, 'tags')[0]['term'] + if key == 'enclosures': + norel = lambda link: FeedParserDict([(name,value) for (name,value) in link.items() if name!='rel']) + return [norel(link) for link in UserDict.__getitem__(self, 'links') if link['rel']=='enclosure'] + if key == 'license': + for link in UserDict.__getitem__(self, 'links'): + if link['rel']=='license' and link.has_key('href'): + return link['href'] + if key == 'categories': + return [(tag['scheme'], tag['term']) for tag in UserDict.__getitem__(self, 'tags')] + realkey = self.keymap.get(key, key) + if type(realkey) == types.ListType: + for k in realkey: + if UserDict.__contains__(self, k): + return UserDict.__getitem__(self, k) + if UserDict.__contains__(self, key): + return UserDict.__getitem__(self, key) + return UserDict.__getitem__(self, realkey) + + def __setitem__(self, key, value): + for k in self.keymap.keys(): + if key == k: + key = self.keymap[k] + if type(key) == types.ListType: + key = key[0] + return UserDict.__setitem__(self, key, value) + + def get(self, key, default=None): + if self.has_key(key): + return self[key] + else: + return default + + def setdefault(self, key, value): + if not self.has_key(key): + self[key] = value + return self[key] + + def has_key(self, key): + try: + return hasattr(self, key) or UserDict.__contains__(self, key) + except AttributeError: + return False + # This alias prevents the 2to3 tool from changing the semantics of the + # __contains__ function below and exhausting the maximum recursion depth + __has_key = has_key + + def __getattr__(self, key): + try: + return self.__dict__[key] + except KeyError: + pass + try: + assert not key.startswith('_') + return self.__getitem__(key) + except: + raise AttributeError, "object has no attribute '%s'" % key + + def __setattr__(self, key, value): + if key.startswith('_') or key == 'data': + self.__dict__[key] = value + else: + return self.__setitem__(key, value) + + def __contains__(self, key): + return self.__has_key(key) + +def zopeCompatibilityHack(): + global FeedParserDict + del FeedParserDict + def FeedParserDict(aDict=None): + rc = {} + if aDict: + rc.update(aDict) + return rc + +_ebcdic_to_ascii_map = None +def _ebcdic_to_ascii(s): + global _ebcdic_to_ascii_map + if not _ebcdic_to_ascii_map: + emap = ( + 0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, + 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, + 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, + 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, + 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, + 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, + 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, + 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, + 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,201, + 202,106,107,108,109,110,111,112,113,114,203,204,205,206,207,208, + 209,126,115,116,117,118,119,120,121,122,210,211,212,213,214,215, + 216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231, + 123,65,66,67,68,69,70,71,72,73,232,233,234,235,236,237, + 125,74,75,76,77,78,79,80,81,82,238,239,240,241,242,243, + 92,159,83,84,85,86,87,88,89,90,244,245,246,247,248,249, + 48,49,50,51,52,53,54,55,56,57,250,251,252,253,254,255 + ) + _ebcdic_to_ascii_map = _maketrans( \ + _l2bytes(range(256)), _l2bytes(emap)) + return s.translate(_ebcdic_to_ascii_map) + +_cp1252 = { + unichr(128): unichr(8364), # euro sign + unichr(130): unichr(8218), # single low-9 quotation mark + unichr(131): unichr( 402), # latin small letter f with hook + unichr(132): unichr(8222), # double low-9 quotation mark + unichr(133): unichr(8230), # horizontal ellipsis + unichr(134): unichr(8224), # dagger + unichr(135): unichr(8225), # double dagger + unichr(136): unichr( 710), # modifier letter circumflex accent + unichr(137): unichr(8240), # per mille sign + unichr(138): unichr( 352), # latin capital letter s with caron + unichr(139): unichr(8249), # single left-pointing angle quotation mark + unichr(140): unichr( 338), # latin capital ligature oe + unichr(142): unichr( 381), # latin capital letter z with caron + unichr(145): unichr(8216), # left single quotation mark + unichr(146): unichr(8217), # right single quotation mark + unichr(147): unichr(8220), # left double quotation mark + unichr(148): unichr(8221), # right double quotation mark + unichr(149): unichr(8226), # bullet + unichr(150): unichr(8211), # en dash + unichr(151): unichr(8212), # em dash + unichr(152): unichr( 732), # small tilde + unichr(153): unichr(8482), # trade mark sign + unichr(154): unichr( 353), # latin small letter s with caron + unichr(155): unichr(8250), # single right-pointing angle quotation mark + unichr(156): unichr( 339), # latin small ligature oe + unichr(158): unichr( 382), # latin small letter z with caron + unichr(159): unichr( 376)} # latin capital letter y with diaeresis + +_urifixer = re.compile('^([A-Za-z][A-Za-z0-9+-.]*://)(/*)(.*?)') +def _urljoin(base, uri): + uri = _urifixer.sub(r'\1\3', uri) + try: + return urlparse.urljoin(base, uri) + except: + uri = urlparse.urlunparse([urllib.quote(part) for part in urlparse.urlparse(uri)]) + return urlparse.urljoin(base, uri) + +class _FeedParserMixin: + namespaces = {'': '', + 'http://backend.userland.com/rss': '', + 'http://blogs.law.harvard.edu/tech/rss': '', + 'http://purl.org/rss/1.0/': '', + 'http://my.netscape.com/rdf/simple/0.9/': '', + 'http://example.com/newformat#': '', + 'http://example.com/necho': '', + 'http://purl.org/echo/': '', + 'uri/of/echo/namespace#': '', + 'http://purl.org/pie/': '', + 'http://purl.org/atom/ns#': '', + 'http://www.w3.org/2005/Atom': '', + 'http://purl.org/rss/1.0/modules/rss091#': '', + + 'http://webns.net/mvcb/': 'admin', + 'http://purl.org/rss/1.0/modules/aggregation/': 'ag', + 'http://purl.org/rss/1.0/modules/annotate/': 'annotate', + 'http://media.tangent.org/rss/1.0/': 'audio', + 'http://backend.userland.com/blogChannelModule': 'blogChannel', + 'http://web.resource.org/cc/': 'cc', + 'http://backend.userland.com/creativeCommonsRssModule': 'creativeCommons', + 'http://purl.org/rss/1.0/modules/company': 'co', + 'http://purl.org/rss/1.0/modules/content/': 'content', + 'http://my.theinfo.org/changed/1.0/rss/': 'cp', + 'http://purl.org/dc/elements/1.1/': 'dc', + 'http://purl.org/dc/terms/': 'dcterms', + 'http://purl.org/rss/1.0/modules/email/': 'email', + 'http://purl.org/rss/1.0/modules/event/': 'ev', + 'http://rssnamespace.org/feedburner/ext/1.0': 'feedburner', + 'http://freshmeat.net/rss/fm/': 'fm', + 'http://xmlns.com/foaf/0.1/': 'foaf', + 'http://www.w3.org/2003/01/geo/wgs84_pos#': 'geo', + 'http://postneo.com/icbm/': 'icbm', + 'http://purl.org/rss/1.0/modules/image/': 'image', + 'http://www.itunes.com/DTDs/PodCast-1.0.dtd': 'itunes', + 'http://example.com/DTDs/PodCast-1.0.dtd': 'itunes', + 'http://purl.org/rss/1.0/modules/link/': 'l', + 'http://search.yahoo.com/mrss': 'media', + #Version 1.1.2 of the Media RSS spec added the trailing slash on the namespace + 'http://search.yahoo.com/mrss/': 'media', + 'http://madskills.com/public/xml/rss/module/pingback/': 'pingback', + 'http://prismstandard.org/namespaces/1.2/basic/': 'prism', + 'http://www.w3.org/1999/02/22-rdf-syntax-ns#': 'rdf', + 'http://www.w3.org/2000/01/rdf-schema#': 'rdfs', + 'http://purl.org/rss/1.0/modules/reference/': 'ref', + 'http://purl.org/rss/1.0/modules/richequiv/': 'reqv', + 'http://purl.org/rss/1.0/modules/search/': 'search', + 'http://purl.org/rss/1.0/modules/slash/': 'slash', + 'http://schemas.xmlsoap.org/soap/envelope/': 'soap', + 'http://purl.org/rss/1.0/modules/servicestatus/': 'ss', + 'http://hacks.benhammersley.com/rss/streaming/': 'str', + 'http://purl.org/rss/1.0/modules/subscription/': 'sub', + 'http://purl.org/rss/1.0/modules/syndication/': 'sy', + 'http://schemas.pocketsoap.com/rss/myDescModule/': 'szf', + 'http://purl.org/rss/1.0/modules/taxonomy/': 'taxo', + 'http://purl.org/rss/1.0/modules/threading/': 'thr', + 'http://purl.org/rss/1.0/modules/textinput/': 'ti', + 'http://madskills.com/public/xml/rss/module/trackback/':'trackback', + 'http://wellformedweb.org/commentAPI/': 'wfw', + 'http://purl.org/rss/1.0/modules/wiki/': 'wiki', + 'http://www.w3.org/1999/xhtml': 'xhtml', + 'http://www.w3.org/1999/xlink': 'xlink', + 'http://www.w3.org/XML/1998/namespace': 'xml' +} + _matchnamespaces = {} + + can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'icon', 'logo'] + can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description'] + can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description'] + html_types = ['text/html', 'application/xhtml+xml'] + + def __init__(self, baseuri=None, baselang=None, encoding='utf-8'): + if _debug: sys.stderr.write('initializing FeedParser\n') + if not self._matchnamespaces: + for k, v in self.namespaces.items(): + self._matchnamespaces[k.lower()] = v + self.feeddata = FeedParserDict() # feed-level data + self.encoding = encoding # character encoding + self.entries = [] # list of entry-level data + self.version = '' # feed type/version, see SUPPORTED_VERSIONS + self.namespacesInUse = {} # dictionary of namespaces defined by the feed + + # the following are used internally to track state; + # this is really out of control and should be refactored + self.infeed = 0 + self.inentry = 0 + self.incontent = 0 + self.intextinput = 0 + self.inimage = 0 + self.inauthor = 0 + self.incontributor = 0 + self.inpublisher = 0 + self.insource = 0 + self.sourcedata = FeedParserDict() + self.contentparams = FeedParserDict() + self._summaryKey = None + self.namespacemap = {} + self.elementstack = [] + self.basestack = [] + self.langstack = [] + self.baseuri = baseuri or '' + self.lang = baselang or None + self.svgOK = 0 + self.hasTitle = 0 + if baselang: + self.feeddata['language'] = baselang.replace('_','-') + + def unknown_starttag(self, tag, attrs): + if _debug: sys.stderr.write('start %s with %s\n' % (tag, attrs)) + # normalize attrs + attrs = [(k.lower(), v) for k, v in attrs] + attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs] + # the sgml parser doesn't handle entities in attributes, but + # strict xml parsers do -- account for this difference + if isinstance(self, _LooseFeedParser): + attrs = [(k, v.replace('&', '&')) for k, v in attrs] + + # track xml:base and xml:lang + attrsD = dict(attrs) + baseuri = attrsD.get('xml:base', attrsD.get('base')) or self.baseuri + if type(baseuri) != type(u''): + try: + baseuri = unicode(baseuri, self.encoding) + except: + baseuri = unicode(baseuri, 'iso-8859-1') + # ensure that self.baseuri is always an absolute URI that + # uses a whitelisted URI scheme (e.g. not `javscript:`) + if self.baseuri: + self.baseuri = _makeSafeAbsoluteURI(self.baseuri, baseuri) or self.baseuri + else: + self.baseuri = _urljoin(self.baseuri, baseuri) + lang = attrsD.get('xml:lang', attrsD.get('lang')) + if lang == '': + # xml:lang could be explicitly set to '', we need to capture that + lang = None + elif lang is None: + # if no xml:lang is specified, use parent lang + lang = self.lang + if lang: + if tag in ('feed', 'rss', 'rdf:RDF'): + self.feeddata['language'] = lang.replace('_','-') + self.lang = lang + self.basestack.append(self.baseuri) + self.langstack.append(lang) + + # track namespaces + for prefix, uri in attrs: + if prefix.startswith('xmlns:'): + self.trackNamespace(prefix[6:], uri) + elif prefix == 'xmlns': + self.trackNamespace(None, uri) + + # track inline content + if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): + if tag in ['xhtml:div', 'div']: return # typepad does this 10/2007 + # element declared itself as escaped markup, but it isn't really + self.contentparams['type'] = 'application/xhtml+xml' + if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml': + if tag.find(':') <> -1: + prefix, tag = tag.split(':', 1) + namespace = self.namespacesInUse.get(prefix, '') + if tag=='math' and namespace=='http://www.w3.org/1998/Math/MathML': + attrs.append(('xmlns',namespace)) + if tag=='svg' and namespace=='http://www.w3.org/2000/svg': + attrs.append(('xmlns',namespace)) + if tag == 'svg': self.svgOK += 1 + return self.handle_data('<%s%s>' % (tag, self.strattrs(attrs)), escape=0) + + # match namespaces + if tag.find(':') <> -1: + prefix, suffix = tag.split(':', 1) + else: + prefix, suffix = '', tag + prefix = self.namespacemap.get(prefix, prefix) + if prefix: + prefix = prefix + '_' + + # special hack for better tracking of empty textinput/image elements in illformed feeds + if (not prefix) and tag not in ('title', 'link', 'description', 'name'): + self.intextinput = 0 + if (not prefix) and tag not in ('title', 'link', 'description', 'url', 'href', 'width', 'height'): + self.inimage = 0 + + # call special handler (if defined) or default handler + methodname = '_start_' + prefix + suffix + try: + method = getattr(self, methodname) + return method(attrsD) + except AttributeError: + # Since there's no handler or something has gone wrong we explicitly add the element and its attributes + unknown_tag = prefix + suffix + if len(attrsD) == 0: + # No attributes so merge it into the encosing dictionary + return self.push(unknown_tag, 1) + else: + # Has attributes so create it in its own dictionary + context = self._getContext() + context[unknown_tag] = attrsD + + def unknown_endtag(self, tag): + if _debug: sys.stderr.write('end %s\n' % tag) + # match namespaces + if tag.find(':') <> -1: + prefix, suffix = tag.split(':', 1) + else: + prefix, suffix = '', tag + prefix = self.namespacemap.get(prefix, prefix) + if prefix: + prefix = prefix + '_' + if suffix == 'svg' and self.svgOK: self.svgOK -= 1 + + # call special handler (if defined) or default handler + methodname = '_end_' + prefix + suffix + try: + if self.svgOK: raise AttributeError() + method = getattr(self, methodname) + method() + except AttributeError: + self.pop(prefix + suffix) + + # track inline content + if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): + # element declared itself as escaped markup, but it isn't really + if tag in ['xhtml:div', 'div']: return # typepad does this 10/2007 + self.contentparams['type'] = 'application/xhtml+xml' + if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml': + tag = tag.split(':')[-1] + self.handle_data('' % tag, escape=0) + + # track xml:base and xml:lang going out of scope + if self.basestack: + self.basestack.pop() + if self.basestack and self.basestack[-1]: + self.baseuri = self.basestack[-1] + if self.langstack: + self.langstack.pop() + if self.langstack: # and (self.langstack[-1] is not None): + self.lang = self.langstack[-1] + + def handle_charref(self, ref): + # called for each character reference, e.g. for ' ', ref will be '160' + if not self.elementstack: return + ref = ref.lower() + if ref in ('34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e'): + text = '&#%s;' % ref + else: + if ref[0] == 'x': + c = int(ref[1:], 16) + else: + c = int(ref) + text = unichr(c).encode('utf-8') + self.elementstack[-1][2].append(text) + + def handle_entityref(self, ref): + # called for each entity reference, e.g. for '©', ref will be 'copy' + if not self.elementstack: return + if _debug: sys.stderr.write('entering handle_entityref with %s\n' % ref) + if ref in ('lt', 'gt', 'quot', 'amp', 'apos'): + text = '&%s;' % ref + elif ref in self.entities.keys(): + text = self.entities[ref] + if text.startswith('&#') and text.endswith(';'): + return self.handle_entityref(text) + else: + try: name2codepoint[ref] + except KeyError: text = '&%s;' % ref + else: text = unichr(name2codepoint[ref]).encode('utf-8') + self.elementstack[-1][2].append(text) + + def handle_data(self, text, escape=1): + # called for each block of plain text, i.e. outside of any tag and + # not containing any character or entity references + if not self.elementstack: return + if escape and self.contentparams.get('type') == 'application/xhtml+xml': + text = _xmlescape(text) + self.elementstack[-1][2].append(text) + + def handle_comment(self, text): + # called for each comment, e.g. + pass + + def handle_pi(self, text): + # called for each processing instruction, e.g. + pass + + def handle_decl(self, text): + pass + + def parse_declaration(self, i): + # override internal declaration handler to handle CDATA blocks + if _debug: sys.stderr.write('entering parse_declaration\n') + if self.rawdata[i:i+9] == '', i) + if k == -1: + # CDATA block began but didn't finish + k = len(self.rawdata) + return k + self.handle_data(_xmlescape(self.rawdata[i+9:k]), 0) + return k+3 + else: + k = self.rawdata.find('>', i) + if k >= 0: + return k+1 + else: + # We have an incomplete CDATA block. + return k + + def mapContentType(self, contentType): + contentType = contentType.lower() + if contentType == 'text' or contentType == 'plain': + contentType = 'text/plain' + elif contentType == 'html': + contentType = 'text/html' + elif contentType == 'xhtml': + contentType = 'application/xhtml+xml' + return contentType + + def trackNamespace(self, prefix, uri): + loweruri = uri.lower() + if (prefix, loweruri) == (None, 'http://my.netscape.com/rdf/simple/0.9/') and not self.version: + self.version = 'rss090' + if loweruri == 'http://purl.org/rss/1.0/' and not self.version: + self.version = 'rss10' + if loweruri == 'http://www.w3.org/2005/atom' and not self.version: + self.version = 'atom10' + if loweruri.find('backend.userland.com/rss') <> -1: + # match any backend.userland.com namespace + uri = 'http://backend.userland.com/rss' + loweruri = uri + if self._matchnamespaces.has_key(loweruri): + self.namespacemap[prefix] = self._matchnamespaces[loweruri] + self.namespacesInUse[self._matchnamespaces[loweruri]] = uri + else: + self.namespacesInUse[prefix or ''] = uri + + def resolveURI(self, uri): + return _urljoin(self.baseuri or '', uri) + + def decodeEntities(self, element, data): + return data + + def strattrs(self, attrs): + return ''.join([' %s="%s"' % (t[0],_xmlescape(t[1],{'"':'"'})) for t in attrs]) + + def push(self, element, expectingText): + self.elementstack.append([element, expectingText, []]) + + def pop(self, element, stripWhitespace=1): + if not self.elementstack: return + if self.elementstack[-1][0] != element: return + + element, expectingText, pieces = self.elementstack.pop() + + if self.version == 'atom10' and self.contentparams.get('type','text') == 'application/xhtml+xml': + # remove enclosing child element, but only if it is a
and + # only if all the remaining content is nested underneath it. + # This means that the divs would be retained in the following: + #
foo
bar
+ while pieces and len(pieces)>1 and not pieces[-1].strip(): + del pieces[-1] + while pieces and len(pieces)>1 and not pieces[0].strip(): + del pieces[0] + if pieces and (pieces[0] == '
' or pieces[0].startswith('
': + depth = 0 + for piece in pieces[:-1]: + if piece.startswith(''): + depth += 1 + else: + pieces = pieces[1:-1] + + # Ensure each piece is a str for Python 3 + for (i, v) in enumerate(pieces): + if not isinstance(v, basestring): + pieces[i] = v.decode('utf-8') + + output = ''.join(pieces) + if stripWhitespace: + output = output.strip() + if not expectingText: return output + + # decode base64 content + if base64 and self.contentparams.get('base64', 0): + try: + output = _base64decode(output) + except binascii.Error: + pass + except binascii.Incomplete: + pass + except TypeError: + # In Python 3, base64 takes and outputs bytes, not str + # This may not be the most correct way to accomplish this + output = _base64decode(output.encode('utf-8')).decode('utf-8') + + # resolve relative URIs + if (element in self.can_be_relative_uri) and output: + output = self.resolveURI(output) + + # decode entities within embedded markup + if not self.contentparams.get('base64', 0): + output = self.decodeEntities(element, output) + + if self.lookslikehtml(output): + self.contentparams['type']='text/html' + + # remove temporary cruft from contentparams + try: + del self.contentparams['mode'] + except KeyError: + pass + try: + del self.contentparams['base64'] + except KeyError: + pass + + is_htmlish = self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types + # resolve relative URIs within embedded markup + if is_htmlish and RESOLVE_RELATIVE_URIS: + if element in self.can_contain_relative_uris: + output = _resolveRelativeURIs(output, self.baseuri, self.encoding, self.contentparams.get('type', 'text/html')) + + # parse microformats + # (must do this before sanitizing because some microformats + # rely on elements that we sanitize) + if is_htmlish and element in ['content', 'description', 'summary']: + mfresults = _parseMicroformats(output, self.baseuri, self.encoding) + if mfresults: + for tag in mfresults.get('tags', []): + self._addTag(tag['term'], tag['scheme'], tag['label']) + for enclosure in mfresults.get('enclosures', []): + self._start_enclosure(enclosure) + for xfn in mfresults.get('xfn', []): + self._addXFN(xfn['relationships'], xfn['href'], xfn['name']) + vcard = mfresults.get('vcard') + if vcard: + self._getContext()['vcard'] = vcard + + # sanitize embedded markup + if is_htmlish and SANITIZE_HTML: + if element in self.can_contain_dangerous_markup: + output = _sanitizeHTML(output, self.encoding, self.contentparams.get('type', 'text/html')) + + if self.encoding and type(output) != type(u''): + try: + output = unicode(output, self.encoding) + except: + pass + + # address common error where people take data that is already + # utf-8, presume that it is iso-8859-1, and re-encode it. + if self.encoding in ('utf-8', 'utf-8_INVALID_PYTHON_3') and type(output) == type(u''): + try: + output = unicode(output.encode('iso-8859-1'), 'utf-8') + except: + pass + + # map win-1252 extensions to the proper code points + if type(output) == type(u''): + output = u''.join([c in _cp1252.keys() and _cp1252[c] or c for c in output]) + + # categories/tags/keywords/whatever are handled in _end_category + if element == 'category': + return output + + if element == 'title' and self.hasTitle: + return output + + # store output in appropriate place(s) + if self.inentry and not self.insource: + if element == 'content': + self.entries[-1].setdefault(element, []) + contentparams = copy.deepcopy(self.contentparams) + contentparams['value'] = output + self.entries[-1][element].append(contentparams) + elif element == 'link': + if not self.inimage: + # query variables in urls in link elements are improperly + # converted from `?a=1&b=2` to `?a=1&b;=2` as if they're + # unhandled character references. fix this special case. + output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output) + self.entries[-1][element] = output + if output: + self.entries[-1]['links'][-1]['href'] = output + else: + if element == 'description': + element = 'summary' + self.entries[-1][element] = output + if self.incontent: + contentparams = copy.deepcopy(self.contentparams) + contentparams['value'] = output + self.entries[-1][element + '_detail'] = contentparams + elif (self.infeed or self.insource):# and (not self.intextinput) and (not self.inimage): + context = self._getContext() + if element == 'description': + element = 'subtitle' + context[element] = output + if element == 'link': + # fix query variables; see above for the explanation + output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output) + context[element] = output + context['links'][-1]['href'] = output + elif self.incontent: + contentparams = copy.deepcopy(self.contentparams) + contentparams['value'] = output + context[element + '_detail'] = contentparams + return output + + def pushContent(self, tag, attrsD, defaultContentType, expectingText): + self.incontent += 1 + if self.lang: self.lang=self.lang.replace('_','-') + self.contentparams = FeedParserDict({ + 'type': self.mapContentType(attrsD.get('type', defaultContentType)), + 'language': self.lang, + 'base': self.baseuri}) + self.contentparams['base64'] = self._isBase64(attrsD, self.contentparams) + self.push(tag, expectingText) + + def popContent(self, tag): + value = self.pop(tag) + self.incontent -= 1 + self.contentparams.clear() + return value + + # a number of elements in a number of RSS variants are nominally plain + # text, but this is routinely ignored. This is an attempt to detect + # the most common cases. As false positives often result in silent + # data loss, this function errs on the conservative side. + def lookslikehtml(self, s): + if self.version.startswith('atom'): return + if self.contentparams.get('type','text/html') != 'text/plain': return + + # must have a close tag or a entity reference to qualify + if not (re.search(r'',s) or re.search("&#?\w+;",s)): return + + # all tags must be in a restricted subset of valid HTML tags + if filter(lambda t: t.lower() not in _HTMLSanitizer.acceptable_elements, + re.findall(r' -1: + prefix = name[:colonpos] + suffix = name[colonpos+1:] + prefix = self.namespacemap.get(prefix, prefix) + name = prefix + ':' + suffix + return name + + def _getAttribute(self, attrsD, name): + return attrsD.get(self._mapToStandardPrefix(name)) + + def _isBase64(self, attrsD, contentparams): + if attrsD.get('mode', '') == 'base64': + return 1 + if self.contentparams['type'].startswith('text/'): + return 0 + if self.contentparams['type'].endswith('+xml'): + return 0 + if self.contentparams['type'].endswith('/xml'): + return 0 + return 1 + + def _itsAnHrefDamnIt(self, attrsD): + href = attrsD.get('url', attrsD.get('uri', attrsD.get('href', None))) + if href: + try: + del attrsD['url'] + except KeyError: + pass + try: + del attrsD['uri'] + except KeyError: + pass + attrsD['href'] = href + return attrsD + + def _save(self, key, value, overwrite=False): + context = self._getContext() + if overwrite: + context[key] = value + else: + context.setdefault(key, value) + + def _start_rss(self, attrsD): + versionmap = {'0.91': 'rss091u', + '0.92': 'rss092', + '0.93': 'rss093', + '0.94': 'rss094'} + #If we're here then this is an RSS feed. + #If we don't have a version or have a version that starts with something + #other than RSS then there's been a mistake. Correct it. + if not self.version or not self.version.startswith('rss'): + attr_version = attrsD.get('version', '') + version = versionmap.get(attr_version) + if version: + self.version = version + elif attr_version.startswith('2.'): + self.version = 'rss20' + else: + self.version = 'rss' + + def _start_dlhottitles(self, attrsD): + self.version = 'hotrss' + + def _start_channel(self, attrsD): + self.infeed = 1 + self._cdf_common(attrsD) + _start_feedinfo = _start_channel + + def _cdf_common(self, attrsD): + if attrsD.has_key('lastmod'): + self._start_modified({}) + self.elementstack[-1][-1] = attrsD['lastmod'] + self._end_modified() + if attrsD.has_key('href'): + self._start_link({}) + self.elementstack[-1][-1] = attrsD['href'] + self._end_link() + + def _start_feed(self, attrsD): + self.infeed = 1 + versionmap = {'0.1': 'atom01', + '0.2': 'atom02', + '0.3': 'atom03'} + if not self.version: + attr_version = attrsD.get('version') + version = versionmap.get(attr_version) + if version: + self.version = version + else: + self.version = 'atom' + + def _end_channel(self): + self.infeed = 0 + _end_feed = _end_channel + + def _start_image(self, attrsD): + context = self._getContext() + if not self.inentry: + context.setdefault('image', FeedParserDict()) + self.inimage = 1 + self.hasTitle = 0 + self.push('image', 0) + + def _end_image(self): + self.pop('image') + self.inimage = 0 + + def _start_textinput(self, attrsD): + context = self._getContext() + context.setdefault('textinput', FeedParserDict()) + self.intextinput = 1 + self.hasTitle = 0 + self.push('textinput', 0) + _start_textInput = _start_textinput + + def _end_textinput(self): + self.pop('textinput') + self.intextinput = 0 + _end_textInput = _end_textinput + + def _start_author(self, attrsD): + self.inauthor = 1 + self.push('author', 1) + # Append a new FeedParserDict when expecting an author + context = self._getContext() + context.setdefault('authors', []) + context['authors'].append(FeedParserDict()) + _start_managingeditor = _start_author + _start_dc_author = _start_author + _start_dc_creator = _start_author + _start_itunes_author = _start_author + + def _end_author(self): + self.pop('author') + self.inauthor = 0 + self._sync_author_detail() + _end_managingeditor = _end_author + _end_dc_author = _end_author + _end_dc_creator = _end_author + _end_itunes_author = _end_author + + def _start_itunes_owner(self, attrsD): + self.inpublisher = 1 + self.push('publisher', 0) + + def _end_itunes_owner(self): + self.pop('publisher') + self.inpublisher = 0 + self._sync_author_detail('publisher') + + def _start_contributor(self, attrsD): + self.incontributor = 1 + context = self._getContext() + context.setdefault('contributors', []) + context['contributors'].append(FeedParserDict()) + self.push('contributor', 0) + + def _end_contributor(self): + self.pop('contributor') + self.incontributor = 0 + + def _start_dc_contributor(self, attrsD): + self.incontributor = 1 + context = self._getContext() + context.setdefault('contributors', []) + context['contributors'].append(FeedParserDict()) + self.push('name', 0) + + def _end_dc_contributor(self): + self._end_name() + self.incontributor = 0 + + def _start_name(self, attrsD): + self.push('name', 0) + _start_itunes_name = _start_name + + def _end_name(self): + value = self.pop('name') + if self.inpublisher: + self._save_author('name', value, 'publisher') + elif self.inauthor: + self._save_author('name', value) + elif self.incontributor: + self._save_contributor('name', value) + elif self.intextinput: + context = self._getContext() + context['name'] = value + _end_itunes_name = _end_name + + def _start_width(self, attrsD): + self.push('width', 0) + + def _end_width(self): + value = self.pop('width') + try: + value = int(value) + except: + value = 0 + if self.inimage: + context = self._getContext() + context['width'] = value + + def _start_height(self, attrsD): + self.push('height', 0) + + def _end_height(self): + value = self.pop('height') + try: + value = int(value) + except: + value = 0 + if self.inimage: + context = self._getContext() + context['height'] = value + + def _start_url(self, attrsD): + self.push('href', 1) + _start_homepage = _start_url + _start_uri = _start_url + + def _end_url(self): + value = self.pop('href') + if self.inauthor: + self._save_author('href', value) + elif self.incontributor: + self._save_contributor('href', value) + _end_homepage = _end_url + _end_uri = _end_url + + def _start_email(self, attrsD): + self.push('email', 0) + _start_itunes_email = _start_email + + def _end_email(self): + value = self.pop('email') + if self.inpublisher: + self._save_author('email', value, 'publisher') + elif self.inauthor: + self._save_author('email', value) + elif self.incontributor: + self._save_contributor('email', value) + _end_itunes_email = _end_email + + def _getContext(self): + if self.insource: + context = self.sourcedata + elif self.inimage and self.feeddata.has_key('image'): + context = self.feeddata['image'] + elif self.intextinput: + context = self.feeddata['textinput'] + elif self.inentry: + context = self.entries[-1] + else: + context = self.feeddata + return context + + def _save_author(self, key, value, prefix='author'): + context = self._getContext() + context.setdefault(prefix + '_detail', FeedParserDict()) + context[prefix + '_detail'][key] = value + self._sync_author_detail() + context.setdefault('authors', [FeedParserDict()]) + context['authors'][-1][key] = value + + def _save_contributor(self, key, value): + context = self._getContext() + context.setdefault('contributors', [FeedParserDict()]) + context['contributors'][-1][key] = value + + def _sync_author_detail(self, key='author'): + context = self._getContext() + detail = context.get('%s_detail' % key) + if detail: + name = detail.get('name') + email = detail.get('email') + if name and email: + context[key] = '%s (%s)' % (name, email) + elif name: + context[key] = name + elif email: + context[key] = email + else: + author, email = context.get(key), None + if not author: return + emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))(\?subject=\S+)?''', author) + if emailmatch: + email = emailmatch.group(0) + # probably a better way to do the following, but it passes all the tests + author = author.replace(email, '') + author = author.replace('()', '') + author = author.replace('<>', '') + author = author.replace('<>', '') + author = author.strip() + if author and (author[0] == '('): + author = author[1:] + if author and (author[-1] == ')'): + author = author[:-1] + author = author.strip() + if author or email: + context.setdefault('%s_detail' % key, FeedParserDict()) + if author: + context['%s_detail' % key]['name'] = author + if email: + context['%s_detail' % key]['email'] = email + + def _start_subtitle(self, attrsD): + self.pushContent('subtitle', attrsD, 'text/plain', 1) + _start_tagline = _start_subtitle + _start_itunes_subtitle = _start_subtitle + + def _end_subtitle(self): + self.popContent('subtitle') + _end_tagline = _end_subtitle + _end_itunes_subtitle = _end_subtitle + + def _start_rights(self, attrsD): + self.pushContent('rights', attrsD, 'text/plain', 1) + _start_dc_rights = _start_rights + _start_copyright = _start_rights + + def _end_rights(self): + self.popContent('rights') + _end_dc_rights = _end_rights + _end_copyright = _end_rights + + def _start_item(self, attrsD): + self.entries.append(FeedParserDict()) + self.push('item', 0) + self.inentry = 1 + self.guidislink = 0 + self.hasTitle = 0 + id = self._getAttribute(attrsD, 'rdf:about') + if id: + context = self._getContext() + context['id'] = id + self._cdf_common(attrsD) + _start_entry = _start_item + _start_product = _start_item + + def _end_item(self): + self.pop('item') + self.inentry = 0 + _end_entry = _end_item + + def _start_dc_language(self, attrsD): + self.push('language', 1) + _start_language = _start_dc_language + + def _end_dc_language(self): + self.lang = self.pop('language') + _end_language = _end_dc_language + + def _start_dc_publisher(self, attrsD): + self.push('publisher', 1) + _start_webmaster = _start_dc_publisher + + def _end_dc_publisher(self): + self.pop('publisher') + self._sync_author_detail('publisher') + _end_webmaster = _end_dc_publisher + + def _start_published(self, attrsD): + self.push('published', 1) + _start_dcterms_issued = _start_published + _start_issued = _start_published + + def _end_published(self): + value = self.pop('published') + self._save('published_parsed', _parse_date(value), overwrite=True) + _end_dcterms_issued = _end_published + _end_issued = _end_published + + def _start_updated(self, attrsD): + self.push('updated', 1) + _start_modified = _start_updated + _start_dcterms_modified = _start_updated + _start_pubdate = _start_updated + _start_dc_date = _start_updated + _start_lastbuilddate = _start_updated + + def _end_updated(self): + value = self.pop('updated') + parsed_value = _parse_date(value) + self._save('updated_parsed', parsed_value, overwrite=True) + _end_modified = _end_updated + _end_dcterms_modified = _end_updated + _end_pubdate = _end_updated + _end_dc_date = _end_updated + _end_lastbuilddate = _end_updated + + def _start_created(self, attrsD): + self.push('created', 1) + _start_dcterms_created = _start_created + + def _end_created(self): + value = self.pop('created') + self._save('created_parsed', _parse_date(value), overwrite=True) + _end_dcterms_created = _end_created + + def _start_expirationdate(self, attrsD): + self.push('expired', 1) + + def _end_expirationdate(self): + self._save('expired_parsed', _parse_date(self.pop('expired')), overwrite=True) + + def _start_cc_license(self, attrsD): + context = self._getContext() + value = self._getAttribute(attrsD, 'rdf:resource') + attrsD = FeedParserDict() + attrsD['rel']='license' + if value: attrsD['href']=value + context.setdefault('links', []).append(attrsD) + + def _start_creativecommons_license(self, attrsD): + self.push('license', 1) + _start_creativeCommons_license = _start_creativecommons_license + + def _end_creativecommons_license(self): + value = self.pop('license') + context = self._getContext() + attrsD = FeedParserDict() + attrsD['rel']='license' + if value: attrsD['href']=value + context.setdefault('links', []).append(attrsD) + del context['license'] + _end_creativeCommons_license = _end_creativecommons_license + + def _addXFN(self, relationships, href, name): + context = self._getContext() + xfn = context.setdefault('xfn', []) + value = FeedParserDict({'relationships': relationships, 'href': href, 'name': name}) + if value not in xfn: + xfn.append(value) + + def _addTag(self, term, scheme, label): + context = self._getContext() + tags = context.setdefault('tags', []) + if (not term) and (not scheme) and (not label): return + value = FeedParserDict({'term': term, 'scheme': scheme, 'label': label}) + if value not in tags: + tags.append(value) + + def _start_category(self, attrsD): + if _debug: sys.stderr.write('entering _start_category with %s\n' % repr(attrsD)) + term = attrsD.get('term') + scheme = attrsD.get('scheme', attrsD.get('domain')) + label = attrsD.get('label') + self._addTag(term, scheme, label) + self.push('category', 1) + _start_dc_subject = _start_category + _start_keywords = _start_category + + def _start_media_category(self, attrsD): + attrsD.setdefault('scheme', 'http://search.yahoo.com/mrss/category_schema') + self._start_category(attrsD) + + def _end_itunes_keywords(self): + for term in self.pop('itunes_keywords').split(): + self._addTag(term, 'http://www.itunes.com/', None) + + def _start_itunes_category(self, attrsD): + self._addTag(attrsD.get('text'), 'http://www.itunes.com/', None) + self.push('category', 1) + + def _end_category(self): + value = self.pop('category') + if not value: return + context = self._getContext() + tags = context['tags'] + if value and len(tags) and not tags[-1]['term']: + tags[-1]['term'] = value + else: + self._addTag(value, None, None) + _end_dc_subject = _end_category + _end_keywords = _end_category + _end_itunes_category = _end_category + _end_media_category = _end_category + + def _start_cloud(self, attrsD): + self._getContext()['cloud'] = FeedParserDict(attrsD) + + def _start_link(self, attrsD): + attrsD.setdefault('rel', 'alternate') + if attrsD['rel'] == 'self': + attrsD.setdefault('type', 'application/atom+xml') + else: + attrsD.setdefault('type', 'text/html') + context = self._getContext() + attrsD = self._itsAnHrefDamnIt(attrsD) + if attrsD.has_key('href'): + attrsD['href'] = self.resolveURI(attrsD['href']) + expectingText = self.infeed or self.inentry or self.insource + context.setdefault('links', []) + if not (self.inentry and self.inimage): + context['links'].append(FeedParserDict(attrsD)) + if attrsD.has_key('href'): + expectingText = 0 + if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types): + context['link'] = attrsD['href'] + else: + self.push('link', expectingText) + _start_producturl = _start_link + + def _end_link(self): + value = self.pop('link') + context = self._getContext() + _end_producturl = _end_link + + def _start_guid(self, attrsD): + self.guidislink = (attrsD.get('ispermalink', 'true') == 'true') + self.push('id', 1) + + def _end_guid(self): + value = self.pop('id') + self._save('guidislink', self.guidislink and not self._getContext().has_key('link')) + if self.guidislink: + # guid acts as link, but only if 'ispermalink' is not present or is 'true', + # and only if the item doesn't already have a link element + self._save('link', value) + + def _start_title(self, attrsD): + if self.svgOK: return self.unknown_starttag('title', attrsD.items()) + self.pushContent('title', attrsD, 'text/plain', self.infeed or self.inentry or self.insource) + _start_dc_title = _start_title + _start_media_title = _start_title + + def _end_title(self): + if self.svgOK: return + value = self.popContent('title') + if not value: return + context = self._getContext() + self.hasTitle = 1 + _end_dc_title = _end_title + + def _end_media_title(self): + hasTitle = self.hasTitle + self._end_title() + self.hasTitle = hasTitle + + def _start_description(self, attrsD): + context = self._getContext() + if context.has_key('summary'): + self._summaryKey = 'content' + self._start_content(attrsD) + else: + self.pushContent('description', attrsD, 'text/html', self.infeed or self.inentry or self.insource) + _start_dc_description = _start_description + + def _start_abstract(self, attrsD): + self.pushContent('description', attrsD, 'text/plain', self.infeed or self.inentry or self.insource) + + def _end_description(self): + if self._summaryKey == 'content': + self._end_content() + else: + value = self.popContent('description') + self._summaryKey = None + _end_abstract = _end_description + _end_dc_description = _end_description + + def _start_info(self, attrsD): + self.pushContent('info', attrsD, 'text/plain', 1) + _start_feedburner_browserfriendly = _start_info + + def _end_info(self): + self.popContent('info') + _end_feedburner_browserfriendly = _end_info + + def _start_generator(self, attrsD): + if attrsD: + attrsD = self._itsAnHrefDamnIt(attrsD) + if attrsD.has_key('href'): + attrsD['href'] = self.resolveURI(attrsD['href']) + self._getContext()['generator_detail'] = FeedParserDict(attrsD) + self.push('generator', 1) + + def _end_generator(self): + value = self.pop('generator') + context = self._getContext() + if context.has_key('generator_detail'): + context['generator_detail']['name'] = value + + def _start_admin_generatoragent(self, attrsD): + self.push('generator', 1) + value = self._getAttribute(attrsD, 'rdf:resource') + if value: + self.elementstack[-1][2].append(value) + self.pop('generator') + self._getContext()['generator_detail'] = FeedParserDict({'href': value}) + + def _start_admin_errorreportsto(self, attrsD): + self.push('errorreportsto', 1) + value = self._getAttribute(attrsD, 'rdf:resource') + if value: + self.elementstack[-1][2].append(value) + self.pop('errorreportsto') + + def _start_summary(self, attrsD): + context = self._getContext() + if context.has_key('summary'): + self._summaryKey = 'content' + self._start_content(attrsD) + else: + self._summaryKey = 'summary' + self.pushContent(self._summaryKey, attrsD, 'text/plain', 1) + _start_itunes_summary = _start_summary + + def _end_summary(self): + if self._summaryKey == 'content': + self._end_content() + else: + self.popContent(self._summaryKey or 'summary') + self._summaryKey = None + _end_itunes_summary = _end_summary + + def _start_enclosure(self, attrsD): + attrsD = self._itsAnHrefDamnIt(attrsD) + context = self._getContext() + attrsD['rel']='enclosure' + context.setdefault('links', []).append(FeedParserDict(attrsD)) + + def _start_source(self, attrsD): + if 'url' in attrsD: + # This means that we're processing a source element from an RSS 2.0 feed + self.sourcedata['href'] = attrsD[u'url'] + self.push('source', 1) + self.insource = 1 + self.hasTitle = 0 + + def _end_source(self): + self.insource = 0 + value = self.pop('source') + if value: + self.sourcedata['title'] = value + self._getContext()['source'] = copy.deepcopy(self.sourcedata) + self.sourcedata.clear() + + def _start_content(self, attrsD): + self.pushContent('content', attrsD, 'text/plain', 1) + src = attrsD.get('src') + if src: + self.contentparams['src'] = src + self.push('content', 1) + + def _start_prodlink(self, attrsD): + self.pushContent('content', attrsD, 'text/html', 1) + + def _start_body(self, attrsD): + self.pushContent('content', attrsD, 'application/xhtml+xml', 1) + _start_xhtml_body = _start_body + + def _start_content_encoded(self, attrsD): + self.pushContent('content', attrsD, 'text/html', 1) + _start_fullitem = _start_content_encoded + + def _end_content(self): + copyToSummary = self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types) + value = self.popContent('content') + if copyToSummary: + self._save('summary', value) + + _end_body = _end_content + _end_xhtml_body = _end_content + _end_content_encoded = _end_content + _end_fullitem = _end_content + _end_prodlink = _end_content + + def _start_itunes_image(self, attrsD): + self.push('itunes_image', 0) + if attrsD.get('href'): + self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')}) + _start_itunes_link = _start_itunes_image + + def _end_itunes_block(self): + value = self.pop('itunes_block', 0) + self._getContext()['itunes_block'] = (value == 'yes') and 1 or 0 + + def _end_itunes_explicit(self): + value = self.pop('itunes_explicit', 0) + # Convert 'yes' -> True, 'clean' to False, and any other value to None + # False and None both evaluate as False, so the difference can be ignored + # by applications that only need to know if the content is explicit. + self._getContext()['itunes_explicit'] = (None, False, True)[(value == 'yes' and 2) or value == 'clean' or 0] + + def _start_media_content(self, attrsD): + context = self._getContext() + context.setdefault('media_content', []) + context['media_content'].append(attrsD) + + def _start_media_thumbnail(self, attrsD): + context = self._getContext() + context.setdefault('media_thumbnail', []) + self.push('url', 1) # new + context['media_thumbnail'].append(attrsD) + + def _end_media_thumbnail(self): + url = self.pop('url') + context = self._getContext() + if url != None and len(url.strip()) != 0: + if not context['media_thumbnail'][-1].has_key('url'): + context['media_thumbnail'][-1]['url'] = url + + def _start_media_player(self, attrsD): + self.push('media_player', 0) + self._getContext()['media_player'] = FeedParserDict(attrsD) + + def _end_media_player(self): + value = self.pop('media_player') + context = self._getContext() + context['media_player']['content'] = value + + def _start_newlocation(self, attrsD): + self.push('newlocation', 1) + + def _end_newlocation(self): + url = self.pop('newlocation') + context = self._getContext() + # don't set newlocation if the context isn't right + if context is not self.feeddata: + return + context['newlocation'] = _makeSafeAbsoluteURI(self.baseuri, url.strip()) + +if _XML_AVAILABLE: + class _StrictFeedParser(_FeedParserMixin, xml.sax.handler.ContentHandler): + def __init__(self, baseuri, baselang, encoding): + if _debug: sys.stderr.write('trying StrictFeedParser\n') + xml.sax.handler.ContentHandler.__init__(self) + _FeedParserMixin.__init__(self, baseuri, baselang, encoding) + self.bozo = 0 + self.exc = None + self.decls = {} + + def startPrefixMapping(self, prefix, uri): + self.trackNamespace(prefix, uri) + if uri == 'http://www.w3.org/1999/xlink': + self.decls['xmlns:'+prefix] = uri + + def startElementNS(self, name, qname, attrs): + namespace, localname = name + lowernamespace = str(namespace or '').lower() + if lowernamespace.find('backend.userland.com/rss') <> -1: + # match any backend.userland.com namespace + namespace = 'http://backend.userland.com/rss' + lowernamespace = namespace + if qname and qname.find(':') > 0: + givenprefix = qname.split(':')[0] + else: + givenprefix = None + prefix = self._matchnamespaces.get(lowernamespace, givenprefix) + if givenprefix and (prefix is None or (prefix == '' and lowernamespace == '')) and not self.namespacesInUse.has_key(givenprefix): + raise UndeclaredNamespace, "'%s' is not associated with a namespace" % givenprefix + localname = str(localname).lower() + + # qname implementation is horribly broken in Python 2.1 (it + # doesn't report any), and slightly broken in Python 2.2 (it + # doesn't report the xml: namespace). So we match up namespaces + # with a known list first, and then possibly override them with + # the qnames the SAX parser gives us (if indeed it gives us any + # at all). Thanks to MatejC for helping me test this and + # tirelessly telling me that it didn't work yet. + attrsD, self.decls = self.decls, {} + if localname=='math' and namespace=='http://www.w3.org/1998/Math/MathML': + attrsD['xmlns']=namespace + if localname=='svg' and namespace=='http://www.w3.org/2000/svg': + attrsD['xmlns']=namespace + + if prefix: + localname = prefix.lower() + ':' + localname + elif namespace and not qname: #Expat + for name,value in self.namespacesInUse.items(): + if name and value == namespace: + localname = name + ':' + localname + break + if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname)) + + for (namespace, attrlocalname), attrvalue in attrs._attrs.items(): + lowernamespace = (namespace or '').lower() + prefix = self._matchnamespaces.get(lowernamespace, '') + if prefix: + attrlocalname = prefix + ':' + attrlocalname + attrsD[str(attrlocalname).lower()] = attrvalue + for qname in attrs.getQNames(): + attrsD[str(qname).lower()] = attrs.getValueByQName(qname) + self.unknown_starttag(localname, attrsD.items()) + + def characters(self, text): + self.handle_data(text) + + def endElementNS(self, name, qname): + namespace, localname = name + lowernamespace = str(namespace or '').lower() + if qname and qname.find(':') > 0: + givenprefix = qname.split(':')[0] + else: + givenprefix = '' + prefix = self._matchnamespaces.get(lowernamespace, givenprefix) + if prefix: + localname = prefix + ':' + localname + elif namespace and not qname: #Expat + for name,value in self.namespacesInUse.items(): + if name and value == namespace: + localname = name + ':' + localname + break + localname = str(localname).lower() + self.unknown_endtag(localname) + + def error(self, exc): + self.bozo = 1 + self.exc = exc + + def fatalError(self, exc): + self.error(exc) + raise exc + +class _BaseHTMLProcessor(sgmllib.SGMLParser): + special = re.compile('''[<>'"]''') + bare_ampersand = re.compile("&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)") + elements_no_end_tag = [ + 'area', 'base', 'basefont', 'br', 'col', 'command', 'embed', 'frame', + 'hr', 'img', 'input', 'isindex', 'keygen', 'link', 'meta', 'param', + 'source', 'track', 'wbr' + ] + + def __init__(self, encoding, _type): + self.encoding = encoding + self._type = _type + if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding) + sgmllib.SGMLParser.__init__(self) + + def reset(self): + self.pieces = [] + sgmllib.SGMLParser.reset(self) + + def _shorttag_replace(self, match): + tag = match.group(1) + if tag in self.elements_no_end_tag: + return '<' + tag + ' />' + else: + return '<' + tag + '>' + + def parse_starttag(self,i): + j=sgmllib.SGMLParser.parse_starttag(self, i) + if self._type == 'application/xhtml+xml': + if j>2 and self.rawdata[j-2:j]=='/>': + self.unknown_endtag(self.lasttag) + return j + + def feed(self, data): + data = re.compile(r'', self._shorttag_replace, data) # bug [ 1399464 ] Bad regexp for _shorttag_replace + data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data) + data = data.replace(''', "'") + data = data.replace('"', '"') + try: + bytes + if bytes is str: + raise NameError + self.encoding = self.encoding + '_INVALID_PYTHON_3' + except NameError: + if self.encoding and type(data) == type(u''): + data = data.encode(self.encoding) + sgmllib.SGMLParser.feed(self, data) + sgmllib.SGMLParser.close(self) + + def normalize_attrs(self, attrs): + if not attrs: return attrs + # utility method to be called by descendants + attrs = dict([(k.lower(), v) for k, v in attrs]).items() + attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs] + attrs.sort() + return attrs + + def unknown_starttag(self, tag, attrs): + # called for each start tag + # attrs is a list of (attr, value) tuples + # e.g. for
, tag='pre', attrs=[('class', 'screen')]
+        if _debug: sys.stderr.write('_BaseHTMLProcessor, unknown_starttag, tag=%s\n' % tag)
+        uattrs = []
+        strattrs=''
+        if attrs:
+            for key, value in attrs:
+                value=value.replace('>','>').replace('<','<').replace('"','"')
+                value = self.bare_ampersand.sub("&", value)
+                # thanks to Kevin Marks for this breathtaking hack to deal with (valid) high-bit attribute values in UTF-8 feeds
+                if type(value) != type(u''):
+                    try:
+                        value = unicode(value, self.encoding)
+                    except:
+                        value = unicode(value, 'iso-8859-1')
+                try:
+                    # Currently, in Python 3 the key is already a str, and cannot be decoded again
+                    uattrs.append((unicode(key, self.encoding), value))
+                except TypeError:
+                    uattrs.append((key, value))
+            strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs])
+            if self.encoding:
+                try:
+                    strattrs=strattrs.encode(self.encoding)
+                except:
+                    pass
+        if tag in self.elements_no_end_tag:
+            self.pieces.append('<%(tag)s%(strattrs)s />' % locals())
+        else:
+            self.pieces.append('<%(tag)s%(strattrs)s>' % locals())
+
+    def unknown_endtag(self, tag):
+        # called for each end tag, e.g. for 
, tag will be 'pre' + # Reconstruct the original end tag. + if tag not in self.elements_no_end_tag: + self.pieces.append("" % locals()) + + def handle_charref(self, ref): + # called for each character reference, e.g. for ' ', ref will be '160' + # Reconstruct the original character reference. + if ref.startswith('x'): + value = unichr(int(ref[1:],16)) + else: + value = unichr(int(ref)) + + if value in _cp1252.keys(): + self.pieces.append('&#%s;' % hex(ord(_cp1252[value]))[1:]) + else: + self.pieces.append('&#%(ref)s;' % locals()) + + def handle_entityref(self, ref): + # called for each entity reference, e.g. for '©', ref will be 'copy' + # Reconstruct the original entity reference. + if name2codepoint.has_key(ref): + self.pieces.append('&%(ref)s;' % locals()) + else: + self.pieces.append('&%(ref)s' % locals()) + + def handle_data(self, text): + # called for each block of plain text, i.e. outside of any tag and + # not containing any character or entity references + # Store the original text verbatim. + if _debug: sys.stderr.write('_BaseHTMLProcessor, handle_data, text=%s\n' % text) + self.pieces.append(text) + + def handle_comment(self, text): + # called for each HTML comment, e.g. + # Reconstruct the original comment. + self.pieces.append('' % locals()) + + def handle_pi(self, text): + # called for each processing instruction, e.g. + # Reconstruct original processing instruction. + self.pieces.append('' % locals()) + + def handle_decl(self, text): + # called for the DOCTYPE, if present, e.g. + # + # Reconstruct original DOCTYPE + self.pieces.append('' % locals()) + + _new_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9:]*\s*').match + def _scan_name(self, i, declstartpos): + rawdata = self.rawdata + n = len(rawdata) + if i == n: + return None, -1 + m = self._new_declname_match(rawdata, i) + if m: + s = m.group() + name = s.strip() + if (i + len(s)) == n: + return None, -1 # end of buffer + return name.lower(), m.end() + else: + self.handle_data(rawdata) +# self.updatepos(declstartpos, i) + return None, -1 + + def convert_charref(self, name): + return '&#%s;' % name + + def convert_entityref(self, name): + return '&%s;' % name + + def output(self): + '''Return processed HTML as a single string''' + return ''.join([str(p) for p in self.pieces]) + + def parse_declaration(self, i): + try: + return sgmllib.SGMLParser.parse_declaration(self, i) + except sgmllib.SGMLParseError: + # escape the doctype declaration and continue parsing + self.handle_data('<') + return i+1 + +class _LooseFeedParser(_FeedParserMixin, _BaseHTMLProcessor): + def __init__(self, baseuri, baselang, encoding, entities): + sgmllib.SGMLParser.__init__(self) + _FeedParserMixin.__init__(self, baseuri, baselang, encoding) + _BaseHTMLProcessor.__init__(self, encoding, 'application/xhtml+xml') + self.entities=entities + + def decodeEntities(self, element, data): + data = data.replace('<', '<') + data = data.replace('<', '<') + data = data.replace('<', '<') + data = data.replace('>', '>') + data = data.replace('>', '>') + data = data.replace('>', '>') + data = data.replace('&', '&') + data = data.replace('&', '&') + data = data.replace('"', '"') + data = data.replace('"', '"') + data = data.replace(''', ''') + data = data.replace(''', ''') + if self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): + data = data.replace('<', '<') + data = data.replace('>', '>') + data = data.replace('&', '&') + data = data.replace('"', '"') + data = data.replace(''', "'") + return data + + def strattrs(self, attrs): + return ''.join([' %s="%s"' % (n,v.replace('"','"')) for n,v in attrs]) + +class _MicroformatsParser: + STRING = 1 + DATE = 2 + URI = 3 + NODE = 4 + EMAIL = 5 + + known_xfn_relationships = ['contact', 'acquaintance', 'friend', 'met', 'co-worker', 'coworker', 'colleague', 'co-resident', 'coresident', 'neighbor', 'child', 'parent', 'sibling', 'brother', 'sister', 'spouse', 'wife', 'husband', 'kin', 'relative', 'muse', 'crush', 'date', 'sweetheart', 'me'] + known_binary_extensions = ['zip','rar','exe','gz','tar','tgz','tbz2','bz2','z','7z','dmg','img','sit','sitx','hqx','deb','rpm','bz2','jar','rar','iso','bin','msi','mp2','mp3','ogg','ogm','mp4','m4v','m4a','avi','wma','wmv'] + + def __init__(self, data, baseuri, encoding): + self.document = BeautifulSoup.BeautifulSoup(data) + self.baseuri = baseuri + self.encoding = encoding + if type(data) == type(u''): + data = data.encode(encoding) + self.tags = [] + self.enclosures = [] + self.xfn = [] + self.vcard = None + + def vcardEscape(self, s): + if type(s) in (type(''), type(u'')): + s = s.replace(',', '\\,').replace(';', '\\;').replace('\n', '\\n') + return s + + def vcardFold(self, s): + s = re.sub(';+$', '', s) + sFolded = '' + iMax = 75 + sPrefix = '' + while len(s) > iMax: + sFolded += sPrefix + s[:iMax] + '\n' + s = s[iMax:] + sPrefix = ' ' + iMax = 74 + sFolded += sPrefix + s + return sFolded + + def normalize(self, s): + return re.sub(r'\s+', ' ', s).strip() + + def unique(self, aList): + results = [] + for element in aList: + if element not in results: + results.append(element) + return results + + def toISO8601(self, dt): + return time.strftime('%Y-%m-%dT%H:%M:%SZ', dt) + + def getPropertyValue(self, elmRoot, sProperty, iPropertyType=4, bAllowMultiple=0, bAutoEscape=0): + all = lambda x: 1 + sProperty = sProperty.lower() + bFound = 0 + bNormalize = 1 + propertyMatch = {'class': re.compile(r'\b%s\b' % sProperty)} + if bAllowMultiple and (iPropertyType != self.NODE): + snapResults = [] + containers = elmRoot(['ul', 'ol'], propertyMatch) + for container in containers: + snapResults.extend(container('li')) + bFound = (len(snapResults) != 0) + if not bFound: + snapResults = elmRoot(all, propertyMatch) + bFound = (len(snapResults) != 0) + if (not bFound) and (sProperty == 'value'): + snapResults = elmRoot('pre') + bFound = (len(snapResults) != 0) + bNormalize = not bFound + if not bFound: + snapResults = [elmRoot] + bFound = (len(snapResults) != 0) + arFilter = [] + if sProperty == 'vcard': + snapFilter = elmRoot(all, propertyMatch) + for node in snapFilter: + if node.findParent(all, propertyMatch): + arFilter.append(node) + arResults = [] + for node in snapResults: + if node not in arFilter: + arResults.append(node) + bFound = (len(arResults) != 0) + if not bFound: + if bAllowMultiple: return [] + elif iPropertyType == self.STRING: return '' + elif iPropertyType == self.DATE: return None + elif iPropertyType == self.URI: return '' + elif iPropertyType == self.NODE: return None + else: return None + arValues = [] + for elmResult in arResults: + sValue = None + if iPropertyType == self.NODE: + if bAllowMultiple: + arValues.append(elmResult) + continue + else: + return elmResult + sNodeName = elmResult.name.lower() + if (iPropertyType == self.EMAIL) and (sNodeName == 'a'): + sValue = (elmResult.get('href') or '').split('mailto:').pop().split('?')[0] + if sValue: + sValue = bNormalize and self.normalize(sValue) or sValue.strip() + if (not sValue) and (sNodeName == 'abbr'): + sValue = elmResult.get('title') + if sValue: + sValue = bNormalize and self.normalize(sValue) or sValue.strip() + if (not sValue) and (iPropertyType == self.URI): + if sNodeName == 'a': sValue = elmResult.get('href') + elif sNodeName == 'img': sValue = elmResult.get('src') + elif sNodeName == 'object': sValue = elmResult.get('data') + if sValue: + sValue = bNormalize and self.normalize(sValue) or sValue.strip() + if (not sValue) and (sNodeName == 'img'): + sValue = elmResult.get('alt') + if sValue: + sValue = bNormalize and self.normalize(sValue) or sValue.strip() + if not sValue: + sValue = elmResult.renderContents() + sValue = re.sub(r'<\S[^>]*>', '', sValue) + sValue = sValue.replace('\r\n', '\n') + sValue = sValue.replace('\r', '\n') + if sValue: + sValue = bNormalize and self.normalize(sValue) or sValue.strip() + if not sValue: continue + if iPropertyType == self.DATE: + sValue = _parse_date_iso8601(sValue) + if bAllowMultiple: + arValues.append(bAutoEscape and self.vcardEscape(sValue) or sValue) + else: + return bAutoEscape and self.vcardEscape(sValue) or sValue + return arValues + + def findVCards(self, elmRoot, bAgentParsing=0): + sVCards = '' + + if not bAgentParsing: + arCards = self.getPropertyValue(elmRoot, 'vcard', bAllowMultiple=1) + else: + arCards = [elmRoot] + + for elmCard in arCards: + arLines = [] + + def processSingleString(sProperty): + sValue = self.getPropertyValue(elmCard, sProperty, self.STRING, bAutoEscape=1).decode(self.encoding) + if sValue: + arLines.append(self.vcardFold(sProperty.upper() + ':' + sValue)) + return sValue or u'' + + def processSingleURI(sProperty): + sValue = self.getPropertyValue(elmCard, sProperty, self.URI) + if sValue: + sContentType = '' + sEncoding = '' + sValueKey = '' + if sValue.startswith('data:'): + sEncoding = ';ENCODING=b' + sContentType = sValue.split(';')[0].split('/').pop() + sValue = sValue.split(',', 1).pop() + else: + elmValue = self.getPropertyValue(elmCard, sProperty) + if elmValue: + if sProperty != 'url': + sValueKey = ';VALUE=uri' + sContentType = elmValue.get('type', '').strip().split('/').pop().strip() + sContentType = sContentType.upper() + if sContentType == 'OCTET-STREAM': + sContentType = '' + if sContentType: + sContentType = ';TYPE=' + sContentType.upper() + arLines.append(self.vcardFold(sProperty.upper() + sEncoding + sContentType + sValueKey + ':' + sValue)) + + def processTypeValue(sProperty, arDefaultType, arForceType=None): + arResults = self.getPropertyValue(elmCard, sProperty, bAllowMultiple=1) + for elmResult in arResults: + arType = self.getPropertyValue(elmResult, 'type', self.STRING, 1, 1) + if arForceType: + arType = self.unique(arForceType + arType) + if not arType: + arType = arDefaultType + sValue = self.getPropertyValue(elmResult, 'value', self.EMAIL, 0) + if sValue: + arLines.append(self.vcardFold(sProperty.upper() + ';TYPE=' + ','.join(arType) + ':' + sValue)) + + # AGENT + # must do this before all other properties because it is destructive + # (removes nested class="vcard" nodes so they don't interfere with + # this vcard's other properties) + arAgent = self.getPropertyValue(elmCard, 'agent', bAllowMultiple=1) + for elmAgent in arAgent: + if re.compile(r'\bvcard\b').search(elmAgent.get('class')): + sAgentValue = self.findVCards(elmAgent, 1) + '\n' + sAgentValue = sAgentValue.replace('\n', '\\n') + sAgentValue = sAgentValue.replace(';', '\\;') + if sAgentValue: + arLines.append(self.vcardFold('AGENT:' + sAgentValue)) + # Completely remove the agent element from the parse tree + elmAgent.extract() + else: + sAgentValue = self.getPropertyValue(elmAgent, 'value', self.URI, bAutoEscape=1); + if sAgentValue: + arLines.append(self.vcardFold('AGENT;VALUE=uri:' + sAgentValue)) + + # FN (full name) + sFN = processSingleString('fn') + + # N (name) + elmName = self.getPropertyValue(elmCard, 'n') + if elmName: + sFamilyName = self.getPropertyValue(elmName, 'family-name', self.STRING, bAutoEscape=1) + sGivenName = self.getPropertyValue(elmName, 'given-name', self.STRING, bAutoEscape=1) + arAdditionalNames = self.getPropertyValue(elmName, 'additional-name', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'additional-names', self.STRING, 1, 1) + arHonorificPrefixes = self.getPropertyValue(elmName, 'honorific-prefix', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'honorific-prefixes', self.STRING, 1, 1) + arHonorificSuffixes = self.getPropertyValue(elmName, 'honorific-suffix', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'honorific-suffixes', self.STRING, 1, 1) + arLines.append(self.vcardFold('N:' + sFamilyName + ';' + + sGivenName + ';' + + ','.join(arAdditionalNames) + ';' + + ','.join(arHonorificPrefixes) + ';' + + ','.join(arHonorificSuffixes))) + elif sFN: + # implied "N" optimization + # http://microformats.org/wiki/hcard#Implied_.22N.22_Optimization + arNames = self.normalize(sFN).split() + if len(arNames) == 2: + bFamilyNameFirst = (arNames[0].endswith(',') or + len(arNames[1]) == 1 or + ((len(arNames[1]) == 2) and (arNames[1].endswith('.')))) + if bFamilyNameFirst: + arLines.append(self.vcardFold('N:' + arNames[0] + ';' + arNames[1])) + else: + arLines.append(self.vcardFold('N:' + arNames[1] + ';' + arNames[0])) + + # SORT-STRING + sSortString = self.getPropertyValue(elmCard, 'sort-string', self.STRING, bAutoEscape=1) + if sSortString: + arLines.append(self.vcardFold('SORT-STRING:' + sSortString)) + + # NICKNAME + arNickname = self.getPropertyValue(elmCard, 'nickname', self.STRING, 1, 1) + if arNickname: + arLines.append(self.vcardFold('NICKNAME:' + ','.join(arNickname))) + + # PHOTO + processSingleURI('photo') + + # BDAY + dtBday = self.getPropertyValue(elmCard, 'bday', self.DATE) + if dtBday: + arLines.append(self.vcardFold('BDAY:' + self.toISO8601(dtBday))) + + # ADR (address) + arAdr = self.getPropertyValue(elmCard, 'adr', bAllowMultiple=1) + for elmAdr in arAdr: + arType = self.getPropertyValue(elmAdr, 'type', self.STRING, 1, 1) + if not arType: + arType = ['intl','postal','parcel','work'] # default adr types, see RFC 2426 section 3.2.1 + sPostOfficeBox = self.getPropertyValue(elmAdr, 'post-office-box', self.STRING, 0, 1) + sExtendedAddress = self.getPropertyValue(elmAdr, 'extended-address', self.STRING, 0, 1) + sStreetAddress = self.getPropertyValue(elmAdr, 'street-address', self.STRING, 0, 1) + sLocality = self.getPropertyValue(elmAdr, 'locality', self.STRING, 0, 1) + sRegion = self.getPropertyValue(elmAdr, 'region', self.STRING, 0, 1) + sPostalCode = self.getPropertyValue(elmAdr, 'postal-code', self.STRING, 0, 1) + sCountryName = self.getPropertyValue(elmAdr, 'country-name', self.STRING, 0, 1) + arLines.append(self.vcardFold('ADR;TYPE=' + ','.join(arType) + ':' + + sPostOfficeBox + ';' + + sExtendedAddress + ';' + + sStreetAddress + ';' + + sLocality + ';' + + sRegion + ';' + + sPostalCode + ';' + + sCountryName)) + + # LABEL + processTypeValue('label', ['intl','postal','parcel','work']) + + # TEL (phone number) + processTypeValue('tel', ['voice']) + + # EMAIL + processTypeValue('email', ['internet'], ['internet']) + + # MAILER + processSingleString('mailer') + + # TZ (timezone) + processSingleString('tz') + + # GEO (geographical information) + elmGeo = self.getPropertyValue(elmCard, 'geo') + if elmGeo: + sLatitude = self.getPropertyValue(elmGeo, 'latitude', self.STRING, 0, 1) + sLongitude = self.getPropertyValue(elmGeo, 'longitude', self.STRING, 0, 1) + arLines.append(self.vcardFold('GEO:' + sLatitude + ';' + sLongitude)) + + # TITLE + processSingleString('title') + + # ROLE + processSingleString('role') + + # LOGO + processSingleURI('logo') + + # ORG (organization) + elmOrg = self.getPropertyValue(elmCard, 'org') + if elmOrg: + sOrganizationName = self.getPropertyValue(elmOrg, 'organization-name', self.STRING, 0, 1) + if not sOrganizationName: + # implied "organization-name" optimization + # http://microformats.org/wiki/hcard#Implied_.22organization-name.22_Optimization + sOrganizationName = self.getPropertyValue(elmCard, 'org', self.STRING, 0, 1) + if sOrganizationName: + arLines.append(self.vcardFold('ORG:' + sOrganizationName)) + else: + arOrganizationUnit = self.getPropertyValue(elmOrg, 'organization-unit', self.STRING, 1, 1) + arLines.append(self.vcardFold('ORG:' + sOrganizationName + ';' + ';'.join(arOrganizationUnit))) + + # CATEGORY + arCategory = self.getPropertyValue(elmCard, 'category', self.STRING, 1, 1) + self.getPropertyValue(elmCard, 'categories', self.STRING, 1, 1) + if arCategory: + arLines.append(self.vcardFold('CATEGORIES:' + ','.join(arCategory))) + + # NOTE + processSingleString('note') + + # REV + processSingleString('rev') + + # SOUND + processSingleURI('sound') + + # UID + processSingleString('uid') + + # URL + processSingleURI('url') + + # CLASS + processSingleString('class') + + # KEY + processSingleURI('key') + + if arLines: + arLines = [u'BEGIN:vCard',u'VERSION:3.0'] + arLines + [u'END:vCard'] + sVCards += u'\n'.join(arLines) + u'\n' + + return sVCards.strip() + + def isProbablyDownloadable(self, elm): + attrsD = elm.attrMap + if not attrsD.has_key('href'): return 0 + linktype = attrsD.get('type', '').strip() + if linktype.startswith('audio/') or \ + linktype.startswith('video/') or \ + (linktype.startswith('application/') and not linktype.endswith('xml')): + return 1 + path = urlparse.urlparse(attrsD['href'])[2] + if path.find('.') == -1: return 0 + fileext = path.split('.').pop().lower() + return fileext in self.known_binary_extensions + + def findTags(self): + all = lambda x: 1 + for elm in self.document(all, {'rel': re.compile(r'\btag\b')}): + href = elm.get('href') + if not href: continue + urlscheme, domain, path, params, query, fragment = \ + urlparse.urlparse(_urljoin(self.baseuri, href)) + segments = path.split('/') + tag = segments.pop() + if not tag: + tag = segments.pop() + tagscheme = urlparse.urlunparse((urlscheme, domain, '/'.join(segments), '', '', '')) + if not tagscheme.endswith('/'): + tagscheme += '/' + self.tags.append(FeedParserDict({"term": tag, "scheme": tagscheme, "label": elm.string or ''})) + + def findEnclosures(self): + all = lambda x: 1 + enclosure_match = re.compile(r'\benclosure\b') + for elm in self.document(all, {'href': re.compile(r'.+')}): + if not enclosure_match.search(elm.get('rel', '')) and not self.isProbablyDownloadable(elm): continue + if elm.attrMap not in self.enclosures: + self.enclosures.append(elm.attrMap) + if elm.string and not elm.get('title'): + self.enclosures[-1]['title'] = elm.string + + def findXFN(self): + all = lambda x: 1 + for elm in self.document(all, {'rel': re.compile('.+'), 'href': re.compile('.+')}): + rels = elm.get('rel', '').split() + xfn_rels = [] + for rel in rels: + if rel in self.known_xfn_relationships: + xfn_rels.append(rel) + if xfn_rels: + self.xfn.append({"relationships": xfn_rels, "href": elm.get('href', ''), "name": elm.string}) + +def _parseMicroformats(htmlSource, baseURI, encoding): + if not BeautifulSoup: return + if _debug: sys.stderr.write('entering _parseMicroformats\n') + try: + p = _MicroformatsParser(htmlSource, baseURI, encoding) + except UnicodeEncodeError: + # sgmllib throws this exception when performing lookups of tags + # with non-ASCII characters in them. + return + p.vcard = p.findVCards(p.document) + p.findTags() + p.findEnclosures() + p.findXFN() + return {"tags": p.tags, "enclosures": p.enclosures, "xfn": p.xfn, "vcard": p.vcard} + +class _RelativeURIResolver(_BaseHTMLProcessor): + relative_uris = [('a', 'href'), + ('applet', 'codebase'), + ('area', 'href'), + ('blockquote', 'cite'), + ('body', 'background'), + ('del', 'cite'), + ('form', 'action'), + ('frame', 'longdesc'), + ('frame', 'src'), + ('iframe', 'longdesc'), + ('iframe', 'src'), + ('head', 'profile'), + ('img', 'longdesc'), + ('img', 'src'), + ('img', 'usemap'), + ('input', 'src'), + ('input', 'usemap'), + ('ins', 'cite'), + ('link', 'href'), + ('object', 'classid'), + ('object', 'codebase'), + ('object', 'data'), + ('object', 'usemap'), + ('q', 'cite'), + ('script', 'src')] + + def __init__(self, baseuri, encoding, _type): + _BaseHTMLProcessor.__init__(self, encoding, _type) + self.baseuri = baseuri + + def resolveURI(self, uri): + return _makeSafeAbsoluteURI(_urljoin(self.baseuri, uri.strip())) + + def unknown_starttag(self, tag, attrs): + if _debug: + sys.stderr.write('tag: [%s] with attributes: [%s]\n' % (tag, str(attrs))) + attrs = self.normalize_attrs(attrs) + attrs = [(key, ((tag, key) in self.relative_uris) and self.resolveURI(value) or value) for key, value in attrs] + _BaseHTMLProcessor.unknown_starttag(self, tag, attrs) + +def _resolveRelativeURIs(htmlSource, baseURI, encoding, _type): + if _debug: + sys.stderr.write('entering _resolveRelativeURIs\n') + + p = _RelativeURIResolver(baseURI, encoding, _type) + p.feed(htmlSource) + return p.output() + +def _makeSafeAbsoluteURI(base, rel=None): + # bail if ACCEPTABLE_URI_SCHEMES is empty + if not ACCEPTABLE_URI_SCHEMES: + return _urljoin(base, rel or u'') + if not base: + return rel or u'' + if not rel: + scheme = urlparse.urlparse(base)[0] + if not scheme or scheme in ACCEPTABLE_URI_SCHEMES: + return base + return u'' + uri = _urljoin(base, rel) + if uri.strip().split(':', 1)[0] not in ACCEPTABLE_URI_SCHEMES: + return u'' + return uri + +class _HTMLSanitizer(_BaseHTMLProcessor): + acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', + 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button', + 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', + 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', + 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset', + 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1', + 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', + 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter', + 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option', + 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select', + 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong', + 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', + 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video', 'noscript'] + + acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey', + 'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis', + 'background', 'balance', 'bgcolor', 'bgproperties', 'border', + 'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding', + 'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff', + 'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color', 'cols', + 'colspan', 'compact', 'contenteditable', 'controls', 'coords', 'data', + 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default', 'delay', + 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end', 'face', 'for', + 'form', 'frame', 'galleryimg', 'gutter', 'headers', 'height', 'hidefocus', + 'hidden', 'high', 'href', 'hreflang', 'hspace', 'icon', 'id', 'inputmode', + 'ismap', 'keytype', 'label', 'leftspacing', 'lang', 'list', 'longdesc', + 'loop', 'loopcount', 'loopend', 'loopstart', 'low', 'lowsrc', 'max', + 'maxlength', 'media', 'method', 'min', 'multiple', 'name', 'nohref', + 'noshade', 'nowrap', 'open', 'optimum', 'pattern', 'ping', 'point-size', + 'prompt', 'pqg', 'radiogroup', 'readonly', 'rel', 'repeat-max', + 'repeat-min', 'replace', 'required', 'rev', 'rightspacing', 'rows', + 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', + 'start', 'step', 'summary', 'suppress', 'tabindex', 'target', 'template', + 'title', 'toppadding', 'type', 'unselectable', 'usemap', 'urn', 'valign', + 'value', 'variable', 'volume', 'vspace', 'vrml', 'width', 'wrap', + 'xml:lang'] + + unacceptable_elements_with_end_tag = ['script', 'applet', 'style'] + + acceptable_css_properties = ['azimuth', 'background-color', + 'border-bottom-color', 'border-collapse', 'border-color', + 'border-left-color', 'border-right-color', 'border-top-color', 'clear', + 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font', + 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight', + 'height', 'letter-spacing', 'line-height', 'overflow', 'pause', + 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness', + 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation', + 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent', + 'unicode-bidi', 'vertical-align', 'voice-family', 'volume', + 'white-space', 'width'] + + # survey of common keywords found in feeds + acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue', + 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed', + 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left', + 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive', + 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top', + 'transparent', 'underline', 'white', 'yellow'] + + valid_css_values = re.compile('^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|' + + '\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$') + + mathml_elements = ['annotation', 'annotation-xml', 'maction', 'math', + 'merror', 'mfenced', 'mfrac', 'mi', 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', + 'mphantom', 'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', + 'msub', 'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', + 'munderover', 'none', 'semantics'] + + mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign', + 'columnalign', 'close', 'columnlines', 'columnspacing', 'columnspan', 'depth', + 'display', 'displaystyle', 'encoding', 'equalcolumns', 'equalrows', + 'fence', 'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', + 'lspace', 'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', + 'maxsize', 'minsize', 'open', 'other', 'rowalign', 'rowalign', 'rowalign', + 'rowlines', 'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection', + 'separator', 'separators', 'stretchy', 'width', 'width', 'xlink:href', + 'xlink:show', 'xlink:type', 'xmlns', 'xmlns:xlink'] + + # svgtiny - foreignObject + linearGradient + radialGradient + stop + svg_elements = ['a', 'animate', 'animateColor', 'animateMotion', + 'animateTransform', 'circle', 'defs', 'desc', 'ellipse', 'foreignObject', + 'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', + 'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph', 'mpath', + 'path', 'polygon', 'polyline', 'radialGradient', 'rect', 'set', 'stop', + 'svg', 'switch', 'text', 'title', 'tspan', 'use'] + + # svgtiny + class + opacity + offset + xmlns + xmlns:xlink + svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic', + 'arabic-form', 'ascent', 'attributeName', 'attributeType', + 'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height', + 'class', 'color', 'color-rendering', 'content', 'cx', 'cy', 'd', 'dx', + 'dy', 'descent', 'display', 'dur', 'end', 'fill', 'fill-opacity', + 'fill-rule', 'font-family', 'font-size', 'font-stretch', 'font-style', + 'font-variant', 'font-weight', 'from', 'fx', 'fy', 'g1', 'g2', + 'glyph-name', 'gradientUnits', 'hanging', 'height', 'horiz-adv-x', + 'horiz-origin-x', 'id', 'ideographic', 'k', 'keyPoints', 'keySplines', + 'keyTimes', 'lang', 'mathematical', 'marker-end', 'marker-mid', + 'marker-start', 'markerHeight', 'markerUnits', 'markerWidth', 'max', + 'min', 'name', 'offset', 'opacity', 'orient', 'origin', + 'overline-position', 'overline-thickness', 'panose-1', 'path', + 'pathLength', 'points', 'preserveAspectRatio', 'r', 'refX', 'refY', + 'repeatCount', 'repeatDur', 'requiredExtensions', 'requiredFeatures', + 'restart', 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', + 'stop-color', 'stop-opacity', 'strikethrough-position', + 'strikethrough-thickness', 'stroke', 'stroke-dasharray', + 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin', + 'stroke-miterlimit', 'stroke-opacity', 'stroke-width', 'systemLanguage', + 'target', 'text-anchor', 'to', 'transform', 'type', 'u1', 'u2', + 'underline-position', 'underline-thickness', 'unicode', 'unicode-range', + 'units-per-em', 'values', 'version', 'viewBox', 'visibility', 'width', + 'widths', 'x', 'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole', + 'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type', + 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y', 'y1', + 'y2', 'zoomAndPan'] + + svg_attr_map = None + svg_elem_map = None + + acceptable_svg_properties = [ 'fill', 'fill-opacity', 'fill-rule', + 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin', + 'stroke-opacity'] + + def reset(self): + _BaseHTMLProcessor.reset(self) + self.unacceptablestack = 0 + self.mathmlOK = 0 + self.svgOK = 0 + + def unknown_starttag(self, tag, attrs): + acceptable_attributes = self.acceptable_attributes + keymap = {} + if not tag in self.acceptable_elements or self.svgOK: + if tag in self.unacceptable_elements_with_end_tag: + self.unacceptablestack += 1 + + # add implicit namespaces to html5 inline svg/mathml + if self._type.endswith('html'): + if not dict(attrs).get('xmlns'): + if tag=='svg': + attrs.append( ('xmlns','http://www.w3.org/2000/svg') ) + if tag=='math': + attrs.append( ('xmlns','http://www.w3.org/1998/Math/MathML') ) + + # not otherwise acceptable, perhaps it is MathML or SVG? + if tag=='math' and ('xmlns','http://www.w3.org/1998/Math/MathML') in attrs: + self.mathmlOK += 1 + if tag=='svg' and ('xmlns','http://www.w3.org/2000/svg') in attrs: + self.svgOK += 1 + + # chose acceptable attributes based on tag class, else bail + if self.mathmlOK and tag in self.mathml_elements: + acceptable_attributes = self.mathml_attributes + elif self.svgOK and tag in self.svg_elements: + # for most vocabularies, lowercasing is a good idea. Many + # svg elements, however, are camel case + if not self.svg_attr_map: + lower=[attr.lower() for attr in self.svg_attributes] + mix=[a for a in self.svg_attributes if a not in lower] + self.svg_attributes = lower + self.svg_attr_map = dict([(a.lower(),a) for a in mix]) + + lower=[attr.lower() for attr in self.svg_elements] + mix=[a for a in self.svg_elements if a not in lower] + self.svg_elements = lower + self.svg_elem_map = dict([(a.lower(),a) for a in mix]) + acceptable_attributes = self.svg_attributes + tag = self.svg_elem_map.get(tag,tag) + keymap = self.svg_attr_map + elif not tag in self.acceptable_elements: + return + + # declare xlink namespace, if needed + if self.mathmlOK or self.svgOK: + if filter(lambda (n,v): n.startswith('xlink:'),attrs): + if not ('xmlns:xlink','http://www.w3.org/1999/xlink') in attrs: + attrs.append(('xmlns:xlink','http://www.w3.org/1999/xlink')) + + clean_attrs = [] + for key, value in self.normalize_attrs(attrs): + if key in acceptable_attributes: + key=keymap.get(key,key) + # make sure the uri uses an acceptable uri scheme + if key == u'href': + value = _makeSafeAbsoluteURI(value) + clean_attrs.append((key,value)) + elif key=='style': + clean_value = self.sanitize_style(value) + if clean_value: clean_attrs.append((key,clean_value)) + _BaseHTMLProcessor.unknown_starttag(self, tag, clean_attrs) + + def unknown_endtag(self, tag): + if not tag in self.acceptable_elements: + if tag in self.unacceptable_elements_with_end_tag: + self.unacceptablestack -= 1 + if self.mathmlOK and tag in self.mathml_elements: + if tag == 'math' and self.mathmlOK: self.mathmlOK -= 1 + elif self.svgOK and tag in self.svg_elements: + tag = self.svg_elem_map.get(tag,tag) + if tag == 'svg' and self.svgOK: self.svgOK -= 1 + else: + return + _BaseHTMLProcessor.unknown_endtag(self, tag) + + def handle_pi(self, text): + pass + + def handle_decl(self, text): + pass + + def handle_data(self, text): + if not self.unacceptablestack: + _BaseHTMLProcessor.handle_data(self, text) + + def sanitize_style(self, style): + # disallow urls + style=re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ',style) + + # gauntlet + if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return '' + # This replaced a regexp that used re.match and was prone to pathological back-tracking. + if re.sub("\s*[-\w]+\s*:\s*[^:;]*;?", '', style).strip(): return '' + + clean = [] + for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style): + if not value: continue + if prop.lower() in self.acceptable_css_properties: + clean.append(prop + ': ' + value + ';') + elif prop.split('-')[0].lower() in ['background','border','margin','padding']: + for keyword in value.split(): + if not keyword in self.acceptable_css_keywords and \ + not self.valid_css_values.match(keyword): + break + else: + clean.append(prop + ': ' + value + ';') + elif self.svgOK and prop.lower() in self.acceptable_svg_properties: + clean.append(prop + ': ' + value + ';') + + return ' '.join(clean) + + def parse_comment(self, i, report=1): + ret = _BaseHTMLProcessor.parse_comment(self, i, report) + if ret >= 0: + return ret + # if ret == -1, this may be a malicious attempt to circumvent + # sanitization, or a page-destroying unclosed comment + match = re.compile(r'--[^>]*>').search(self.rawdata, i+4) + if match: + return match.end() + # unclosed comment; deliberately fail to handle_data() + return len(self.rawdata) + + +def _sanitizeHTML(htmlSource, encoding, _type): + p = _HTMLSanitizer(encoding, _type) + htmlSource = htmlSource.replace(''): + data = data.split('>', 1)[1] + if data.count('= '2.3.3' + assert base64 != None + user, passw = _base64decode(req.headers['Authorization'].split(' ')[1]).split(':') + realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0] + self.add_password(realm, host, user, passw) + retry = self.http_error_auth_reqed('www-authenticate', host, req, headers) + self.reset_retry_count() + return retry + except: + return self.http_error_default(req, fp, code, msg, headers) + +def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers): + """URL, filename, or string --> stream + + This function lets you define parsers that take any input source + (URL, pathname to local or network file, or actual data as a string) + and deal with it in a uniform manner. Returned object is guaranteed + to have all the basic stdio read methods (read, readline, readlines). + Just .close() the object when you're done with it. + + If the etag argument is supplied, it will be used as the value of an + If-None-Match request header. + + If the modified argument is supplied, it can be a tuple of 9 integers + (as returned by gmtime() in the standard Python time module) or a date + string in any format supported by feedparser. Regardless, it MUST + be in GMT (Greenwich Mean Time). It will be reformatted into an + RFC 1123-compliant date and used as the value of an If-Modified-Since + request header. + + If the agent argument is supplied, it will be used as the value of a + User-Agent request header. + + If the referrer argument is supplied, it will be used as the value of a + Referer[sic] request header. + + If handlers is supplied, it is a list of handlers used to build a + urllib2 opener. + + if request_headers is supplied it is a dictionary of HTTP request headers + that will override the values generated by FeedParser. + """ + + if hasattr(url_file_stream_or_string, 'read'): + return url_file_stream_or_string + + if url_file_stream_or_string == '-': + return sys.stdin + + if urlparse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp', 'file', 'feed'): + # Deal with the feed URI scheme + if url_file_stream_or_string.startswith('feed:http'): + url_file_stream_or_string = url_file_stream_or_string[5:] + elif url_file_stream_or_string.startswith('feed:'): + url_file_stream_or_string = 'http:' + url_file_stream_or_string[5:] + if not agent: + agent = USER_AGENT + # test for inline user:password for basic auth + auth = None + if base64: + urltype, rest = urllib.splittype(url_file_stream_or_string) + realhost, rest = urllib.splithost(rest) + if realhost: + user_passwd, realhost = urllib.splituser(realhost) + if user_passwd: + url_file_stream_or_string = '%s://%s%s' % (urltype, realhost, rest) + auth = base64.standard_b64encode(user_passwd).strip() + + # iri support + try: + if isinstance(url_file_stream_or_string,unicode): + url_file_stream_or_string = url_file_stream_or_string.encode('idna').decode('utf-8') + else: + url_file_stream_or_string = url_file_stream_or_string.decode('utf-8').encode('idna').decode('utf-8') + except: + pass + + # try to open with urllib2 (to use optional headers) + request = _build_urllib2_request(url_file_stream_or_string, agent, etag, modified, referrer, auth, request_headers) + opener = apply(urllib2.build_opener, tuple(handlers + [_FeedURLHandler()])) + opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent + try: + return opener.open(request, timeout=15) + finally: + opener.close() # JohnD + + # try to open with native open function (if url_file_stream_or_string is a filename) + try: + return open(url_file_stream_or_string, 'rb') + except: + pass + + # treat url_file_stream_or_string as string + return _StringIO(str(url_file_stream_or_string)) + +def _build_urllib2_request(url, agent, etag, modified, referrer, auth, request_headers): + request = urllib2.Request(url) + request.add_header('User-Agent', agent) + if etag: + request.add_header('If-None-Match', etag) + if type(modified) == type(''): + modified = _parse_date(modified) + elif isinstance(modified, datetime.datetime): + modified = modified.utctimetuple() + if modified: + # format into an RFC 1123-compliant timestamp. We can't use + # time.strftime() since the %a and %b directives can be affected + # by the current locale, but RFC 2616 states that dates must be + # in English. + short_weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + request.add_header('If-Modified-Since', '%s, %02d %s %04d %02d:%02d:%02d GMT' % (short_weekdays[modified[6]], modified[2], months[modified[1] - 1], modified[0], modified[3], modified[4], modified[5])) + if referrer: + request.add_header('Referer', referrer) + if gzip and zlib: + request.add_header('Accept-encoding', 'gzip, deflate') + elif gzip: + request.add_header('Accept-encoding', 'gzip') + elif zlib: + request.add_header('Accept-encoding', 'deflate') + else: + request.add_header('Accept-encoding', '') + if auth: + request.add_header('Authorization', 'Basic %s' % auth) + if ACCEPT_HEADER: + request.add_header('Accept', ACCEPT_HEADER) + # use this for whatever -- cookies, special headers, etc + # [('Cookie','Something'),('x-special-header','Another Value')] + for header_name, header_value in request_headers.items(): + request.add_header(header_name, header_value) + request.add_header('A-IM', 'feed') # RFC 3229 support + return request + +_date_handlers = [] +def registerDateHandler(func): + '''Register a date handler function (takes string, returns 9-tuple date in GMT)''' + _date_handlers.insert(0, func) + +# ISO-8601 date parsing routines written by Fazal Majid. +# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601 +# parser is beyond the scope of feedparser and would be a worthwhile addition +# to the Python library. +# A single regular expression cannot parse ISO 8601 date formats into groups +# as the standard is highly irregular (for instance is 030104 2003-01-04 or +# 0301-04-01), so we use templates instead. +# Please note the order in templates is significant because we need a +# greedy match. +_iso8601_tmpl = ['YYYY-?MM-?DD', 'YYYY-0MM?-?DD', 'YYYY-MM', 'YYYY-?OOO', + 'YY-?MM-?DD', 'YY-?OOO', 'YYYY', + '-YY-?MM', '-OOO', '-YY', + '--MM-?DD', '--MM', + '---DD', + 'CC', ''] +_iso8601_re = [ + tmpl.replace( + 'YYYY', r'(?P\d{4})').replace( + 'YY', r'(?P\d\d)').replace( + 'MM', r'(?P[01]\d)').replace( + 'DD', r'(?P[0123]\d)').replace( + 'OOO', r'(?P[0123]\d\d)').replace( + 'CC', r'(?P\d\d$)') + + r'(T?(?P\d{2}):(?P\d{2})' + + r'(:(?P\d{2}))?' + + r'(\.(?P\d+))?' + + r'(?P[+-](?P\d{2})(:(?P\d{2}))?|Z)?)?' + for tmpl in _iso8601_tmpl] +try: + del tmpl +except NameError: + pass +_iso8601_matches = [re.compile(regex).match for regex in _iso8601_re] +try: + del regex +except NameError: + pass +def _parse_date_iso8601(dateString): + '''Parse a variety of ISO-8601-compatible formats like 20040105''' + m = None + for _iso8601_match in _iso8601_matches: + m = _iso8601_match(dateString) + if m: break + if not m: return + if m.span() == (0, 0): return + params = m.groupdict() + ordinal = params.get('ordinal', 0) + if ordinal: + ordinal = int(ordinal) + else: + ordinal = 0 + year = params.get('year', '--') + if not year or year == '--': + year = time.gmtime()[0] + elif len(year) == 2: + # ISO 8601 assumes current century, i.e. 93 -> 2093, NOT 1993 + year = 100 * int(time.gmtime()[0] / 100) + int(year) + else: + year = int(year) + month = params.get('month', '-') + if not month or month == '-': + # ordinals are NOT normalized by mktime, we simulate them + # by setting month=1, day=ordinal + if ordinal: + month = 1 + else: + month = time.gmtime()[1] + month = int(month) + day = params.get('day', 0) + if not day: + # see above + if ordinal: + day = ordinal + elif params.get('century', 0) or \ + params.get('year', 0) or params.get('month', 0): + day = 1 + else: + day = time.gmtime()[2] + else: + day = int(day) + # special case of the century - is the first year of the 21st century + # 2000 or 2001 ? The debate goes on... + if 'century' in params.keys(): + year = (int(params['century']) - 1) * 100 + 1 + # in ISO 8601 most fields are optional + for field in ['hour', 'minute', 'second', 'tzhour', 'tzmin']: + if not params.get(field, None): + params[field] = 0 + hour = int(params.get('hour', 0)) + minute = int(params.get('minute', 0)) + second = int(float(params.get('second', 0))) + # weekday is normalized by mktime(), we can ignore it + weekday = 0 + daylight_savings_flag = -1 + tm = [year, month, day, hour, minute, second, weekday, + ordinal, daylight_savings_flag] + # ISO 8601 time zone adjustments + tz = params.get('tz') + if tz and tz != 'Z': + if tz[0] == '-': + tm[3] += int(params.get('tzhour', 0)) + tm[4] += int(params.get('tzmin', 0)) + elif tz[0] == '+': + tm[3] -= int(params.get('tzhour', 0)) + tm[4] -= int(params.get('tzmin', 0)) + else: + return None + # Python's time.mktime() is a wrapper around the ANSI C mktime(3c) + # which is guaranteed to normalize d/m/y/h/m/s. + # Many implementations have bugs, but we'll pretend they don't. + return time.localtime(time.mktime(tuple(tm))) +registerDateHandler(_parse_date_iso8601) + +# 8-bit date handling routines written by ytrewq1. +_korean_year = u'\ub144' # b3e2 in euc-kr +_korean_month = u'\uc6d4' # bff9 in euc-kr +_korean_day = u'\uc77c' # c0cf in euc-kr +_korean_am = u'\uc624\uc804' # bfc0 c0fc in euc-kr +_korean_pm = u'\uc624\ud6c4' # bfc0 c8c4 in euc-kr + +_korean_onblog_date_re = \ + re.compile('(\d{4})%s\s+(\d{2})%s\s+(\d{2})%s\s+(\d{2}):(\d{2}):(\d{2})' % \ + (_korean_year, _korean_month, _korean_day)) +_korean_nate_date_re = \ + re.compile(u'(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})' % \ + (_korean_am, _korean_pm)) +def _parse_date_onblog(dateString): + '''Parse a string according to the OnBlog 8-bit date format''' + m = _korean_onblog_date_re.match(dateString) + if not m: return + w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \ + {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\ + 'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\ + 'zonediff': '+09:00'} + if _debug: sys.stderr.write('OnBlog date parsed as: %s\n' % w3dtfdate) + return _parse_date_w3dtf(w3dtfdate) +registerDateHandler(_parse_date_onblog) + +def _parse_date_nate(dateString): + '''Parse a string according to the Nate 8-bit date format''' + m = _korean_nate_date_re.match(dateString) + if not m: return + hour = int(m.group(5)) + ampm = m.group(4) + if (ampm == _korean_pm): + hour += 12 + hour = str(hour) + if len(hour) == 1: + hour = '0' + hour + w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \ + {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\ + 'hour': hour, 'minute': m.group(6), 'second': m.group(7),\ + 'zonediff': '+09:00'} + if _debug: sys.stderr.write('Nate date parsed as: %s\n' % w3dtfdate) + return _parse_date_w3dtf(w3dtfdate) +registerDateHandler(_parse_date_nate) + +_mssql_date_re = \ + re.compile('(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})(\.\d+)?') +def _parse_date_mssql(dateString): + '''Parse a string according to the MS SQL date format''' + m = _mssql_date_re.match(dateString) + if not m: return + w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \ + {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\ + 'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\ + 'zonediff': '+09:00'} + if _debug: sys.stderr.write('MS SQL date parsed as: %s\n' % w3dtfdate) + return _parse_date_w3dtf(w3dtfdate) +registerDateHandler(_parse_date_mssql) + +# Unicode strings for Greek date strings +_greek_months = \ + { \ + u'\u0399\u03b1\u03bd': u'Jan', # c9e1ed in iso-8859-7 + u'\u03a6\u03b5\u03b2': u'Feb', # d6e5e2 in iso-8859-7 + u'\u039c\u03ac\u03ce': u'Mar', # ccdcfe in iso-8859-7 + u'\u039c\u03b1\u03ce': u'Mar', # cce1fe in iso-8859-7 + u'\u0391\u03c0\u03c1': u'Apr', # c1f0f1 in iso-8859-7 + u'\u039c\u03ac\u03b9': u'May', # ccdce9 in iso-8859-7 + u'\u039c\u03b1\u03ca': u'May', # cce1fa in iso-8859-7 + u'\u039c\u03b1\u03b9': u'May', # cce1e9 in iso-8859-7 + u'\u0399\u03bf\u03cd\u03bd': u'Jun', # c9effded in iso-8859-7 + u'\u0399\u03bf\u03bd': u'Jun', # c9efed in iso-8859-7 + u'\u0399\u03bf\u03cd\u03bb': u'Jul', # c9effdeb in iso-8859-7 + u'\u0399\u03bf\u03bb': u'Jul', # c9f9eb in iso-8859-7 + u'\u0391\u03cd\u03b3': u'Aug', # c1fde3 in iso-8859-7 + u'\u0391\u03c5\u03b3': u'Aug', # c1f5e3 in iso-8859-7 + u'\u03a3\u03b5\u03c0': u'Sep', # d3e5f0 in iso-8859-7 + u'\u039f\u03ba\u03c4': u'Oct', # cfeaf4 in iso-8859-7 + u'\u039d\u03bf\u03ad': u'Nov', # cdefdd in iso-8859-7 + u'\u039d\u03bf\u03b5': u'Nov', # cdefe5 in iso-8859-7 + u'\u0394\u03b5\u03ba': u'Dec', # c4e5ea in iso-8859-7 + } + +_greek_wdays = \ + { \ + u'\u039a\u03c5\u03c1': u'Sun', # caf5f1 in iso-8859-7 + u'\u0394\u03b5\u03c5': u'Mon', # c4e5f5 in iso-8859-7 + u'\u03a4\u03c1\u03b9': u'Tue', # d4f1e9 in iso-8859-7 + u'\u03a4\u03b5\u03c4': u'Wed', # d4e5f4 in iso-8859-7 + u'\u03a0\u03b5\u03bc': u'Thu', # d0e5ec in iso-8859-7 + u'\u03a0\u03b1\u03c1': u'Fri', # d0e1f1 in iso-8859-7 + u'\u03a3\u03b1\u03b2': u'Sat', # d3e1e2 in iso-8859-7 + } + +_greek_date_format_re = \ + re.compile(u'([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)') + +def _parse_date_greek(dateString): + '''Parse a string according to a Greek 8-bit date format.''' + m = _greek_date_format_re.match(dateString) + if not m: return + try: + wday = _greek_wdays[m.group(1)] + month = _greek_months[m.group(3)] + except: + return + rfc822date = '%(wday)s, %(day)s %(month)s %(year)s %(hour)s:%(minute)s:%(second)s %(zonediff)s' % \ + {'wday': wday, 'day': m.group(2), 'month': month, 'year': m.group(4),\ + 'hour': m.group(5), 'minute': m.group(6), 'second': m.group(7),\ + 'zonediff': m.group(8)} + if _debug: sys.stderr.write('Greek date parsed as: %s\n' % rfc822date) + return _parse_date_rfc822(rfc822date) +registerDateHandler(_parse_date_greek) + +# Unicode strings for Hungarian date strings +_hungarian_months = \ + { \ + u'janu\u00e1r': u'01', # e1 in iso-8859-2 + u'febru\u00e1ri': u'02', # e1 in iso-8859-2 + u'm\u00e1rcius': u'03', # e1 in iso-8859-2 + u'\u00e1prilis': u'04', # e1 in iso-8859-2 + u'm\u00e1ujus': u'05', # e1 in iso-8859-2 + u'j\u00fanius': u'06', # fa in iso-8859-2 + u'j\u00falius': u'07', # fa in iso-8859-2 + u'augusztus': u'08', + u'szeptember': u'09', + u'okt\u00f3ber': u'10', # f3 in iso-8859-2 + u'november': u'11', + u'december': u'12', + } + +_hungarian_date_format_re = \ + re.compile(u'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})((\+|-)(\d{,2}:\d{2}))') + +def _parse_date_hungarian(dateString): + '''Parse a string according to a Hungarian 8-bit date format.''' + m = _hungarian_date_format_re.match(dateString) + if not m: return + try: + month = _hungarian_months[m.group(2)] + day = m.group(3) + if len(day) == 1: + day = '0' + day + hour = m.group(4) + if len(hour) == 1: + hour = '0' + hour + except: + return + w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s%(zonediff)s' % \ + {'year': m.group(1), 'month': month, 'day': day,\ + 'hour': hour, 'minute': m.group(5),\ + 'zonediff': m.group(6)} + if _debug: sys.stderr.write('Hungarian date parsed as: %s\n' % w3dtfdate) + return _parse_date_w3dtf(w3dtfdate) +registerDateHandler(_parse_date_hungarian) + +# W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by +# Drake and licensed under the Python license. Removed all range checking +# for month, day, hour, minute, and second, since mktime will normalize +# these later +def _parse_date_w3dtf(dateString): + def __extract_date(m): + year = int(m.group('year')) + if year < 100: + year = 100 * int(time.gmtime()[0] / 100) + int(year) + if year < 1000: + return 0, 0, 0 + julian = m.group('julian') + if julian: + julian = int(julian) + month = julian / 30 + 1 + day = julian % 30 + 1 + jday = None + while jday != julian: + t = time.mktime((year, month, day, 0, 0, 0, 0, 0, 0)) + jday = time.gmtime(t)[-2] + diff = abs(jday - julian) + if jday > julian: + if diff < day: + day = day - diff + else: + month = month - 1 + day = 31 + elif jday < julian: + if day + diff < 28: + day = day + diff + else: + month = month + 1 + return year, month, day + month = m.group('month') + day = 1 + if month is None: + month = 1 + else: + month = int(month) + day = m.group('day') + if day: + day = int(day) + else: + day = 1 + return year, month, day + + def __extract_time(m): + if not m: + return 0, 0, 0 + hours = m.group('hours') + if not hours: + return 0, 0, 0 + hours = int(hours) + minutes = int(m.group('minutes')) + seconds = m.group('seconds') + if seconds: + seconds = int(seconds) + else: + seconds = 0 + return hours, minutes, seconds + + def __extract_tzd(m): + '''Return the Time Zone Designator as an offset in seconds from UTC.''' + if not m: + return 0 + tzd = m.group('tzd') + if not tzd: + return 0 + if tzd == 'Z': + return 0 + hours = int(m.group('tzdhours')) + minutes = m.group('tzdminutes') + if minutes: + minutes = int(minutes) + else: + minutes = 0 + offset = (hours*60 + minutes) * 60 + if tzd[0] == '+': + return -offset + return offset + + __date_re = ('(?P\d\d\d\d)' + '(?:(?P-|)' + '(?:(?P\d\d)(?:(?P=dsep)(?P\d\d))?' + '|(?P\d\d\d)))?') + __tzd_re = '(?P[-+](?P\d\d)(?::?(?P\d\d))|Z)' + __tzd_rx = re.compile(__tzd_re) + __time_re = ('(?P\d\d)(?P:|)(?P\d\d)' + '(?:(?P=tsep)(?P\d\d)(?:[.,]\d+)?)?' + + __tzd_re) + __datetime_re = '%s(?:T%s)?' % (__date_re, __time_re) + __datetime_rx = re.compile(__datetime_re) + m = __datetime_rx.match(dateString) + if (m is None) or (m.group() != dateString): return + gmt = __extract_date(m) + __extract_time(m) + (0, 0, 0) + if gmt[0] == 0: return + return time.gmtime(time.mktime(gmt) + __extract_tzd(m) - time.timezone) +registerDateHandler(_parse_date_w3dtf) + +def _parse_date_rfc822(dateString): + '''Parse an RFC822, RFC1123, RFC2822, or asctime-style date''' + data = dateString.split() + if data[0][-1] in (',', '.') or data[0].lower() in rfc822._daynames: + del data[0] + if len(data) == 4: + s = data[3] + i = s.find('+') + if i > 0: + data[3:] = [s[:i], s[i+1:]] + else: + data.append('') + dateString = " ".join(data) + # Account for the Etc/GMT timezone by stripping 'Etc/' + elif len(data) == 5 and data[4].lower().startswith('etc/'): + data[4] = data[4][4:] + dateString = " ".join(data) + if len(data) < 5: + dateString += ' 00:00:00 GMT' + tm = rfc822.parsedate_tz(dateString) + if tm: + return time.gmtime(rfc822.mktime_tz(tm)) +# rfc822.py defines several time zones, but we define some extra ones. +# 'ET' is equivalent to 'EST', etc. +_additional_timezones = {'AT': -400, 'ET': -500, 'CT': -600, 'MT': -700, 'PT': -800} +rfc822._timezones.update(_additional_timezones) +registerDateHandler(_parse_date_rfc822) + +def _parse_date_perforce(aDateString): + """parse a date in yyyy/mm/dd hh:mm:ss TTT format""" + # Fri, 2006/09/15 08:19:53 EDT + _my_date_pattern = re.compile( \ + r'(\w{,3}), (\d{,4})/(\d{,2})/(\d{2}) (\d{,2}):(\d{2}):(\d{2}) (\w{,3})') + + dow, year, month, day, hour, minute, second, tz = \ + _my_date_pattern.search(aDateString).groups() + months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + dateString = "%s, %s %s %s %s:%s:%s %s" % (dow, day, months[int(month) - 1], year, hour, minute, second, tz) + tm = rfc822.parsedate_tz(dateString) + if tm: + return time.gmtime(rfc822.mktime_tz(tm)) +registerDateHandler(_parse_date_perforce) + +def _parse_date(dateString): + '''Parses a variety of date formats into a 9-tuple in GMT''' + for handler in _date_handlers: + try: + date9tuple = handler(dateString) + if not date9tuple: continue + if len(date9tuple) != 9: + if _debug: sys.stderr.write('date handler function must return 9-tuple\n') + raise ValueError + map(int, date9tuple) + return date9tuple + except Exception, e: + if _debug: sys.stderr.write('%s raised %s\n' % (handler.__name__, repr(e))) + pass + return None + +def _getCharacterEncoding(http_headers, xml_data): + '''Get the character encoding of the XML document + + http_headers is a dictionary + xml_data is a raw string (not Unicode) + + This is so much trickier than it sounds, it's not even funny. + According to RFC 3023 ('XML Media Types'), if the HTTP Content-Type + is application/xml, application/*+xml, + application/xml-external-parsed-entity, or application/xml-dtd, + the encoding given in the charset parameter of the HTTP Content-Type + takes precedence over the encoding given in the XML prefix within the + document, and defaults to 'utf-8' if neither are specified. But, if + the HTTP Content-Type is text/xml, text/*+xml, or + text/xml-external-parsed-entity, the encoding given in the XML prefix + within the document is ALWAYS IGNORED and only the encoding given in + the charset parameter of the HTTP Content-Type header should be + respected, and it defaults to 'us-ascii' if not specified. + + Furthermore, discussion on the atom-syntax mailing list with the + author of RFC 3023 leads me to the conclusion that any document + served with a Content-Type of text/* and no charset parameter + must be treated as us-ascii. (We now do this.) And also that it + must always be flagged as non-well-formed. (We now do this too.) + + If Content-Type is unspecified (input was local file or non-HTTP source) + or unrecognized (server just got it totally wrong), then go by the + encoding given in the XML prefix of the document and default to + 'iso-8859-1' as per the HTTP specification (RFC 2616). + + Then, assuming we didn't find a character encoding in the HTTP headers + (and the HTTP Content-type allowed us to look in the body), we need + to sniff the first few bytes of the XML data and try to determine + whether the encoding is ASCII-compatible. Section F of the XML + specification shows the way here: + http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info + + If the sniffed encoding is not ASCII-compatible, we need to make it + ASCII compatible so that we can sniff further into the XML declaration + to find the encoding attribute, which will tell us the true encoding. + + Of course, none of this guarantees that we will be able to parse the + feed in the declared character encoding (assuming it was declared + correctly, which many are not). CJKCodecs and iconv_codec help a lot; + you should definitely install them if you can. + http://cjkpython.i18n.org/ + ''' + + def _parseHTTPContentType(content_type): + '''takes HTTP Content-Type header and returns (content type, charset) + + If no charset is specified, returns (content type, '') + If no content type is specified, returns ('', '') + Both return parameters are guaranteed to be lowercase strings + ''' + content_type = content_type or '' + content_type, params = cgi.parse_header(content_type) + return content_type, params.get('charset', '').replace("'", '') + + sniffed_xml_encoding = '' + xml_encoding = '' + true_encoding = '' + http_content_type, http_encoding = _parseHTTPContentType(http_headers.get('content-type', http_headers.get('Content-type'))) + # Must sniff for non-ASCII-compatible character encodings before + # searching for XML declaration. This heuristic is defined in + # section F of the XML specification: + # http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info + try: + if xml_data[:4] == _l2bytes([0x4c, 0x6f, 0xa7, 0x94]): + # EBCDIC + xml_data = _ebcdic_to_ascii(xml_data) + elif xml_data[:4] == _l2bytes([0x00, 0x3c, 0x00, 0x3f]): + # UTF-16BE + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xfe, 0xff])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])): + # UTF-16BE with BOM + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') + elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x3f, 0x00]): + # UTF-16LE + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xff, 0xfe])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])): + # UTF-16LE with BOM + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') + elif xml_data[:4] == _l2bytes([0x00, 0x00, 0x00, 0x3c]): + # UTF-32BE + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') + elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x00, 0x00]): + # UTF-32LE + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') + elif xml_data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]): + # UTF-32BE with BOM + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') + elif xml_data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]): + # UTF-32LE with BOM + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') + elif xml_data[:3] == _l2bytes([0xef, 0xbb, 0xbf]): + # UTF-8 with BOM + sniffed_xml_encoding = 'utf-8' + xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') + else: + # ASCII-compatible + pass + xml_encoding_match = re.compile(_s2bytes('^<\?.*encoding=[\'"](.*?)[\'"].*\?>')).match(xml_data) + except: + xml_encoding_match = None + if xml_encoding_match: + xml_encoding = xml_encoding_match.groups()[0].decode('utf-8').lower() + if sniffed_xml_encoding and (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', 'iso-10646-ucs-4', 'ucs-4', 'csucs4', 'utf-16', 'utf-32', 'utf_16', 'utf_32', 'utf16', 'u16')): + xml_encoding = sniffed_xml_encoding + acceptable_content_type = 0 + application_content_types = ('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity') + text_content_types = ('text/xml', 'text/xml-external-parsed-entity') + if (http_content_type in application_content_types) or \ + (http_content_type.startswith('application/') and http_content_type.endswith('+xml')): + acceptable_content_type = 1 + true_encoding = http_encoding or xml_encoding or 'utf-8' + elif (http_content_type in text_content_types) or \ + (http_content_type.startswith('text/')) and http_content_type.endswith('+xml'): + acceptable_content_type = 1 + true_encoding = http_encoding or 'us-ascii' + elif http_content_type.startswith('text/'): + true_encoding = http_encoding or 'us-ascii' + elif http_headers and (not (http_headers.has_key('content-type') or http_headers.has_key('Content-type'))): + true_encoding = xml_encoding or 'iso-8859-1' + else: + true_encoding = xml_encoding or 'utf-8' + # some feeds claim to be gb2312 but are actually gb18030. + # apparently MSIE and Firefox both do the following switch: + if true_encoding.lower() == 'gb2312': + true_encoding = 'gb18030' + return true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type + +def _toUTF8(data, encoding): + '''Changes an XML data stream on the fly to specify a new encoding + + data is a raw sequence of bytes (not Unicode) that is presumed to be in %encoding already + encoding is a string recognized by encodings.aliases + ''' + if _debug: sys.stderr.write('entering _toUTF8, trying encoding %s\n' % encoding) + # strip Byte Order Mark (if present) + if (len(data) >= 4) and (data[:2] == _l2bytes([0xfe, 0xff])) and (data[2:4] != _l2bytes([0x00, 0x00])): + if _debug: + sys.stderr.write('stripping BOM\n') + if encoding != 'utf-16be': + sys.stderr.write('trying utf-16be instead\n') + encoding = 'utf-16be' + data = data[2:] + elif (len(data) >= 4) and (data[:2] == _l2bytes([0xff, 0xfe])) and (data[2:4] != _l2bytes([0x00, 0x00])): + if _debug: + sys.stderr.write('stripping BOM\n') + if encoding != 'utf-16le': + sys.stderr.write('trying utf-16le instead\n') + encoding = 'utf-16le' + data = data[2:] + elif data[:3] == _l2bytes([0xef, 0xbb, 0xbf]): + if _debug: + sys.stderr.write('stripping BOM\n') + if encoding != 'utf-8': + sys.stderr.write('trying utf-8 instead\n') + encoding = 'utf-8' + data = data[3:] + elif data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]): + if _debug: + sys.stderr.write('stripping BOM\n') + if encoding != 'utf-32be': + sys.stderr.write('trying utf-32be instead\n') + encoding = 'utf-32be' + data = data[4:] + elif data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]): + if _debug: + sys.stderr.write('stripping BOM\n') + if encoding != 'utf-32le': + sys.stderr.write('trying utf-32le instead\n') + encoding = 'utf-32le' + data = data[4:] + newdata = unicode(data, encoding) + if _debug: sys.stderr.write('successfully converted %s data to unicode\n' % encoding) + declmatch = re.compile('^<\?xml[^>]*?>') + newdecl = '''''' + if declmatch.search(newdata): + newdata = declmatch.sub(newdecl, newdata) + else: + newdata = newdecl + u'\n' + newdata + return newdata.encode('utf-8') + +def _stripDoctype(data): + '''Strips DOCTYPE from XML document, returns (rss_version, stripped_data) + + rss_version may be 'rss091n' or None + stripped_data is the same XML document, minus the DOCTYPE + ''' + start = re.search(_s2bytes('<\w'), data) + start = start and start.start() or -1 + head,data = data[:start+1], data[start+1:] + + entity_pattern = re.compile(_s2bytes(r'^\s*]*?)>'), re.MULTILINE) + entity_results=entity_pattern.findall(head) + head = entity_pattern.sub(_s2bytes(''), head) + doctype_pattern = re.compile(_s2bytes(r'^\s*]*?)>'), re.MULTILINE) + doctype_results = doctype_pattern.findall(head) + doctype = doctype_results and doctype_results[0] or _s2bytes('') + if doctype.lower().count(_s2bytes('netscape')): + version = 'rss091n' + else: + version = None + + # only allow in 'safe' inline entity definitions + replacement=_s2bytes('') + if len(doctype_results)==1 and entity_results: + safe_pattern=re.compile(_s2bytes('\s+(\w+)\s+"(&#\w+;|[^&"]*)"')) + safe_entities=filter(lambda e: safe_pattern.match(e),entity_results) + if safe_entities: + replacement=_s2bytes('\n \n]>') + data = doctype_pattern.sub(replacement, head) + data + + return version, data, dict(replacement and [(k.decode('utf-8'), v.decode('utf-8')) for k, v in safe_pattern.findall(replacement)]) + +def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={}): + '''Parse a feed from a URL, file, stream, or string. + + request_headers, if given, is a dict from http header name to value to add + to the request; this overrides internally generated values. + ''' + result = FeedParserDict() + result['feed'] = FeedParserDict() + result['entries'] = [] + if _XML_AVAILABLE: + result['bozo'] = 0 + if not isinstance(handlers, list): + handlers = [handlers] + try: + f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers) + data = f.read() + except Exception, e: + result['bozo'] = 1 + result['bozo_exception'] = e + data = None + f = None + + if hasattr(f, 'headers'): + result['headers'] = dict(f.headers) + # overwrite existing headers using response_headers + if 'headers' in result: + result['headers'].update(response_headers) + elif response_headers: + result['headers'] = copy.deepcopy(response_headers) + + # if feed is gzip-compressed, decompress it + if f and data and 'headers' in result: + if gzip and result['headers'].get('content-encoding') == 'gzip': + try: + data = gzip.GzipFile(fileobj=_StringIO(data)).read() + except Exception, e: + # Some feeds claim to be gzipped but they're not, so + # we get garbage. Ideally, we should re-request the + # feed without the 'Accept-encoding: gzip' header, + # but we don't. + result['bozo'] = 1 + result['bozo_exception'] = e + data = '' + elif zlib and result['headers'].get('content-encoding') == 'deflate': + try: + data = zlib.decompress(data, -zlib.MAX_WBITS) + except Exception, e: + result['bozo'] = 1 + result['bozo_exception'] = e + data = '' + + # save HTTP headers + if 'headers' in result: + if 'etag' in result['headers'] or 'ETag' in result['headers']: + etag = result['headers'].get('etag', result['headers'].get('ETag')) + if etag: + result['etag'] = etag + if 'last-modified' in result['headers'] or 'Last-Modified' in result['headers']: + modified = result['headers'].get('last-modified', result['headers'].get('Last-Modified')) + if modified: + result['modified'] = _parse_date(modified) + if hasattr(f, 'url'): + result['href'] = f.url + result['status'] = 200 + if hasattr(f, 'status'): + result['status'] = f.status + if hasattr(f, 'close'): + f.close() + + # there are four encodings to keep track of: + # - http_encoding is the encoding declared in the Content-Type HTTP header + # - xml_encoding is the encoding declared in the [:][ :.] +GNTP_INFO_LINE = re.compile( + 'GNTP/(?P\d+\.\d+) (?PREGISTER|NOTIFY|SUBSCRIBE|\-OK|\-ERROR)' + + ' (?P[A-Z0-9]+(:(?P[A-F0-9]+))?) ?' + + '((?P[A-Z0-9]+):(?P[A-F0-9]+).(?P[A-F0-9]+))?\r\n', + re.IGNORECASE +) + +GNTP_INFO_LINE_SHORT = re.compile( + 'GNTP/(?P\d+\.\d+) (?PREGISTER|NOTIFY|SUBSCRIBE|\-OK|\-ERROR)', + re.IGNORECASE +) + +GNTP_HEADER = re.compile('([\w-]+):(.+)') + +GNTP_EOL = gntp.shim.b('\r\n') +GNTP_SEP = gntp.shim.b(': ') + + +class _GNTPBuffer(gntp.shim.StringIO): + """GNTP Buffer class""" + def writeln(self, value=None): + if value: + self.write(gntp.shim.b(value)) + self.write(GNTP_EOL) + + def writeheader(self, key, value): + if not isinstance(value, str): + value = str(value) + self.write(gntp.shim.b(key)) + self.write(GNTP_SEP) + self.write(gntp.shim.b(value)) + self.write(GNTP_EOL) + + +class _GNTPBase(object): + """Base initilization + + :param string messagetype: GNTP Message type + :param string version: GNTP Protocol version + :param string encription: Encryption protocol + """ + def __init__(self, messagetype=None, version='1.0', encryption=None): + self.info = { + 'version': version, + 'messagetype': messagetype, + 'encryptionAlgorithmID': encryption + } + self.hash_algo = { + 'MD5': hashlib.md5, + 'SHA1': hashlib.sha1, + 'SHA256': hashlib.sha256, + 'SHA512': hashlib.sha512, + } + self.headers = {} + self.resources = {} + + def __str__(self): + return self.encode() + + def _parse_info(self, data): + """Parse the first line of a GNTP message to get security and other info values + + :param string data: GNTP Message + :return dict: Parsed GNTP Info line + """ + + match = GNTP_INFO_LINE.match(data) + + if not match: + raise errors.ParseError('ERROR_PARSING_INFO_LINE') + + info = match.groupdict() + if info['encryptionAlgorithmID'] == 'NONE': + info['encryptionAlgorithmID'] = None + + return info + + def set_password(self, password, encryptAlgo='MD5'): + """Set a password for a GNTP Message + + :param string password: Null to clear password + :param string encryptAlgo: Supports MD5, SHA1, SHA256, SHA512 + """ + if not password: + self.info['encryptionAlgorithmID'] = None + self.info['keyHashAlgorithm'] = None + return + + self.password = gntp.shim.b(password) + self.encryptAlgo = encryptAlgo.upper() + + if not self.encryptAlgo in self.hash_algo: + raise errors.UnsupportedError('INVALID HASH "%s"' % self.encryptAlgo) + + hashfunction = self.hash_algo.get(self.encryptAlgo) + + password = password.encode('utf8') + seed = time.ctime().encode('utf8') + salt = hashfunction(seed).hexdigest() + saltHash = hashfunction(seed).digest() + keyBasis = password + saltHash + key = hashfunction(keyBasis).digest() + keyHash = hashfunction(key).hexdigest() + + self.info['keyHashAlgorithmID'] = self.encryptAlgo + self.info['keyHash'] = keyHash.upper() + self.info['salt'] = salt.upper() + + def _decode_hex(self, value): + """Helper function to decode hex string to `proper` hex string + + :param string value: Human readable hex string + :return string: Hex string + """ + result = '' + for i in range(0, len(value), 2): + tmp = int(value[i:i + 2], 16) + result += chr(tmp) + return result + + def _decode_binary(self, rawIdentifier, identifier): + rawIdentifier += '\r\n\r\n' + dataLength = int(identifier['Length']) + pointerStart = self.raw.find(rawIdentifier) + len(rawIdentifier) + pointerEnd = pointerStart + dataLength + data = self.raw[pointerStart:pointerEnd] + if not len(data) == dataLength: + raise errors.ParseError('INVALID_DATA_LENGTH Expected: %s Recieved %s' % (dataLength, len(data))) + return data + + def _validate_password(self, password): + """Validate GNTP Message against stored password""" + self.password = password + if password is None: + raise errors.AuthError('Missing password') + keyHash = self.info.get('keyHash', None) + if keyHash is None and self.password is None: + return True + if keyHash is None: + raise errors.AuthError('Invalid keyHash') + if self.password is None: + raise errors.AuthError('Missing password') + + keyHashAlgorithmID = self.info.get('keyHashAlgorithmID','MD5') + + password = self.password.encode('utf8') + saltHash = self._decode_hex(self.info['salt']) + + keyBasis = password + saltHash + self.key = self.hash_algo[keyHashAlgorithmID](keyBasis).digest() + keyHash = self.hash_algo[keyHashAlgorithmID](self.key).hexdigest() + + if not keyHash.upper() == self.info['keyHash'].upper(): + raise errors.AuthError('Invalid Hash') + return True + + def validate(self): + """Verify required headers""" + for header in self._requiredHeaders: + if not self.headers.get(header, False): + raise errors.ParseError('Missing Notification Header: ' + header) + + def _format_info(self): + """Generate info line for GNTP Message + + :return string: + """ + info = 'GNTP/%s %s' % ( + self.info.get('version'), + self.info.get('messagetype'), + ) + if self.info.get('encryptionAlgorithmID', None): + info += ' %s:%s' % ( + self.info.get('encryptionAlgorithmID'), + self.info.get('ivValue'), + ) + else: + info += ' NONE' + + if self.info.get('keyHashAlgorithmID', None): + info += ' %s:%s.%s' % ( + self.info.get('keyHashAlgorithmID'), + self.info.get('keyHash'), + self.info.get('salt') + ) + + return info + + def _parse_dict(self, data): + """Helper function to parse blocks of GNTP headers into a dictionary + + :param string data: + :return dict: Dictionary of parsed GNTP Headers + """ + d = {} + for line in data.split('\r\n'): + match = GNTP_HEADER.match(line) + if not match: + continue + + key = match.group(1).strip() + val = match.group(2).strip() + d[key] = val + return d + + def add_header(self, key, value): + self.headers[key] = value + + def add_resource(self, data): + """Add binary resource + + :param string data: Binary Data + """ + data = gntp.shim.b(data) + identifier = hashlib.md5(data).hexdigest() + self.resources[identifier] = data + return 'x-growl-resource://%s' % identifier + + def decode(self, data, password=None): + """Decode GNTP Message + + :param string data: + """ + self.password = password + self.raw = gntp.shim.u(data) + parts = self.raw.split('\r\n\r\n') + self.info = self._parse_info(self.raw) + self.headers = self._parse_dict(parts[0]) + + def encode(self): + """Encode a generic GNTP Message + + :return string: GNTP Message ready to be sent. Returned as a byte string + """ + + buff = _GNTPBuffer() + + buff.writeln(self._format_info()) + + #Headers + for k, v in self.headers.items(): + buff.writeheader(k, v) + buff.writeln() + + #Resources + for resource, data in self.resources.items(): + buff.writeheader('Identifier', resource) + buff.writeheader('Length', len(data)) + buff.writeln() + buff.write(data) + buff.writeln() + buff.writeln() + + return buff.getvalue() + + +class GNTPRegister(_GNTPBase): + """Represents a GNTP Registration Command + + :param string data: (Optional) See decode() + :param string password: (Optional) Password to use while encoding/decoding messages + """ + _requiredHeaders = [ + 'Application-Name', + 'Notifications-Count' + ] + _requiredNotificationHeaders = ['Notification-Name'] + + def __init__(self, data=None, password=None): + _GNTPBase.__init__(self, 'REGISTER') + self.notifications = [] + + if data: + self.decode(data, password) + else: + self.set_password(password) + self.add_header('Application-Name', 'pygntp') + self.add_header('Notifications-Count', 0) + + def validate(self): + '''Validate required headers and validate notification headers''' + for header in self._requiredHeaders: + if not self.headers.get(header, False): + raise errors.ParseError('Missing Registration Header: ' + header) + for notice in self.notifications: + for header in self._requiredNotificationHeaders: + if not notice.get(header, False): + raise errors.ParseError('Missing Notification Header: ' + header) + + def decode(self, data, password): + """Decode existing GNTP Registration message + + :param string data: Message to decode + """ + self.raw = gntp.shim.u(data) + parts = self.raw.split('\r\n\r\n') + self.info = self._parse_info(self.raw) + self._validate_password(password) + self.headers = self._parse_dict(parts[0]) + + for i, part in enumerate(parts): + if i == 0: + continue # Skip Header + if part.strip() == '': + continue + notice = self._parse_dict(part) + if notice.get('Notification-Name', False): + self.notifications.append(notice) + elif notice.get('Identifier', False): + notice['Data'] = self._decode_binary(part, notice) + #open('register.png','wblol').write(notice['Data']) + self.resources[notice.get('Identifier')] = notice + + def add_notification(self, name, enabled=True): + """Add new Notification to Registration message + + :param string name: Notification Name + :param boolean enabled: Enable this notification by default + """ + notice = {} + notice['Notification-Name'] = name + notice['Notification-Enabled'] = enabled + + self.notifications.append(notice) + self.add_header('Notifications-Count', len(self.notifications)) + + def encode(self): + """Encode a GNTP Registration Message + + :return string: Encoded GNTP Registration message. Returned as a byte string + """ + + buff = _GNTPBuffer() + + buff.writeln(self._format_info()) + + #Headers + for k, v in self.headers.items(): + buff.writeheader(k, v) + buff.writeln() + + #Notifications + if len(self.notifications) > 0: + for notice in self.notifications: + for k, v in notice.items(): + buff.writeheader(k, v) + buff.writeln() + + #Resources + for resource, data in self.resources.items(): + buff.writeheader('Identifier', resource) + buff.writeheader('Length', len(data)) + buff.writeln() + buff.write(data) + buff.writeln() + buff.writeln() + + return buff.getvalue() + + +class GNTPNotice(_GNTPBase): + """Represents a GNTP Notification Command + + :param string data: (Optional) See decode() + :param string app: (Optional) Set Application-Name + :param string name: (Optional) Set Notification-Name + :param string title: (Optional) Set Notification Title + :param string password: (Optional) Password to use while encoding/decoding messages + """ + _requiredHeaders = [ + 'Application-Name', + 'Notification-Name', + 'Notification-Title' + ] + + def __init__(self, data=None, app=None, name=None, title=None, password=None): + _GNTPBase.__init__(self, 'NOTIFY') + + if data: + self.decode(data, password) + else: + self.set_password(password) + if app: + self.add_header('Application-Name', app) + if name: + self.add_header('Notification-Name', name) + if title: + self.add_header('Notification-Title', title) + + def decode(self, data, password): + """Decode existing GNTP Notification message + + :param string data: Message to decode. + """ + self.raw = gntp.shim.u(data) + parts = self.raw.split('\r\n\r\n') + self.info = self._parse_info(self.raw) + self._validate_password(password) + self.headers = self._parse_dict(parts[0]) + + for i, part in enumerate(parts): + if i == 0: + continue # Skip Header + if part.strip() == '': + continue + notice = self._parse_dict(part) + if notice.get('Identifier', False): + notice['Data'] = self._decode_binary(part, notice) + #open('notice.png','wblol').write(notice['Data']) + self.resources[notice.get('Identifier')] = notice + + +class GNTPSubscribe(_GNTPBase): + """Represents a GNTP Subscribe Command + + :param string data: (Optional) See decode() + :param string password: (Optional) Password to use while encoding/decoding messages + """ + _requiredHeaders = [ + 'Subscriber-ID', + 'Subscriber-Name', + ] + + def __init__(self, data=None, password=None): + _GNTPBase.__init__(self, 'SUBSCRIBE') + if data: + self.decode(data, password) + else: + self.set_password(password) + + +class GNTPOK(_GNTPBase): + """Represents a GNTP OK Response + + :param string data: (Optional) See _GNTPResponse.decode() + :param string action: (Optional) Set type of action the OK Response is for + """ + _requiredHeaders = ['Response-Action'] + + def __init__(self, data=None, action=None): + _GNTPBase.__init__(self, '-OK') + if data: + self.decode(data) + if action: + self.add_header('Response-Action', action) + + +class GNTPError(_GNTPBase): + """Represents a GNTP Error response + + :param string data: (Optional) See _GNTPResponse.decode() + :param string errorcode: (Optional) Error code + :param string errordesc: (Optional) Error Description + """ + _requiredHeaders = ['Error-Code', 'Error-Description'] + + def __init__(self, data=None, errorcode=None, errordesc=None): + _GNTPBase.__init__(self, '-ERROR') + if data: + self.decode(data) + if errorcode: + self.add_header('Error-Code', errorcode) + self.add_header('Error-Description', errordesc) + + def error(self): + return (self.headers.get('Error-Code', None), + self.headers.get('Error-Description', None)) + + +def parse_gntp(data, password=None): + """Attempt to parse a message as a GNTP message + + :param string data: Message to be parsed + :param string password: Optional password to be used to verify the message + """ + data = gntp.shim.u(data) + match = GNTP_INFO_LINE_SHORT.match(data) + if not match: + raise errors.ParseError('INVALID_GNTP_INFO') + info = match.groupdict() + if info['messagetype'] == 'REGISTER': + return GNTPRegister(data, password=password) + elif info['messagetype'] == 'NOTIFY': + return GNTPNotice(data, password=password) + elif info['messagetype'] == 'SUBSCRIBE': + return GNTPSubscribe(data, password=password) + elif info['messagetype'] == '-OK': + return GNTPOK(data) + elif info['messagetype'] == '-ERROR': + return GNTPError(data) + raise errors.ParseError('INVALID_GNTP_MESSAGE') diff --git a/lib/gntp/errors.py b/lib/gntp/errors.py new file mode 100644 index 00000000..c006fd68 --- /dev/null +++ b/lib/gntp/errors.py @@ -0,0 +1,25 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + +class BaseError(Exception): + pass + + +class ParseError(BaseError): + errorcode = 500 + errordesc = 'Error parsing the message' + + +class AuthError(BaseError): + errorcode = 400 + errordesc = 'Error with authorization' + + +class UnsupportedError(BaseError): + errorcode = 500 + errordesc = 'Currently unsupported by gntp.py' + + +class NetworkError(BaseError): + errorcode = 500 + errordesc = "Error connecting to growl server" diff --git a/lib/gntp/notifier.py b/lib/gntp/notifier.py new file mode 100644 index 00000000..1719ecdf --- /dev/null +++ b/lib/gntp/notifier.py @@ -0,0 +1,265 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + +""" +The gntp.notifier module is provided as a simple way to send notifications +using GNTP + +.. note:: + This class is intended to mostly mirror the older Python bindings such + that you should be able to replace instances of the old bindings with + this class. + `Original Python bindings `_ + +""" +import logging +import platform +import socket +import sys + +from gntp.version import __version__ +import gntp.core +import gntp.errors as errors +import gntp.shim + +__all__ = [ + 'mini', + 'GrowlNotifier', +] + +logger = logging.getLogger(__name__) + + +class GrowlNotifier(object): + """Helper class to simplfy sending Growl messages + + :param string applicationName: Sending application name + :param list notification: List of valid notifications + :param list defaultNotifications: List of notifications that should be enabled + by default + :param string applicationIcon: Icon URL + :param string hostname: Remote host + :param integer port: Remote port + """ + + passwordHash = 'MD5' + socketTimeout = 3 + + def __init__(self, applicationName='Python GNTP', notifications=[], + defaultNotifications=None, applicationIcon=None, hostname='localhost', + password=None, port=23053): + + self.applicationName = applicationName + self.notifications = list(notifications) + if defaultNotifications: + self.defaultNotifications = list(defaultNotifications) + else: + self.defaultNotifications = self.notifications + self.applicationIcon = applicationIcon + + self.password = password + self.hostname = hostname + self.port = int(port) + + def _checkIcon(self, data): + ''' + Check the icon to see if it's valid + + If it's a simple URL icon, then we return True. If it's a data icon + then we return False + ''' + logger.info('Checking icon') + return gntp.shim.u(data).startswith('http') + + def register(self): + """Send GNTP Registration + + .. warning:: + Before sending notifications to Growl, you need to have + sent a registration message at least once + """ + logger.info('Sending registration to %s:%s', self.hostname, self.port) + register = gntp.core.GNTPRegister() + register.add_header('Application-Name', self.applicationName) + for notification in self.notifications: + enabled = notification in self.defaultNotifications + register.add_notification(notification, enabled) + if self.applicationIcon: + if self._checkIcon(self.applicationIcon): + register.add_header('Application-Icon', self.applicationIcon) + else: + resource = register.add_resource(self.applicationIcon) + register.add_header('Application-Icon', resource) + if self.password: + register.set_password(self.password, self.passwordHash) + self.add_origin_info(register) + self.register_hook(register) + return self._send('register', register) + + def notify(self, noteType, title, description, icon=None, sticky=False, + priority=None, callback=None, identifier=None, custom={}): + """Send a GNTP notifications + + .. warning:: + Must have registered with growl beforehand or messages will be ignored + + :param string noteType: One of the notification names registered earlier + :param string title: Notification title (usually displayed on the notification) + :param string description: The main content of the notification + :param string icon: Icon URL path + :param boolean sticky: Sticky notification + :param integer priority: Message priority level from -2 to 2 + :param string callback: URL callback + :param dict custom: Custom attributes. Key names should be prefixed with X- + according to the spec but this is not enforced by this class + + .. warning:: + For now, only URL callbacks are supported. In the future, the + callback argument will also support a function + """ + logger.info('Sending notification [%s] to %s:%s', noteType, self.hostname, self.port) + assert noteType in self.notifications + notice = gntp.core.GNTPNotice() + notice.add_header('Application-Name', self.applicationName) + notice.add_header('Notification-Name', noteType) + notice.add_header('Notification-Title', title) + if self.password: + notice.set_password(self.password, self.passwordHash) + if sticky: + notice.add_header('Notification-Sticky', sticky) + if priority: + notice.add_header('Notification-Priority', priority) + if icon: + if self._checkIcon(icon): + notice.add_header('Notification-Icon', icon) + else: + resource = notice.add_resource(icon) + notice.add_header('Notification-Icon', resource) + + if description: + notice.add_header('Notification-Text', description) + if callback: + notice.add_header('Notification-Callback-Target', callback) + if identifier: + notice.add_header('Notification-Coalescing-ID', identifier) + + for key in custom: + notice.add_header(key, custom[key]) + + self.add_origin_info(notice) + self.notify_hook(notice) + + return self._send('notify', notice) + + def subscribe(self, id, name, port): + """Send a Subscribe request to a remote machine""" + sub = gntp.core.GNTPSubscribe() + sub.add_header('Subscriber-ID', id) + sub.add_header('Subscriber-Name', name) + sub.add_header('Subscriber-Port', port) + if self.password: + sub.set_password(self.password, self.passwordHash) + + self.add_origin_info(sub) + self.subscribe_hook(sub) + + return self._send('subscribe', sub) + + def add_origin_info(self, packet): + """Add optional Origin headers to message""" + packet.add_header('Origin-Machine-Name', platform.node()) + packet.add_header('Origin-Software-Name', 'gntp.py') + packet.add_header('Origin-Software-Version', __version__) + packet.add_header('Origin-Platform-Name', platform.system()) + packet.add_header('Origin-Platform-Version', platform.platform()) + + def register_hook(self, packet): + pass + + def notify_hook(self, packet): + pass + + def subscribe_hook(self, packet): + pass + + def _send(self, messagetype, packet): + """Send the GNTP Packet""" + + packet.validate() + data = packet.encode() + + logger.debug('To : %s:%s <%s>\n%s', self.hostname, self.port, packet.__class__, data) + + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(self.socketTimeout) + try: + s.connect((self.hostname, self.port)) + s.send(data) + recv_data = s.recv(1024) + while not recv_data.endswith(gntp.shim.b("\r\n\r\n")): + recv_data += s.recv(1024) + except socket.error: + # Python2.5 and Python3 compatibile exception + exc = sys.exc_info()[1] + raise errors.NetworkError(exc) + + response = gntp.core.parse_gntp(recv_data) + s.close() + + logger.debug('From : %s:%s <%s>\n%s', self.hostname, self.port, response.__class__, response) + + if type(response) == gntp.core.GNTPOK: + return True + logger.error('Invalid response: %s', response.error()) + return response.error() + + +def mini(description, applicationName='PythonMini', noteType="Message", + title="Mini Message", applicationIcon=None, hostname='localhost', + password=None, port=23053, sticky=False, priority=None, + callback=None, notificationIcon=None, identifier=None, + notifierFactory=GrowlNotifier): + """Single notification function + + Simple notification function in one line. Has only one required parameter + and attempts to use reasonable defaults for everything else + :param string description: Notification message + + .. warning:: + For now, only URL callbacks are supported. In the future, the + callback argument will also support a function + """ + try: + growl = notifierFactory( + applicationName=applicationName, + notifications=[noteType], + defaultNotifications=[noteType], + applicationIcon=applicationIcon, + hostname=hostname, + password=password, + port=port, + ) + result = growl.register() + if result is not True: + return result + + return growl.notify( + noteType=noteType, + title=title, + description=description, + icon=notificationIcon, + sticky=sticky, + priority=priority, + callback=callback, + identifier=identifier, + ) + except Exception: + # We want the "mini" function to be simple and swallow Exceptions + # in order to be less invasive + logger.exception("Growl error") + +if __name__ == '__main__': + # If we're running this module directly we're likely running it as a test + # so extra debugging is useful + logging.basicConfig(level=logging.INFO) + mini('Testing mini notification') diff --git a/lib/gntp/shim.py b/lib/gntp/shim.py new file mode 100644 index 00000000..3a387828 --- /dev/null +++ b/lib/gntp/shim.py @@ -0,0 +1,45 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + +""" +Python2.5 and Python3.3 compatibility shim + +Heavily inspirted by the "six" library. +https://pypi.python.org/pypi/six +""" + +import sys + +PY3 = sys.version_info[0] == 3 + +if PY3: + def b(s): + if isinstance(s, bytes): + return s + return s.encode('utf8', 'replace') + + def u(s): + if isinstance(s, bytes): + return s.decode('utf8', 'replace') + return s + + from io import BytesIO as StringIO + from configparser import RawConfigParser +else: + def b(s): + if isinstance(s, unicode): + return s.encode('utf8', 'replace') + return s + + def u(s): + if isinstance(s, unicode): + return s + if isinstance(s, int): + s = str(s) + return unicode(s, "utf8", "replace") + + from StringIO import StringIO + from ConfigParser import RawConfigParser + +b.__doc__ = "Ensure we have a byte string" +u.__doc__ = "Ensure we have a unicode string" diff --git a/lib/gntp/version.py b/lib/gntp/version.py new file mode 100644 index 00000000..2166aaca --- /dev/null +++ b/lib/gntp/version.py @@ -0,0 +1,4 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + +__version__ = '1.0.2' diff --git a/lib/html5lib/__init__.py b/lib/html5lib/__init__.py new file mode 100644 index 00000000..3ba1163c --- /dev/null +++ b/lib/html5lib/__init__.py @@ -0,0 +1,25 @@ +""" +HTML parsing library based on the WHATWG "HTML5" +specification. The parser is designed to be compatible with existing +HTML found in the wild and implements well-defined error recovery that +is largely compatible with modern desktop web browsers. + +Example usage: + +import html5lib +f = open("my_document.html") +tree = html5lib.parse(f) +""" + +from __future__ import absolute_import, division, unicode_literals + +from .html5parser import HTMLParser, parse, parseFragment +from .treebuilders import getTreeBuilder +from .treewalkers import getTreeWalker +from .serializer import serialize + +__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", + "getTreeWalker", "serialize"] + +# this has to be at the top level, see how setup.py parses this +__version__ = "0.999999" diff --git a/lib/html5lib/constants.py b/lib/html5lib/constants.py new file mode 100644 index 00000000..d938e0ae --- /dev/null +++ b/lib/html5lib/constants.py @@ -0,0 +1,3102 @@ +from __future__ import absolute_import, division, unicode_literals + +import string + +EOF = None + +E = { + "null-character": + "Null character in input stream, replaced with U+FFFD.", + "invalid-codepoint": + "Invalid codepoint in stream.", + "incorrectly-placed-solidus": + "Solidus (/) incorrectly placed in tag.", + "incorrect-cr-newline-entity": + "Incorrect CR newline entity, replaced with LF.", + "illegal-windows-1252-entity": + "Entity used with illegal number (windows-1252 reference).", + "cant-convert-numeric-entity": + "Numeric entity couldn't be converted to character " + "(codepoint U+%(charAsInt)08x).", + "illegal-codepoint-for-numeric-entity": + "Numeric entity represents an illegal codepoint: " + "U+%(charAsInt)08x.", + "numeric-entity-without-semicolon": + "Numeric entity didn't end with ';'.", + "expected-numeric-entity-but-got-eof": + "Numeric entity expected. Got end of file instead.", + "expected-numeric-entity": + "Numeric entity expected but none found.", + "named-entity-without-semicolon": + "Named entity didn't end with ';'.", + "expected-named-entity": + "Named entity expected. Got none.", + "attributes-in-end-tag": + "End tag contains unexpected attributes.", + 'self-closing-flag-on-end-tag': + "End tag contains unexpected self-closing flag.", + "expected-tag-name-but-got-right-bracket": + "Expected tag name. Got '>' instead.", + "expected-tag-name-but-got-question-mark": + "Expected tag name. Got '?' instead. (HTML doesn't " + "support processing instructions.)", + "expected-tag-name": + "Expected tag name. Got something else instead", + "expected-closing-tag-but-got-right-bracket": + "Expected closing tag. Got '>' instead. Ignoring ''.", + "expected-closing-tag-but-got-eof": + "Expected closing tag. Unexpected end of file.", + "expected-closing-tag-but-got-char": + "Expected closing tag. Unexpected character '%(data)s' found.", + "eof-in-tag-name": + "Unexpected end of file in the tag name.", + "expected-attribute-name-but-got-eof": + "Unexpected end of file. Expected attribute name instead.", + "eof-in-attribute-name": + "Unexpected end of file in attribute name.", + "invalid-character-in-attribute-name": + "Invalid character in attribute name", + "duplicate-attribute": + "Dropped duplicate attribute on tag.", + "expected-end-of-tag-name-but-got-eof": + "Unexpected end of file. Expected = or end of tag.", + "expected-attribute-value-but-got-eof": + "Unexpected end of file. Expected attribute value.", + "expected-attribute-value-but-got-right-bracket": + "Expected attribute value. Got '>' instead.", + 'equals-in-unquoted-attribute-value': + "Unexpected = in unquoted attribute", + 'unexpected-character-in-unquoted-attribute-value': + "Unexpected character in unquoted attribute", + "invalid-character-after-attribute-name": + "Unexpected character after attribute name.", + "unexpected-character-after-attribute-value": + "Unexpected character after attribute value.", + "eof-in-attribute-value-double-quote": + "Unexpected end of file in attribute value (\").", + "eof-in-attribute-value-single-quote": + "Unexpected end of file in attribute value (').", + "eof-in-attribute-value-no-quotes": + "Unexpected end of file in attribute value.", + "unexpected-EOF-after-solidus-in-tag": + "Unexpected end of file in tag. Expected >", + "unexpected-character-after-solidus-in-tag": + "Unexpected character after / in tag. Expected >", + "expected-dashes-or-doctype": + "Expected '--' or 'DOCTYPE'. Not found.", + "unexpected-bang-after-double-dash-in-comment": + "Unexpected ! after -- in comment", + "unexpected-space-after-double-dash-in-comment": + "Unexpected space after -- in comment", + "incorrect-comment": + "Incorrect comment.", + "eof-in-comment": + "Unexpected end of file in comment.", + "eof-in-comment-end-dash": + "Unexpected end of file in comment (-)", + "unexpected-dash-after-double-dash-in-comment": + "Unexpected '-' after '--' found in comment.", + "eof-in-comment-double-dash": + "Unexpected end of file in comment (--).", + "eof-in-comment-end-space-state": + "Unexpected end of file in comment.", + "eof-in-comment-end-bang-state": + "Unexpected end of file in comment.", + "unexpected-char-in-comment": + "Unexpected character in comment found.", + "need-space-after-doctype": + "No space after literal string 'DOCTYPE'.", + "expected-doctype-name-but-got-right-bracket": + "Unexpected > character. Expected DOCTYPE name.", + "expected-doctype-name-but-got-eof": + "Unexpected end of file. Expected DOCTYPE name.", + "eof-in-doctype-name": + "Unexpected end of file in DOCTYPE name.", + "eof-in-doctype": + "Unexpected end of file in DOCTYPE.", + "expected-space-or-right-bracket-in-doctype": + "Expected space or '>'. Got '%(data)s'", + "unexpected-end-of-doctype": + "Unexpected end of DOCTYPE.", + "unexpected-char-in-doctype": + "Unexpected character in DOCTYPE.", + "eof-in-innerhtml": + "XXX innerHTML EOF", + "unexpected-doctype": + "Unexpected DOCTYPE. Ignored.", + "non-html-root": + "html needs to be the first start tag.", + "expected-doctype-but-got-eof": + "Unexpected End of file. Expected DOCTYPE.", + "unknown-doctype": + "Erroneous DOCTYPE.", + "expected-doctype-but-got-chars": + "Unexpected non-space characters. Expected DOCTYPE.", + "expected-doctype-but-got-start-tag": + "Unexpected start tag (%(name)s). Expected DOCTYPE.", + "expected-doctype-but-got-end-tag": + "Unexpected end tag (%(name)s). Expected DOCTYPE.", + "end-tag-after-implied-root": + "Unexpected end tag (%(name)s) after the (implied) root element.", + "expected-named-closing-tag-but-got-eof": + "Unexpected end of file. Expected end tag (%(name)s).", + "two-heads-are-not-better-than-one": + "Unexpected start tag head in existing head. Ignored.", + "unexpected-end-tag": + "Unexpected end tag (%(name)s). Ignored.", + "unexpected-start-tag-out-of-my-head": + "Unexpected start tag (%(name)s) that can be in head. Moved.", + "unexpected-start-tag": + "Unexpected start tag (%(name)s).", + "missing-end-tag": + "Missing end tag (%(name)s).", + "missing-end-tags": + "Missing end tags (%(name)s).", + "unexpected-start-tag-implies-end-tag": + "Unexpected start tag (%(startName)s) " + "implies end tag (%(endName)s).", + "unexpected-start-tag-treated-as": + "Unexpected start tag (%(originalName)s). Treated as %(newName)s.", + "deprecated-tag": + "Unexpected start tag %(name)s. Don't use it!", + "unexpected-start-tag-ignored": + "Unexpected start tag %(name)s. Ignored.", + "expected-one-end-tag-but-got-another": + "Unexpected end tag (%(gotName)s). " + "Missing end tag (%(expectedName)s).", + "end-tag-too-early": + "End tag (%(name)s) seen too early. Expected other end tag.", + "end-tag-too-early-named": + "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).", + "end-tag-too-early-ignored": + "End tag (%(name)s) seen too early. Ignored.", + "adoption-agency-1.1": + "End tag (%(name)s) violates step 1, " + "paragraph 1 of the adoption agency algorithm.", + "adoption-agency-1.2": + "End tag (%(name)s) violates step 1, " + "paragraph 2 of the adoption agency algorithm.", + "adoption-agency-1.3": + "End tag (%(name)s) violates step 1, " + "paragraph 3 of the adoption agency algorithm.", + "adoption-agency-4.4": + "End tag (%(name)s) violates step 4, " + "paragraph 4 of the adoption agency algorithm.", + "unexpected-end-tag-treated-as": + "Unexpected end tag (%(originalName)s). Treated as %(newName)s.", + "no-end-tag": + "This element (%(name)s) has no end tag.", + "unexpected-implied-end-tag-in-table": + "Unexpected implied end tag (%(name)s) in the table phase.", + "unexpected-implied-end-tag-in-table-body": + "Unexpected implied end tag (%(name)s) in the table body phase.", + "unexpected-char-implies-table-voodoo": + "Unexpected non-space characters in " + "table context caused voodoo mode.", + "unexpected-hidden-input-in-table": + "Unexpected input with type hidden in table context.", + "unexpected-form-in-table": + "Unexpected form in table context.", + "unexpected-start-tag-implies-table-voodoo": + "Unexpected start tag (%(name)s) in " + "table context caused voodoo mode.", + "unexpected-end-tag-implies-table-voodoo": + "Unexpected end tag (%(name)s) in " + "table context caused voodoo mode.", + "unexpected-cell-in-table-body": + "Unexpected table cell start tag (%(name)s) " + "in the table body phase.", + "unexpected-cell-end-tag": + "Got table cell end tag (%(name)s) " + "while required end tags are missing.", + "unexpected-end-tag-in-table-body": + "Unexpected end tag (%(name)s) in the table body phase. Ignored.", + "unexpected-implied-end-tag-in-table-row": + "Unexpected implied end tag (%(name)s) in the table row phase.", + "unexpected-end-tag-in-table-row": + "Unexpected end tag (%(name)s) in the table row phase. Ignored.", + "unexpected-select-in-select": + "Unexpected select start tag in the select phase " + "treated as select end tag.", + "unexpected-input-in-select": + "Unexpected input start tag in the select phase.", + "unexpected-start-tag-in-select": + "Unexpected start tag token (%(name)s in the select phase. " + "Ignored.", + "unexpected-end-tag-in-select": + "Unexpected end tag (%(name)s) in the select phase. Ignored.", + "unexpected-table-element-start-tag-in-select-in-table": + "Unexpected table element start tag (%(name)s) in the select in table phase.", + "unexpected-table-element-end-tag-in-select-in-table": + "Unexpected table element end tag (%(name)s) in the select in table phase.", + "unexpected-char-after-body": + "Unexpected non-space characters in the after body phase.", + "unexpected-start-tag-after-body": + "Unexpected start tag token (%(name)s)" + " in the after body phase.", + "unexpected-end-tag-after-body": + "Unexpected end tag token (%(name)s)" + " in the after body phase.", + "unexpected-char-in-frameset": + "Unexpected characters in the frameset phase. Characters ignored.", + "unexpected-start-tag-in-frameset": + "Unexpected start tag token (%(name)s)" + " in the frameset phase. Ignored.", + "unexpected-frameset-in-frameset-innerhtml": + "Unexpected end tag token (frameset) " + "in the frameset phase (innerHTML).", + "unexpected-end-tag-in-frameset": + "Unexpected end tag token (%(name)s)" + " in the frameset phase. Ignored.", + "unexpected-char-after-frameset": + "Unexpected non-space characters in the " + "after frameset phase. Ignored.", + "unexpected-start-tag-after-frameset": + "Unexpected start tag (%(name)s)" + " in the after frameset phase. Ignored.", + "unexpected-end-tag-after-frameset": + "Unexpected end tag (%(name)s)" + " in the after frameset phase. Ignored.", + "unexpected-end-tag-after-body-innerhtml": + "Unexpected end tag after body(innerHtml)", + "expected-eof-but-got-char": + "Unexpected non-space characters. Expected end of file.", + "expected-eof-but-got-start-tag": + "Unexpected start tag (%(name)s)" + ". Expected end of file.", + "expected-eof-but-got-end-tag": + "Unexpected end tag (%(name)s)" + ". Expected end of file.", + "eof-in-table": + "Unexpected end of file. Expected table content.", + "eof-in-select": + "Unexpected end of file. Expected select content.", + "eof-in-frameset": + "Unexpected end of file. Expected frameset content.", + "eof-in-script-in-script": + "Unexpected end of file. Expected script content.", + "eof-in-foreign-lands": + "Unexpected end of file. Expected foreign content", + "non-void-element-with-trailing-solidus": + "Trailing solidus not allowed on element %(name)s", + "unexpected-html-element-in-foreign-content": + "Element %(name)s not allowed in a non-html context", + "unexpected-end-tag-before-html": + "Unexpected end tag (%(name)s) before html.", + "XXX-undefined-error": + "Undefined error (this sucks and should be fixed)", +} + +namespaces = { + "html": "http://www.w3.org/1999/xhtml", + "mathml": "http://www.w3.org/1998/Math/MathML", + "svg": "http://www.w3.org/2000/svg", + "xlink": "http://www.w3.org/1999/xlink", + "xml": "http://www.w3.org/XML/1998/namespace", + "xmlns": "http://www.w3.org/2000/xmlns/" +} + +scopingElements = frozenset([ + (namespaces["html"], "applet"), + (namespaces["html"], "caption"), + (namespaces["html"], "html"), + (namespaces["html"], "marquee"), + (namespaces["html"], "object"), + (namespaces["html"], "table"), + (namespaces["html"], "td"), + (namespaces["html"], "th"), + (namespaces["mathml"], "mi"), + (namespaces["mathml"], "mo"), + (namespaces["mathml"], "mn"), + (namespaces["mathml"], "ms"), + (namespaces["mathml"], "mtext"), + (namespaces["mathml"], "annotation-xml"), + (namespaces["svg"], "foreignObject"), + (namespaces["svg"], "desc"), + (namespaces["svg"], "title"), +]) + +formattingElements = frozenset([ + (namespaces["html"], "a"), + (namespaces["html"], "b"), + (namespaces["html"], "big"), + (namespaces["html"], "code"), + (namespaces["html"], "em"), + (namespaces["html"], "font"), + (namespaces["html"], "i"), + (namespaces["html"], "nobr"), + (namespaces["html"], "s"), + (namespaces["html"], "small"), + (namespaces["html"], "strike"), + (namespaces["html"], "strong"), + (namespaces["html"], "tt"), + (namespaces["html"], "u") +]) + +specialElements = frozenset([ + (namespaces["html"], "address"), + (namespaces["html"], "applet"), + (namespaces["html"], "area"), + (namespaces["html"], "article"), + (namespaces["html"], "aside"), + (namespaces["html"], "base"), + (namespaces["html"], "basefont"), + (namespaces["html"], "bgsound"), + (namespaces["html"], "blockquote"), + (namespaces["html"], "body"), + (namespaces["html"], "br"), + (namespaces["html"], "button"), + (namespaces["html"], "caption"), + (namespaces["html"], "center"), + (namespaces["html"], "col"), + (namespaces["html"], "colgroup"), + (namespaces["html"], "command"), + (namespaces["html"], "dd"), + (namespaces["html"], "details"), + (namespaces["html"], "dir"), + (namespaces["html"], "div"), + (namespaces["html"], "dl"), + (namespaces["html"], "dt"), + (namespaces["html"], "embed"), + (namespaces["html"], "fieldset"), + (namespaces["html"], "figure"), + (namespaces["html"], "footer"), + (namespaces["html"], "form"), + (namespaces["html"], "frame"), + (namespaces["html"], "frameset"), + (namespaces["html"], "h1"), + (namespaces["html"], "h2"), + (namespaces["html"], "h3"), + (namespaces["html"], "h4"), + (namespaces["html"], "h5"), + (namespaces["html"], "h6"), + (namespaces["html"], "head"), + (namespaces["html"], "header"), + (namespaces["html"], "hr"), + (namespaces["html"], "html"), + (namespaces["html"], "iframe"), + # Note that image is commented out in the spec as "this isn't an + # element that can end up on the stack, so it doesn't matter," + (namespaces["html"], "image"), + (namespaces["html"], "img"), + (namespaces["html"], "input"), + (namespaces["html"], "isindex"), + (namespaces["html"], "li"), + (namespaces["html"], "link"), + (namespaces["html"], "listing"), + (namespaces["html"], "marquee"), + (namespaces["html"], "menu"), + (namespaces["html"], "meta"), + (namespaces["html"], "nav"), + (namespaces["html"], "noembed"), + (namespaces["html"], "noframes"), + (namespaces["html"], "noscript"), + (namespaces["html"], "object"), + (namespaces["html"], "ol"), + (namespaces["html"], "p"), + (namespaces["html"], "param"), + (namespaces["html"], "plaintext"), + (namespaces["html"], "pre"), + (namespaces["html"], "script"), + (namespaces["html"], "section"), + (namespaces["html"], "select"), + (namespaces["html"], "style"), + (namespaces["html"], "table"), + (namespaces["html"], "tbody"), + (namespaces["html"], "td"), + (namespaces["html"], "textarea"), + (namespaces["html"], "tfoot"), + (namespaces["html"], "th"), + (namespaces["html"], "thead"), + (namespaces["html"], "title"), + (namespaces["html"], "tr"), + (namespaces["html"], "ul"), + (namespaces["html"], "wbr"), + (namespaces["html"], "xmp"), + (namespaces["svg"], "foreignObject") +]) + +htmlIntegrationPointElements = frozenset([ + (namespaces["mathml"], "annotaion-xml"), + (namespaces["svg"], "foreignObject"), + (namespaces["svg"], "desc"), + (namespaces["svg"], "title") +]) + +mathmlTextIntegrationPointElements = frozenset([ + (namespaces["mathml"], "mi"), + (namespaces["mathml"], "mo"), + (namespaces["mathml"], "mn"), + (namespaces["mathml"], "ms"), + (namespaces["mathml"], "mtext") +]) + +adjustForeignAttributes = { + "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), + "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]), + "xlink:href": ("xlink", "href", namespaces["xlink"]), + "xlink:role": ("xlink", "role", namespaces["xlink"]), + "xlink:show": ("xlink", "show", namespaces["xlink"]), + "xlink:title": ("xlink", "title", namespaces["xlink"]), + "xlink:type": ("xlink", "type", namespaces["xlink"]), + "xml:base": ("xml", "base", namespaces["xml"]), + "xml:lang": ("xml", "lang", namespaces["xml"]), + "xml:space": ("xml", "space", namespaces["xml"]), + "xmlns": (None, "xmlns", namespaces["xmlns"]), + "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"]) +} + +unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in + adjustForeignAttributes.items()]) + +spaceCharacters = frozenset([ + "\t", + "\n", + "\u000C", + " ", + "\r" +]) + +tableInsertModeElements = frozenset([ + "table", + "tbody", + "tfoot", + "thead", + "tr" +]) + +asciiLowercase = frozenset(string.ascii_lowercase) +asciiUppercase = frozenset(string.ascii_uppercase) +asciiLetters = frozenset(string.ascii_letters) +digits = frozenset(string.digits) +hexDigits = frozenset(string.hexdigits) + +asciiUpper2Lower = dict([(ord(c), ord(c.lower())) + for c in string.ascii_uppercase]) + +# Heading elements need to be ordered +headingElements = ( + "h1", + "h2", + "h3", + "h4", + "h5", + "h6" +) + +voidElements = frozenset([ + "base", + "command", + "event-source", + "link", + "meta", + "hr", + "br", + "img", + "embed", + "param", + "area", + "col", + "input", + "source", + "track" +]) + +cdataElements = frozenset(['title', 'textarea']) + +rcdataElements = frozenset([ + 'style', + 'script', + 'xmp', + 'iframe', + 'noembed', + 'noframes', + 'noscript' +]) + +booleanAttributes = { + "": frozenset(["irrelevant"]), + "style": frozenset(["scoped"]), + "img": frozenset(["ismap"]), + "audio": frozenset(["autoplay", "controls"]), + "video": frozenset(["autoplay", "controls"]), + "script": frozenset(["defer", "async"]), + "details": frozenset(["open"]), + "datagrid": frozenset(["multiple", "disabled"]), + "command": frozenset(["hidden", "disabled", "checked", "default"]), + "hr": frozenset(["noshade"]), + "menu": frozenset(["autosubmit"]), + "fieldset": frozenset(["disabled", "readonly"]), + "option": frozenset(["disabled", "readonly", "selected"]), + "optgroup": frozenset(["disabled", "readonly"]), + "button": frozenset(["disabled", "autofocus"]), + "input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]), + "select": frozenset(["disabled", "readonly", "autofocus", "multiple"]), + "output": frozenset(["disabled", "readonly"]), +} + +# entitiesWindows1252 has to be _ordered_ and needs to have an index. It +# therefore can't be a frozenset. +entitiesWindows1252 = ( + 8364, # 0x80 0x20AC EURO SIGN + 65533, # 0x81 UNDEFINED + 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK + 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK + 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK + 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS + 8224, # 0x86 0x2020 DAGGER + 8225, # 0x87 0x2021 DOUBLE DAGGER + 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT + 8240, # 0x89 0x2030 PER MILLE SIGN + 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON + 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE + 65533, # 0x8D UNDEFINED + 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON + 65533, # 0x8F UNDEFINED + 65533, # 0x90 UNDEFINED + 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK + 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK + 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK + 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK + 8226, # 0x95 0x2022 BULLET + 8211, # 0x96 0x2013 EN DASH + 8212, # 0x97 0x2014 EM DASH + 732, # 0x98 0x02DC SMALL TILDE + 8482, # 0x99 0x2122 TRADE MARK SIGN + 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON + 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE + 65533, # 0x9D UNDEFINED + 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON + 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS +) + +xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;']) + +entities = { + "AElig": "\xc6", + "AElig;": "\xc6", + "AMP": "&", + "AMP;": "&", + "Aacute": "\xc1", + "Aacute;": "\xc1", + "Abreve;": "\u0102", + "Acirc": "\xc2", + "Acirc;": "\xc2", + "Acy;": "\u0410", + "Afr;": "\U0001d504", + "Agrave": "\xc0", + "Agrave;": "\xc0", + "Alpha;": "\u0391", + "Amacr;": "\u0100", + "And;": "\u2a53", + "Aogon;": "\u0104", + "Aopf;": "\U0001d538", + "ApplyFunction;": "\u2061", + "Aring": "\xc5", + "Aring;": "\xc5", + "Ascr;": "\U0001d49c", + "Assign;": "\u2254", + "Atilde": "\xc3", + "Atilde;": "\xc3", + "Auml": "\xc4", + "Auml;": "\xc4", + "Backslash;": "\u2216", + "Barv;": "\u2ae7", + "Barwed;": "\u2306", + "Bcy;": "\u0411", + "Because;": "\u2235", + "Bernoullis;": "\u212c", + "Beta;": "\u0392", + "Bfr;": "\U0001d505", + "Bopf;": "\U0001d539", + "Breve;": "\u02d8", + "Bscr;": "\u212c", + "Bumpeq;": "\u224e", + "CHcy;": "\u0427", + "COPY": "\xa9", + "COPY;": "\xa9", + "Cacute;": "\u0106", + "Cap;": "\u22d2", + "CapitalDifferentialD;": "\u2145", + "Cayleys;": "\u212d", + "Ccaron;": "\u010c", + "Ccedil": "\xc7", + "Ccedil;": "\xc7", + "Ccirc;": "\u0108", + "Cconint;": "\u2230", + "Cdot;": "\u010a", + "Cedilla;": "\xb8", + "CenterDot;": "\xb7", + "Cfr;": "\u212d", + "Chi;": "\u03a7", + "CircleDot;": "\u2299", + "CircleMinus;": "\u2296", + "CirclePlus;": "\u2295", + "CircleTimes;": "\u2297", + "ClockwiseContourIntegral;": "\u2232", + "CloseCurlyDoubleQuote;": "\u201d", + "CloseCurlyQuote;": "\u2019", + "Colon;": "\u2237", + "Colone;": "\u2a74", + "Congruent;": "\u2261", + "Conint;": "\u222f", + "ContourIntegral;": "\u222e", + "Copf;": "\u2102", + "Coproduct;": "\u2210", + "CounterClockwiseContourIntegral;": "\u2233", + "Cross;": "\u2a2f", + "Cscr;": "\U0001d49e", + "Cup;": "\u22d3", + "CupCap;": "\u224d", + "DD;": "\u2145", + "DDotrahd;": "\u2911", + "DJcy;": "\u0402", + "DScy;": "\u0405", + "DZcy;": "\u040f", + "Dagger;": "\u2021", + "Darr;": "\u21a1", + "Dashv;": "\u2ae4", + "Dcaron;": "\u010e", + "Dcy;": "\u0414", + "Del;": "\u2207", + "Delta;": "\u0394", + "Dfr;": "\U0001d507", + "DiacriticalAcute;": "\xb4", + "DiacriticalDot;": "\u02d9", + "DiacriticalDoubleAcute;": "\u02dd", + "DiacriticalGrave;": "`", + "DiacriticalTilde;": "\u02dc", + "Diamond;": "\u22c4", + "DifferentialD;": "\u2146", + "Dopf;": "\U0001d53b", + "Dot;": "\xa8", + "DotDot;": "\u20dc", + "DotEqual;": "\u2250", + "DoubleContourIntegral;": "\u222f", + "DoubleDot;": "\xa8", + "DoubleDownArrow;": "\u21d3", + "DoubleLeftArrow;": "\u21d0", + "DoubleLeftRightArrow;": "\u21d4", + "DoubleLeftTee;": "\u2ae4", + "DoubleLongLeftArrow;": "\u27f8", + "DoubleLongLeftRightArrow;": "\u27fa", + "DoubleLongRightArrow;": "\u27f9", + "DoubleRightArrow;": "\u21d2", + "DoubleRightTee;": "\u22a8", + "DoubleUpArrow;": "\u21d1", + "DoubleUpDownArrow;": "\u21d5", + "DoubleVerticalBar;": "\u2225", + "DownArrow;": "\u2193", + "DownArrowBar;": "\u2913", + "DownArrowUpArrow;": "\u21f5", + "DownBreve;": "\u0311", + "DownLeftRightVector;": "\u2950", + "DownLeftTeeVector;": "\u295e", + "DownLeftVector;": "\u21bd", + "DownLeftVectorBar;": "\u2956", + "DownRightTeeVector;": "\u295f", + "DownRightVector;": "\u21c1", + "DownRightVectorBar;": "\u2957", + "DownTee;": "\u22a4", + "DownTeeArrow;": "\u21a7", + "Downarrow;": "\u21d3", + "Dscr;": "\U0001d49f", + "Dstrok;": "\u0110", + "ENG;": "\u014a", + "ETH": "\xd0", + "ETH;": "\xd0", + "Eacute": "\xc9", + "Eacute;": "\xc9", + "Ecaron;": "\u011a", + "Ecirc": "\xca", + "Ecirc;": "\xca", + "Ecy;": "\u042d", + "Edot;": "\u0116", + "Efr;": "\U0001d508", + "Egrave": "\xc8", + "Egrave;": "\xc8", + "Element;": "\u2208", + "Emacr;": "\u0112", + "EmptySmallSquare;": "\u25fb", + "EmptyVerySmallSquare;": "\u25ab", + "Eogon;": "\u0118", + "Eopf;": "\U0001d53c", + "Epsilon;": "\u0395", + "Equal;": "\u2a75", + "EqualTilde;": "\u2242", + "Equilibrium;": "\u21cc", + "Escr;": "\u2130", + "Esim;": "\u2a73", + "Eta;": "\u0397", + "Euml": "\xcb", + "Euml;": "\xcb", + "Exists;": "\u2203", + "ExponentialE;": "\u2147", + "Fcy;": "\u0424", + "Ffr;": "\U0001d509", + "FilledSmallSquare;": "\u25fc", + "FilledVerySmallSquare;": "\u25aa", + "Fopf;": "\U0001d53d", + "ForAll;": "\u2200", + "Fouriertrf;": "\u2131", + "Fscr;": "\u2131", + "GJcy;": "\u0403", + "GT": ">", + "GT;": ">", + "Gamma;": "\u0393", + "Gammad;": "\u03dc", + "Gbreve;": "\u011e", + "Gcedil;": "\u0122", + "Gcirc;": "\u011c", + "Gcy;": "\u0413", + "Gdot;": "\u0120", + "Gfr;": "\U0001d50a", + "Gg;": "\u22d9", + "Gopf;": "\U0001d53e", + "GreaterEqual;": "\u2265", + "GreaterEqualLess;": "\u22db", + "GreaterFullEqual;": "\u2267", + "GreaterGreater;": "\u2aa2", + "GreaterLess;": "\u2277", + "GreaterSlantEqual;": "\u2a7e", + "GreaterTilde;": "\u2273", + "Gscr;": "\U0001d4a2", + "Gt;": "\u226b", + "HARDcy;": "\u042a", + "Hacek;": "\u02c7", + "Hat;": "^", + "Hcirc;": "\u0124", + "Hfr;": "\u210c", + "HilbertSpace;": "\u210b", + "Hopf;": "\u210d", + "HorizontalLine;": "\u2500", + "Hscr;": "\u210b", + "Hstrok;": "\u0126", + "HumpDownHump;": "\u224e", + "HumpEqual;": "\u224f", + "IEcy;": "\u0415", + "IJlig;": "\u0132", + "IOcy;": "\u0401", + "Iacute": "\xcd", + "Iacute;": "\xcd", + "Icirc": "\xce", + "Icirc;": "\xce", + "Icy;": "\u0418", + "Idot;": "\u0130", + "Ifr;": "\u2111", + "Igrave": "\xcc", + "Igrave;": "\xcc", + "Im;": "\u2111", + "Imacr;": "\u012a", + "ImaginaryI;": "\u2148", + "Implies;": "\u21d2", + "Int;": "\u222c", + "Integral;": "\u222b", + "Intersection;": "\u22c2", + "InvisibleComma;": "\u2063", + "InvisibleTimes;": "\u2062", + "Iogon;": "\u012e", + "Iopf;": "\U0001d540", + "Iota;": "\u0399", + "Iscr;": "\u2110", + "Itilde;": "\u0128", + "Iukcy;": "\u0406", + "Iuml": "\xcf", + "Iuml;": "\xcf", + "Jcirc;": "\u0134", + "Jcy;": "\u0419", + "Jfr;": "\U0001d50d", + "Jopf;": "\U0001d541", + "Jscr;": "\U0001d4a5", + "Jsercy;": "\u0408", + "Jukcy;": "\u0404", + "KHcy;": "\u0425", + "KJcy;": "\u040c", + "Kappa;": "\u039a", + "Kcedil;": "\u0136", + "Kcy;": "\u041a", + "Kfr;": "\U0001d50e", + "Kopf;": "\U0001d542", + "Kscr;": "\U0001d4a6", + "LJcy;": "\u0409", + "LT": "<", + "LT;": "<", + "Lacute;": "\u0139", + "Lambda;": "\u039b", + "Lang;": "\u27ea", + "Laplacetrf;": "\u2112", + "Larr;": "\u219e", + "Lcaron;": "\u013d", + "Lcedil;": "\u013b", + "Lcy;": "\u041b", + "LeftAngleBracket;": "\u27e8", + "LeftArrow;": "\u2190", + "LeftArrowBar;": "\u21e4", + "LeftArrowRightArrow;": "\u21c6", + "LeftCeiling;": "\u2308", + "LeftDoubleBracket;": "\u27e6", + "LeftDownTeeVector;": "\u2961", + "LeftDownVector;": "\u21c3", + "LeftDownVectorBar;": "\u2959", + "LeftFloor;": "\u230a", + "LeftRightArrow;": "\u2194", + "LeftRightVector;": "\u294e", + "LeftTee;": "\u22a3", + "LeftTeeArrow;": "\u21a4", + "LeftTeeVector;": "\u295a", + "LeftTriangle;": "\u22b2", + "LeftTriangleBar;": "\u29cf", + "LeftTriangleEqual;": "\u22b4", + "LeftUpDownVector;": "\u2951", + "LeftUpTeeVector;": "\u2960", + "LeftUpVector;": "\u21bf", + "LeftUpVectorBar;": "\u2958", + "LeftVector;": "\u21bc", + "LeftVectorBar;": "\u2952", + "Leftarrow;": "\u21d0", + "Leftrightarrow;": "\u21d4", + "LessEqualGreater;": "\u22da", + "LessFullEqual;": "\u2266", + "LessGreater;": "\u2276", + "LessLess;": "\u2aa1", + "LessSlantEqual;": "\u2a7d", + "LessTilde;": "\u2272", + "Lfr;": "\U0001d50f", + "Ll;": "\u22d8", + "Lleftarrow;": "\u21da", + "Lmidot;": "\u013f", + "LongLeftArrow;": "\u27f5", + "LongLeftRightArrow;": "\u27f7", + "LongRightArrow;": "\u27f6", + "Longleftarrow;": "\u27f8", + "Longleftrightarrow;": "\u27fa", + "Longrightarrow;": "\u27f9", + "Lopf;": "\U0001d543", + "LowerLeftArrow;": "\u2199", + "LowerRightArrow;": "\u2198", + "Lscr;": "\u2112", + "Lsh;": "\u21b0", + "Lstrok;": "\u0141", + "Lt;": "\u226a", + "Map;": "\u2905", + "Mcy;": "\u041c", + "MediumSpace;": "\u205f", + "Mellintrf;": "\u2133", + "Mfr;": "\U0001d510", + "MinusPlus;": "\u2213", + "Mopf;": "\U0001d544", + "Mscr;": "\u2133", + "Mu;": "\u039c", + "NJcy;": "\u040a", + "Nacute;": "\u0143", + "Ncaron;": "\u0147", + "Ncedil;": "\u0145", + "Ncy;": "\u041d", + "NegativeMediumSpace;": "\u200b", + "NegativeThickSpace;": "\u200b", + "NegativeThinSpace;": "\u200b", + "NegativeVeryThinSpace;": "\u200b", + "NestedGreaterGreater;": "\u226b", + "NestedLessLess;": "\u226a", + "NewLine;": "\n", + "Nfr;": "\U0001d511", + "NoBreak;": "\u2060", + "NonBreakingSpace;": "\xa0", + "Nopf;": "\u2115", + "Not;": "\u2aec", + "NotCongruent;": "\u2262", + "NotCupCap;": "\u226d", + "NotDoubleVerticalBar;": "\u2226", + "NotElement;": "\u2209", + "NotEqual;": "\u2260", + "NotEqualTilde;": "\u2242\u0338", + "NotExists;": "\u2204", + "NotGreater;": "\u226f", + "NotGreaterEqual;": "\u2271", + "NotGreaterFullEqual;": "\u2267\u0338", + "NotGreaterGreater;": "\u226b\u0338", + "NotGreaterLess;": "\u2279", + "NotGreaterSlantEqual;": "\u2a7e\u0338", + "NotGreaterTilde;": "\u2275", + "NotHumpDownHump;": "\u224e\u0338", + "NotHumpEqual;": "\u224f\u0338", + "NotLeftTriangle;": "\u22ea", + "NotLeftTriangleBar;": "\u29cf\u0338", + "NotLeftTriangleEqual;": "\u22ec", + "NotLess;": "\u226e", + "NotLessEqual;": "\u2270", + "NotLessGreater;": "\u2278", + "NotLessLess;": "\u226a\u0338", + "NotLessSlantEqual;": "\u2a7d\u0338", + "NotLessTilde;": "\u2274", + "NotNestedGreaterGreater;": "\u2aa2\u0338", + "NotNestedLessLess;": "\u2aa1\u0338", + "NotPrecedes;": "\u2280", + "NotPrecedesEqual;": "\u2aaf\u0338", + "NotPrecedesSlantEqual;": "\u22e0", + "NotReverseElement;": "\u220c", + "NotRightTriangle;": "\u22eb", + "NotRightTriangleBar;": "\u29d0\u0338", + "NotRightTriangleEqual;": "\u22ed", + "NotSquareSubset;": "\u228f\u0338", + "NotSquareSubsetEqual;": "\u22e2", + "NotSquareSuperset;": "\u2290\u0338", + "NotSquareSupersetEqual;": "\u22e3", + "NotSubset;": "\u2282\u20d2", + "NotSubsetEqual;": "\u2288", + "NotSucceeds;": "\u2281", + "NotSucceedsEqual;": "\u2ab0\u0338", + "NotSucceedsSlantEqual;": "\u22e1", + "NotSucceedsTilde;": "\u227f\u0338", + "NotSuperset;": "\u2283\u20d2", + "NotSupersetEqual;": "\u2289", + "NotTilde;": "\u2241", + "NotTildeEqual;": "\u2244", + "NotTildeFullEqual;": "\u2247", + "NotTildeTilde;": "\u2249", + "NotVerticalBar;": "\u2224", + "Nscr;": "\U0001d4a9", + "Ntilde": "\xd1", + "Ntilde;": "\xd1", + "Nu;": "\u039d", + "OElig;": "\u0152", + "Oacute": "\xd3", + "Oacute;": "\xd3", + "Ocirc": "\xd4", + "Ocirc;": "\xd4", + "Ocy;": "\u041e", + "Odblac;": "\u0150", + "Ofr;": "\U0001d512", + "Ograve": "\xd2", + "Ograve;": "\xd2", + "Omacr;": "\u014c", + "Omega;": "\u03a9", + "Omicron;": "\u039f", + "Oopf;": "\U0001d546", + "OpenCurlyDoubleQuote;": "\u201c", + "OpenCurlyQuote;": "\u2018", + "Or;": "\u2a54", + "Oscr;": "\U0001d4aa", + "Oslash": "\xd8", + "Oslash;": "\xd8", + "Otilde": "\xd5", + "Otilde;": "\xd5", + "Otimes;": "\u2a37", + "Ouml": "\xd6", + "Ouml;": "\xd6", + "OverBar;": "\u203e", + "OverBrace;": "\u23de", + "OverBracket;": "\u23b4", + "OverParenthesis;": "\u23dc", + "PartialD;": "\u2202", + "Pcy;": "\u041f", + "Pfr;": "\U0001d513", + "Phi;": "\u03a6", + "Pi;": "\u03a0", + "PlusMinus;": "\xb1", + "Poincareplane;": "\u210c", + "Popf;": "\u2119", + "Pr;": "\u2abb", + "Precedes;": "\u227a", + "PrecedesEqual;": "\u2aaf", + "PrecedesSlantEqual;": "\u227c", + "PrecedesTilde;": "\u227e", + "Prime;": "\u2033", + "Product;": "\u220f", + "Proportion;": "\u2237", + "Proportional;": "\u221d", + "Pscr;": "\U0001d4ab", + "Psi;": "\u03a8", + "QUOT": "\"", + "QUOT;": "\"", + "Qfr;": "\U0001d514", + "Qopf;": "\u211a", + "Qscr;": "\U0001d4ac", + "RBarr;": "\u2910", + "REG": "\xae", + "REG;": "\xae", + "Racute;": "\u0154", + "Rang;": "\u27eb", + "Rarr;": "\u21a0", + "Rarrtl;": "\u2916", + "Rcaron;": "\u0158", + "Rcedil;": "\u0156", + "Rcy;": "\u0420", + "Re;": "\u211c", + "ReverseElement;": "\u220b", + "ReverseEquilibrium;": "\u21cb", + "ReverseUpEquilibrium;": "\u296f", + "Rfr;": "\u211c", + "Rho;": "\u03a1", + "RightAngleBracket;": "\u27e9", + "RightArrow;": "\u2192", + "RightArrowBar;": "\u21e5", + "RightArrowLeftArrow;": "\u21c4", + "RightCeiling;": "\u2309", + "RightDoubleBracket;": "\u27e7", + "RightDownTeeVector;": "\u295d", + "RightDownVector;": "\u21c2", + "RightDownVectorBar;": "\u2955", + "RightFloor;": "\u230b", + "RightTee;": "\u22a2", + "RightTeeArrow;": "\u21a6", + "RightTeeVector;": "\u295b", + "RightTriangle;": "\u22b3", + "RightTriangleBar;": "\u29d0", + "RightTriangleEqual;": "\u22b5", + "RightUpDownVector;": "\u294f", + "RightUpTeeVector;": "\u295c", + "RightUpVector;": "\u21be", + "RightUpVectorBar;": "\u2954", + "RightVector;": "\u21c0", + "RightVectorBar;": "\u2953", + "Rightarrow;": "\u21d2", + "Ropf;": "\u211d", + "RoundImplies;": "\u2970", + "Rrightarrow;": "\u21db", + "Rscr;": "\u211b", + "Rsh;": "\u21b1", + "RuleDelayed;": "\u29f4", + "SHCHcy;": "\u0429", + "SHcy;": "\u0428", + "SOFTcy;": "\u042c", + "Sacute;": "\u015a", + "Sc;": "\u2abc", + "Scaron;": "\u0160", + "Scedil;": "\u015e", + "Scirc;": "\u015c", + "Scy;": "\u0421", + "Sfr;": "\U0001d516", + "ShortDownArrow;": "\u2193", + "ShortLeftArrow;": "\u2190", + "ShortRightArrow;": "\u2192", + "ShortUpArrow;": "\u2191", + "Sigma;": "\u03a3", + "SmallCircle;": "\u2218", + "Sopf;": "\U0001d54a", + "Sqrt;": "\u221a", + "Square;": "\u25a1", + "SquareIntersection;": "\u2293", + "SquareSubset;": "\u228f", + "SquareSubsetEqual;": "\u2291", + "SquareSuperset;": "\u2290", + "SquareSupersetEqual;": "\u2292", + "SquareUnion;": "\u2294", + "Sscr;": "\U0001d4ae", + "Star;": "\u22c6", + "Sub;": "\u22d0", + "Subset;": "\u22d0", + "SubsetEqual;": "\u2286", + "Succeeds;": "\u227b", + "SucceedsEqual;": "\u2ab0", + "SucceedsSlantEqual;": "\u227d", + "SucceedsTilde;": "\u227f", + "SuchThat;": "\u220b", + "Sum;": "\u2211", + "Sup;": "\u22d1", + "Superset;": "\u2283", + "SupersetEqual;": "\u2287", + "Supset;": "\u22d1", + "THORN": "\xde", + "THORN;": "\xde", + "TRADE;": "\u2122", + "TSHcy;": "\u040b", + "TScy;": "\u0426", + "Tab;": "\t", + "Tau;": "\u03a4", + "Tcaron;": "\u0164", + "Tcedil;": "\u0162", + "Tcy;": "\u0422", + "Tfr;": "\U0001d517", + "Therefore;": "\u2234", + "Theta;": "\u0398", + "ThickSpace;": "\u205f\u200a", + "ThinSpace;": "\u2009", + "Tilde;": "\u223c", + "TildeEqual;": "\u2243", + "TildeFullEqual;": "\u2245", + "TildeTilde;": "\u2248", + "Topf;": "\U0001d54b", + "TripleDot;": "\u20db", + "Tscr;": "\U0001d4af", + "Tstrok;": "\u0166", + "Uacute": "\xda", + "Uacute;": "\xda", + "Uarr;": "\u219f", + "Uarrocir;": "\u2949", + "Ubrcy;": "\u040e", + "Ubreve;": "\u016c", + "Ucirc": "\xdb", + "Ucirc;": "\xdb", + "Ucy;": "\u0423", + "Udblac;": "\u0170", + "Ufr;": "\U0001d518", + "Ugrave": "\xd9", + "Ugrave;": "\xd9", + "Umacr;": "\u016a", + "UnderBar;": "_", + "UnderBrace;": "\u23df", + "UnderBracket;": "\u23b5", + "UnderParenthesis;": "\u23dd", + "Union;": "\u22c3", + "UnionPlus;": "\u228e", + "Uogon;": "\u0172", + "Uopf;": "\U0001d54c", + "UpArrow;": "\u2191", + "UpArrowBar;": "\u2912", + "UpArrowDownArrow;": "\u21c5", + "UpDownArrow;": "\u2195", + "UpEquilibrium;": "\u296e", + "UpTee;": "\u22a5", + "UpTeeArrow;": "\u21a5", + "Uparrow;": "\u21d1", + "Updownarrow;": "\u21d5", + "UpperLeftArrow;": "\u2196", + "UpperRightArrow;": "\u2197", + "Upsi;": "\u03d2", + "Upsilon;": "\u03a5", + "Uring;": "\u016e", + "Uscr;": "\U0001d4b0", + "Utilde;": "\u0168", + "Uuml": "\xdc", + "Uuml;": "\xdc", + "VDash;": "\u22ab", + "Vbar;": "\u2aeb", + "Vcy;": "\u0412", + "Vdash;": "\u22a9", + "Vdashl;": "\u2ae6", + "Vee;": "\u22c1", + "Verbar;": "\u2016", + "Vert;": "\u2016", + "VerticalBar;": "\u2223", + "VerticalLine;": "|", + "VerticalSeparator;": "\u2758", + "VerticalTilde;": "\u2240", + "VeryThinSpace;": "\u200a", + "Vfr;": "\U0001d519", + "Vopf;": "\U0001d54d", + "Vscr;": "\U0001d4b1", + "Vvdash;": "\u22aa", + "Wcirc;": "\u0174", + "Wedge;": "\u22c0", + "Wfr;": "\U0001d51a", + "Wopf;": "\U0001d54e", + "Wscr;": "\U0001d4b2", + "Xfr;": "\U0001d51b", + "Xi;": "\u039e", + "Xopf;": "\U0001d54f", + "Xscr;": "\U0001d4b3", + "YAcy;": "\u042f", + "YIcy;": "\u0407", + "YUcy;": "\u042e", + "Yacute": "\xdd", + "Yacute;": "\xdd", + "Ycirc;": "\u0176", + "Ycy;": "\u042b", + "Yfr;": "\U0001d51c", + "Yopf;": "\U0001d550", + "Yscr;": "\U0001d4b4", + "Yuml;": "\u0178", + "ZHcy;": "\u0416", + "Zacute;": "\u0179", + "Zcaron;": "\u017d", + "Zcy;": "\u0417", + "Zdot;": "\u017b", + "ZeroWidthSpace;": "\u200b", + "Zeta;": "\u0396", + "Zfr;": "\u2128", + "Zopf;": "\u2124", + "Zscr;": "\U0001d4b5", + "aacute": "\xe1", + "aacute;": "\xe1", + "abreve;": "\u0103", + "ac;": "\u223e", + "acE;": "\u223e\u0333", + "acd;": "\u223f", + "acirc": "\xe2", + "acirc;": "\xe2", + "acute": "\xb4", + "acute;": "\xb4", + "acy;": "\u0430", + "aelig": "\xe6", + "aelig;": "\xe6", + "af;": "\u2061", + "afr;": "\U0001d51e", + "agrave": "\xe0", + "agrave;": "\xe0", + "alefsym;": "\u2135", + "aleph;": "\u2135", + "alpha;": "\u03b1", + "amacr;": "\u0101", + "amalg;": "\u2a3f", + "amp": "&", + "amp;": "&", + "and;": "\u2227", + "andand;": "\u2a55", + "andd;": "\u2a5c", + "andslope;": "\u2a58", + "andv;": "\u2a5a", + "ang;": "\u2220", + "ange;": "\u29a4", + "angle;": "\u2220", + "angmsd;": "\u2221", + "angmsdaa;": "\u29a8", + "angmsdab;": "\u29a9", + "angmsdac;": "\u29aa", + "angmsdad;": "\u29ab", + "angmsdae;": "\u29ac", + "angmsdaf;": "\u29ad", + "angmsdag;": "\u29ae", + "angmsdah;": "\u29af", + "angrt;": "\u221f", + "angrtvb;": "\u22be", + "angrtvbd;": "\u299d", + "angsph;": "\u2222", + "angst;": "\xc5", + "angzarr;": "\u237c", + "aogon;": "\u0105", + "aopf;": "\U0001d552", + "ap;": "\u2248", + "apE;": "\u2a70", + "apacir;": "\u2a6f", + "ape;": "\u224a", + "apid;": "\u224b", + "apos;": "'", + "approx;": "\u2248", + "approxeq;": "\u224a", + "aring": "\xe5", + "aring;": "\xe5", + "ascr;": "\U0001d4b6", + "ast;": "*", + "asymp;": "\u2248", + "asympeq;": "\u224d", + "atilde": "\xe3", + "atilde;": "\xe3", + "auml": "\xe4", + "auml;": "\xe4", + "awconint;": "\u2233", + "awint;": "\u2a11", + "bNot;": "\u2aed", + "backcong;": "\u224c", + "backepsilon;": "\u03f6", + "backprime;": "\u2035", + "backsim;": "\u223d", + "backsimeq;": "\u22cd", + "barvee;": "\u22bd", + "barwed;": "\u2305", + "barwedge;": "\u2305", + "bbrk;": "\u23b5", + "bbrktbrk;": "\u23b6", + "bcong;": "\u224c", + "bcy;": "\u0431", + "bdquo;": "\u201e", + "becaus;": "\u2235", + "because;": "\u2235", + "bemptyv;": "\u29b0", + "bepsi;": "\u03f6", + "bernou;": "\u212c", + "beta;": "\u03b2", + "beth;": "\u2136", + "between;": "\u226c", + "bfr;": "\U0001d51f", + "bigcap;": "\u22c2", + "bigcirc;": "\u25ef", + "bigcup;": "\u22c3", + "bigodot;": "\u2a00", + "bigoplus;": "\u2a01", + "bigotimes;": "\u2a02", + "bigsqcup;": "\u2a06", + "bigstar;": "\u2605", + "bigtriangledown;": "\u25bd", + "bigtriangleup;": "\u25b3", + "biguplus;": "\u2a04", + "bigvee;": "\u22c1", + "bigwedge;": "\u22c0", + "bkarow;": "\u290d", + "blacklozenge;": "\u29eb", + "blacksquare;": "\u25aa", + "blacktriangle;": "\u25b4", + "blacktriangledown;": "\u25be", + "blacktriangleleft;": "\u25c2", + "blacktriangleright;": "\u25b8", + "blank;": "\u2423", + "blk12;": "\u2592", + "blk14;": "\u2591", + "blk34;": "\u2593", + "block;": "\u2588", + "bne;": "=\u20e5", + "bnequiv;": "\u2261\u20e5", + "bnot;": "\u2310", + "bopf;": "\U0001d553", + "bot;": "\u22a5", + "bottom;": "\u22a5", + "bowtie;": "\u22c8", + "boxDL;": "\u2557", + "boxDR;": "\u2554", + "boxDl;": "\u2556", + "boxDr;": "\u2553", + "boxH;": "\u2550", + "boxHD;": "\u2566", + "boxHU;": "\u2569", + "boxHd;": "\u2564", + "boxHu;": "\u2567", + "boxUL;": "\u255d", + "boxUR;": "\u255a", + "boxUl;": "\u255c", + "boxUr;": "\u2559", + "boxV;": "\u2551", + "boxVH;": "\u256c", + "boxVL;": "\u2563", + "boxVR;": "\u2560", + "boxVh;": "\u256b", + "boxVl;": "\u2562", + "boxVr;": "\u255f", + "boxbox;": "\u29c9", + "boxdL;": "\u2555", + "boxdR;": "\u2552", + "boxdl;": "\u2510", + "boxdr;": "\u250c", + "boxh;": "\u2500", + "boxhD;": "\u2565", + "boxhU;": "\u2568", + "boxhd;": "\u252c", + "boxhu;": "\u2534", + "boxminus;": "\u229f", + "boxplus;": "\u229e", + "boxtimes;": "\u22a0", + "boxuL;": "\u255b", + "boxuR;": "\u2558", + "boxul;": "\u2518", + "boxur;": "\u2514", + "boxv;": "\u2502", + "boxvH;": "\u256a", + "boxvL;": "\u2561", + "boxvR;": "\u255e", + "boxvh;": "\u253c", + "boxvl;": "\u2524", + "boxvr;": "\u251c", + "bprime;": "\u2035", + "breve;": "\u02d8", + "brvbar": "\xa6", + "brvbar;": "\xa6", + "bscr;": "\U0001d4b7", + "bsemi;": "\u204f", + "bsim;": "\u223d", + "bsime;": "\u22cd", + "bsol;": "\\", + "bsolb;": "\u29c5", + "bsolhsub;": "\u27c8", + "bull;": "\u2022", + "bullet;": "\u2022", + "bump;": "\u224e", + "bumpE;": "\u2aae", + "bumpe;": "\u224f", + "bumpeq;": "\u224f", + "cacute;": "\u0107", + "cap;": "\u2229", + "capand;": "\u2a44", + "capbrcup;": "\u2a49", + "capcap;": "\u2a4b", + "capcup;": "\u2a47", + "capdot;": "\u2a40", + "caps;": "\u2229\ufe00", + "caret;": "\u2041", + "caron;": "\u02c7", + "ccaps;": "\u2a4d", + "ccaron;": "\u010d", + "ccedil": "\xe7", + "ccedil;": "\xe7", + "ccirc;": "\u0109", + "ccups;": "\u2a4c", + "ccupssm;": "\u2a50", + "cdot;": "\u010b", + "cedil": "\xb8", + "cedil;": "\xb8", + "cemptyv;": "\u29b2", + "cent": "\xa2", + "cent;": "\xa2", + "centerdot;": "\xb7", + "cfr;": "\U0001d520", + "chcy;": "\u0447", + "check;": "\u2713", + "checkmark;": "\u2713", + "chi;": "\u03c7", + "cir;": "\u25cb", + "cirE;": "\u29c3", + "circ;": "\u02c6", + "circeq;": "\u2257", + "circlearrowleft;": "\u21ba", + "circlearrowright;": "\u21bb", + "circledR;": "\xae", + "circledS;": "\u24c8", + "circledast;": "\u229b", + "circledcirc;": "\u229a", + "circleddash;": "\u229d", + "cire;": "\u2257", + "cirfnint;": "\u2a10", + "cirmid;": "\u2aef", + "cirscir;": "\u29c2", + "clubs;": "\u2663", + "clubsuit;": "\u2663", + "colon;": ":", + "colone;": "\u2254", + "coloneq;": "\u2254", + "comma;": ",", + "commat;": "@", + "comp;": "\u2201", + "compfn;": "\u2218", + "complement;": "\u2201", + "complexes;": "\u2102", + "cong;": "\u2245", + "congdot;": "\u2a6d", + "conint;": "\u222e", + "copf;": "\U0001d554", + "coprod;": "\u2210", + "copy": "\xa9", + "copy;": "\xa9", + "copysr;": "\u2117", + "crarr;": "\u21b5", + "cross;": "\u2717", + "cscr;": "\U0001d4b8", + "csub;": "\u2acf", + "csube;": "\u2ad1", + "csup;": "\u2ad0", + "csupe;": "\u2ad2", + "ctdot;": "\u22ef", + "cudarrl;": "\u2938", + "cudarrr;": "\u2935", + "cuepr;": "\u22de", + "cuesc;": "\u22df", + "cularr;": "\u21b6", + "cularrp;": "\u293d", + "cup;": "\u222a", + "cupbrcap;": "\u2a48", + "cupcap;": "\u2a46", + "cupcup;": "\u2a4a", + "cupdot;": "\u228d", + "cupor;": "\u2a45", + "cups;": "\u222a\ufe00", + "curarr;": "\u21b7", + "curarrm;": "\u293c", + "curlyeqprec;": "\u22de", + "curlyeqsucc;": "\u22df", + "curlyvee;": "\u22ce", + "curlywedge;": "\u22cf", + "curren": "\xa4", + "curren;": "\xa4", + "curvearrowleft;": "\u21b6", + "curvearrowright;": "\u21b7", + "cuvee;": "\u22ce", + "cuwed;": "\u22cf", + "cwconint;": "\u2232", + "cwint;": "\u2231", + "cylcty;": "\u232d", + "dArr;": "\u21d3", + "dHar;": "\u2965", + "dagger;": "\u2020", + "daleth;": "\u2138", + "darr;": "\u2193", + "dash;": "\u2010", + "dashv;": "\u22a3", + "dbkarow;": "\u290f", + "dblac;": "\u02dd", + "dcaron;": "\u010f", + "dcy;": "\u0434", + "dd;": "\u2146", + "ddagger;": "\u2021", + "ddarr;": "\u21ca", + "ddotseq;": "\u2a77", + "deg": "\xb0", + "deg;": "\xb0", + "delta;": "\u03b4", + "demptyv;": "\u29b1", + "dfisht;": "\u297f", + "dfr;": "\U0001d521", + "dharl;": "\u21c3", + "dharr;": "\u21c2", + "diam;": "\u22c4", + "diamond;": "\u22c4", + "diamondsuit;": "\u2666", + "diams;": "\u2666", + "die;": "\xa8", + "digamma;": "\u03dd", + "disin;": "\u22f2", + "div;": "\xf7", + "divide": "\xf7", + "divide;": "\xf7", + "divideontimes;": "\u22c7", + "divonx;": "\u22c7", + "djcy;": "\u0452", + "dlcorn;": "\u231e", + "dlcrop;": "\u230d", + "dollar;": "$", + "dopf;": "\U0001d555", + "dot;": "\u02d9", + "doteq;": "\u2250", + "doteqdot;": "\u2251", + "dotminus;": "\u2238", + "dotplus;": "\u2214", + "dotsquare;": "\u22a1", + "doublebarwedge;": "\u2306", + "downarrow;": "\u2193", + "downdownarrows;": "\u21ca", + "downharpoonleft;": "\u21c3", + "downharpoonright;": "\u21c2", + "drbkarow;": "\u2910", + "drcorn;": "\u231f", + "drcrop;": "\u230c", + "dscr;": "\U0001d4b9", + "dscy;": "\u0455", + "dsol;": "\u29f6", + "dstrok;": "\u0111", + "dtdot;": "\u22f1", + "dtri;": "\u25bf", + "dtrif;": "\u25be", + "duarr;": "\u21f5", + "duhar;": "\u296f", + "dwangle;": "\u29a6", + "dzcy;": "\u045f", + "dzigrarr;": "\u27ff", + "eDDot;": "\u2a77", + "eDot;": "\u2251", + "eacute": "\xe9", + "eacute;": "\xe9", + "easter;": "\u2a6e", + "ecaron;": "\u011b", + "ecir;": "\u2256", + "ecirc": "\xea", + "ecirc;": "\xea", + "ecolon;": "\u2255", + "ecy;": "\u044d", + "edot;": "\u0117", + "ee;": "\u2147", + "efDot;": "\u2252", + "efr;": "\U0001d522", + "eg;": "\u2a9a", + "egrave": "\xe8", + "egrave;": "\xe8", + "egs;": "\u2a96", + "egsdot;": "\u2a98", + "el;": "\u2a99", + "elinters;": "\u23e7", + "ell;": "\u2113", + "els;": "\u2a95", + "elsdot;": "\u2a97", + "emacr;": "\u0113", + "empty;": "\u2205", + "emptyset;": "\u2205", + "emptyv;": "\u2205", + "emsp13;": "\u2004", + "emsp14;": "\u2005", + "emsp;": "\u2003", + "eng;": "\u014b", + "ensp;": "\u2002", + "eogon;": "\u0119", + "eopf;": "\U0001d556", + "epar;": "\u22d5", + "eparsl;": "\u29e3", + "eplus;": "\u2a71", + "epsi;": "\u03b5", + "epsilon;": "\u03b5", + "epsiv;": "\u03f5", + "eqcirc;": "\u2256", + "eqcolon;": "\u2255", + "eqsim;": "\u2242", + "eqslantgtr;": "\u2a96", + "eqslantless;": "\u2a95", + "equals;": "=", + "equest;": "\u225f", + "equiv;": "\u2261", + "equivDD;": "\u2a78", + "eqvparsl;": "\u29e5", + "erDot;": "\u2253", + "erarr;": "\u2971", + "escr;": "\u212f", + "esdot;": "\u2250", + "esim;": "\u2242", + "eta;": "\u03b7", + "eth": "\xf0", + "eth;": "\xf0", + "euml": "\xeb", + "euml;": "\xeb", + "euro;": "\u20ac", + "excl;": "!", + "exist;": "\u2203", + "expectation;": "\u2130", + "exponentiale;": "\u2147", + "fallingdotseq;": "\u2252", + "fcy;": "\u0444", + "female;": "\u2640", + "ffilig;": "\ufb03", + "fflig;": "\ufb00", + "ffllig;": "\ufb04", + "ffr;": "\U0001d523", + "filig;": "\ufb01", + "fjlig;": "fj", + "flat;": "\u266d", + "fllig;": "\ufb02", + "fltns;": "\u25b1", + "fnof;": "\u0192", + "fopf;": "\U0001d557", + "forall;": "\u2200", + "fork;": "\u22d4", + "forkv;": "\u2ad9", + "fpartint;": "\u2a0d", + "frac12": "\xbd", + "frac12;": "\xbd", + "frac13;": "\u2153", + "frac14": "\xbc", + "frac14;": "\xbc", + "frac15;": "\u2155", + "frac16;": "\u2159", + "frac18;": "\u215b", + "frac23;": "\u2154", + "frac25;": "\u2156", + "frac34": "\xbe", + "frac34;": "\xbe", + "frac35;": "\u2157", + "frac38;": "\u215c", + "frac45;": "\u2158", + "frac56;": "\u215a", + "frac58;": "\u215d", + "frac78;": "\u215e", + "frasl;": "\u2044", + "frown;": "\u2322", + "fscr;": "\U0001d4bb", + "gE;": "\u2267", + "gEl;": "\u2a8c", + "gacute;": "\u01f5", + "gamma;": "\u03b3", + "gammad;": "\u03dd", + "gap;": "\u2a86", + "gbreve;": "\u011f", + "gcirc;": "\u011d", + "gcy;": "\u0433", + "gdot;": "\u0121", + "ge;": "\u2265", + "gel;": "\u22db", + "geq;": "\u2265", + "geqq;": "\u2267", + "geqslant;": "\u2a7e", + "ges;": "\u2a7e", + "gescc;": "\u2aa9", + "gesdot;": "\u2a80", + "gesdoto;": "\u2a82", + "gesdotol;": "\u2a84", + "gesl;": "\u22db\ufe00", + "gesles;": "\u2a94", + "gfr;": "\U0001d524", + "gg;": "\u226b", + "ggg;": "\u22d9", + "gimel;": "\u2137", + "gjcy;": "\u0453", + "gl;": "\u2277", + "glE;": "\u2a92", + "gla;": "\u2aa5", + "glj;": "\u2aa4", + "gnE;": "\u2269", + "gnap;": "\u2a8a", + "gnapprox;": "\u2a8a", + "gne;": "\u2a88", + "gneq;": "\u2a88", + "gneqq;": "\u2269", + "gnsim;": "\u22e7", + "gopf;": "\U0001d558", + "grave;": "`", + "gscr;": "\u210a", + "gsim;": "\u2273", + "gsime;": "\u2a8e", + "gsiml;": "\u2a90", + "gt": ">", + "gt;": ">", + "gtcc;": "\u2aa7", + "gtcir;": "\u2a7a", + "gtdot;": "\u22d7", + "gtlPar;": "\u2995", + "gtquest;": "\u2a7c", + "gtrapprox;": "\u2a86", + "gtrarr;": "\u2978", + "gtrdot;": "\u22d7", + "gtreqless;": "\u22db", + "gtreqqless;": "\u2a8c", + "gtrless;": "\u2277", + "gtrsim;": "\u2273", + "gvertneqq;": "\u2269\ufe00", + "gvnE;": "\u2269\ufe00", + "hArr;": "\u21d4", + "hairsp;": "\u200a", + "half;": "\xbd", + "hamilt;": "\u210b", + "hardcy;": "\u044a", + "harr;": "\u2194", + "harrcir;": "\u2948", + "harrw;": "\u21ad", + "hbar;": "\u210f", + "hcirc;": "\u0125", + "hearts;": "\u2665", + "heartsuit;": "\u2665", + "hellip;": "\u2026", + "hercon;": "\u22b9", + "hfr;": "\U0001d525", + "hksearow;": "\u2925", + "hkswarow;": "\u2926", + "hoarr;": "\u21ff", + "homtht;": "\u223b", + "hookleftarrow;": "\u21a9", + "hookrightarrow;": "\u21aa", + "hopf;": "\U0001d559", + "horbar;": "\u2015", + "hscr;": "\U0001d4bd", + "hslash;": "\u210f", + "hstrok;": "\u0127", + "hybull;": "\u2043", + "hyphen;": "\u2010", + "iacute": "\xed", + "iacute;": "\xed", + "ic;": "\u2063", + "icirc": "\xee", + "icirc;": "\xee", + "icy;": "\u0438", + "iecy;": "\u0435", + "iexcl": "\xa1", + "iexcl;": "\xa1", + "iff;": "\u21d4", + "ifr;": "\U0001d526", + "igrave": "\xec", + "igrave;": "\xec", + "ii;": "\u2148", + "iiiint;": "\u2a0c", + "iiint;": "\u222d", + "iinfin;": "\u29dc", + "iiota;": "\u2129", + "ijlig;": "\u0133", + "imacr;": "\u012b", + "image;": "\u2111", + "imagline;": "\u2110", + "imagpart;": "\u2111", + "imath;": "\u0131", + "imof;": "\u22b7", + "imped;": "\u01b5", + "in;": "\u2208", + "incare;": "\u2105", + "infin;": "\u221e", + "infintie;": "\u29dd", + "inodot;": "\u0131", + "int;": "\u222b", + "intcal;": "\u22ba", + "integers;": "\u2124", + "intercal;": "\u22ba", + "intlarhk;": "\u2a17", + "intprod;": "\u2a3c", + "iocy;": "\u0451", + "iogon;": "\u012f", + "iopf;": "\U0001d55a", + "iota;": "\u03b9", + "iprod;": "\u2a3c", + "iquest": "\xbf", + "iquest;": "\xbf", + "iscr;": "\U0001d4be", + "isin;": "\u2208", + "isinE;": "\u22f9", + "isindot;": "\u22f5", + "isins;": "\u22f4", + "isinsv;": "\u22f3", + "isinv;": "\u2208", + "it;": "\u2062", + "itilde;": "\u0129", + "iukcy;": "\u0456", + "iuml": "\xef", + "iuml;": "\xef", + "jcirc;": "\u0135", + "jcy;": "\u0439", + "jfr;": "\U0001d527", + "jmath;": "\u0237", + "jopf;": "\U0001d55b", + "jscr;": "\U0001d4bf", + "jsercy;": "\u0458", + "jukcy;": "\u0454", + "kappa;": "\u03ba", + "kappav;": "\u03f0", + "kcedil;": "\u0137", + "kcy;": "\u043a", + "kfr;": "\U0001d528", + "kgreen;": "\u0138", + "khcy;": "\u0445", + "kjcy;": "\u045c", + "kopf;": "\U0001d55c", + "kscr;": "\U0001d4c0", + "lAarr;": "\u21da", + "lArr;": "\u21d0", + "lAtail;": "\u291b", + "lBarr;": "\u290e", + "lE;": "\u2266", + "lEg;": "\u2a8b", + "lHar;": "\u2962", + "lacute;": "\u013a", + "laemptyv;": "\u29b4", + "lagran;": "\u2112", + "lambda;": "\u03bb", + "lang;": "\u27e8", + "langd;": "\u2991", + "langle;": "\u27e8", + "lap;": "\u2a85", + "laquo": "\xab", + "laquo;": "\xab", + "larr;": "\u2190", + "larrb;": "\u21e4", + "larrbfs;": "\u291f", + "larrfs;": "\u291d", + "larrhk;": "\u21a9", + "larrlp;": "\u21ab", + "larrpl;": "\u2939", + "larrsim;": "\u2973", + "larrtl;": "\u21a2", + "lat;": "\u2aab", + "latail;": "\u2919", + "late;": "\u2aad", + "lates;": "\u2aad\ufe00", + "lbarr;": "\u290c", + "lbbrk;": "\u2772", + "lbrace;": "{", + "lbrack;": "[", + "lbrke;": "\u298b", + "lbrksld;": "\u298f", + "lbrkslu;": "\u298d", + "lcaron;": "\u013e", + "lcedil;": "\u013c", + "lceil;": "\u2308", + "lcub;": "{", + "lcy;": "\u043b", + "ldca;": "\u2936", + "ldquo;": "\u201c", + "ldquor;": "\u201e", + "ldrdhar;": "\u2967", + "ldrushar;": "\u294b", + "ldsh;": "\u21b2", + "le;": "\u2264", + "leftarrow;": "\u2190", + "leftarrowtail;": "\u21a2", + "leftharpoondown;": "\u21bd", + "leftharpoonup;": "\u21bc", + "leftleftarrows;": "\u21c7", + "leftrightarrow;": "\u2194", + "leftrightarrows;": "\u21c6", + "leftrightharpoons;": "\u21cb", + "leftrightsquigarrow;": "\u21ad", + "leftthreetimes;": "\u22cb", + "leg;": "\u22da", + "leq;": "\u2264", + "leqq;": "\u2266", + "leqslant;": "\u2a7d", + "les;": "\u2a7d", + "lescc;": "\u2aa8", + "lesdot;": "\u2a7f", + "lesdoto;": "\u2a81", + "lesdotor;": "\u2a83", + "lesg;": "\u22da\ufe00", + "lesges;": "\u2a93", + "lessapprox;": "\u2a85", + "lessdot;": "\u22d6", + "lesseqgtr;": "\u22da", + "lesseqqgtr;": "\u2a8b", + "lessgtr;": "\u2276", + "lesssim;": "\u2272", + "lfisht;": "\u297c", + "lfloor;": "\u230a", + "lfr;": "\U0001d529", + "lg;": "\u2276", + "lgE;": "\u2a91", + "lhard;": "\u21bd", + "lharu;": "\u21bc", + "lharul;": "\u296a", + "lhblk;": "\u2584", + "ljcy;": "\u0459", + "ll;": "\u226a", + "llarr;": "\u21c7", + "llcorner;": "\u231e", + "llhard;": "\u296b", + "lltri;": "\u25fa", + "lmidot;": "\u0140", + "lmoust;": "\u23b0", + "lmoustache;": "\u23b0", + "lnE;": "\u2268", + "lnap;": "\u2a89", + "lnapprox;": "\u2a89", + "lne;": "\u2a87", + "lneq;": "\u2a87", + "lneqq;": "\u2268", + "lnsim;": "\u22e6", + "loang;": "\u27ec", + "loarr;": "\u21fd", + "lobrk;": "\u27e6", + "longleftarrow;": "\u27f5", + "longleftrightarrow;": "\u27f7", + "longmapsto;": "\u27fc", + "longrightarrow;": "\u27f6", + "looparrowleft;": "\u21ab", + "looparrowright;": "\u21ac", + "lopar;": "\u2985", + "lopf;": "\U0001d55d", + "loplus;": "\u2a2d", + "lotimes;": "\u2a34", + "lowast;": "\u2217", + "lowbar;": "_", + "loz;": "\u25ca", + "lozenge;": "\u25ca", + "lozf;": "\u29eb", + "lpar;": "(", + "lparlt;": "\u2993", + "lrarr;": "\u21c6", + "lrcorner;": "\u231f", + "lrhar;": "\u21cb", + "lrhard;": "\u296d", + "lrm;": "\u200e", + "lrtri;": "\u22bf", + "lsaquo;": "\u2039", + "lscr;": "\U0001d4c1", + "lsh;": "\u21b0", + "lsim;": "\u2272", + "lsime;": "\u2a8d", + "lsimg;": "\u2a8f", + "lsqb;": "[", + "lsquo;": "\u2018", + "lsquor;": "\u201a", + "lstrok;": "\u0142", + "lt": "<", + "lt;": "<", + "ltcc;": "\u2aa6", + "ltcir;": "\u2a79", + "ltdot;": "\u22d6", + "lthree;": "\u22cb", + "ltimes;": "\u22c9", + "ltlarr;": "\u2976", + "ltquest;": "\u2a7b", + "ltrPar;": "\u2996", + "ltri;": "\u25c3", + "ltrie;": "\u22b4", + "ltrif;": "\u25c2", + "lurdshar;": "\u294a", + "luruhar;": "\u2966", + "lvertneqq;": "\u2268\ufe00", + "lvnE;": "\u2268\ufe00", + "mDDot;": "\u223a", + "macr": "\xaf", + "macr;": "\xaf", + "male;": "\u2642", + "malt;": "\u2720", + "maltese;": "\u2720", + "map;": "\u21a6", + "mapsto;": "\u21a6", + "mapstodown;": "\u21a7", + "mapstoleft;": "\u21a4", + "mapstoup;": "\u21a5", + "marker;": "\u25ae", + "mcomma;": "\u2a29", + "mcy;": "\u043c", + "mdash;": "\u2014", + "measuredangle;": "\u2221", + "mfr;": "\U0001d52a", + "mho;": "\u2127", + "micro": "\xb5", + "micro;": "\xb5", + "mid;": "\u2223", + "midast;": "*", + "midcir;": "\u2af0", + "middot": "\xb7", + "middot;": "\xb7", + "minus;": "\u2212", + "minusb;": "\u229f", + "minusd;": "\u2238", + "minusdu;": "\u2a2a", + "mlcp;": "\u2adb", + "mldr;": "\u2026", + "mnplus;": "\u2213", + "models;": "\u22a7", + "mopf;": "\U0001d55e", + "mp;": "\u2213", + "mscr;": "\U0001d4c2", + "mstpos;": "\u223e", + "mu;": "\u03bc", + "multimap;": "\u22b8", + "mumap;": "\u22b8", + "nGg;": "\u22d9\u0338", + "nGt;": "\u226b\u20d2", + "nGtv;": "\u226b\u0338", + "nLeftarrow;": "\u21cd", + "nLeftrightarrow;": "\u21ce", + "nLl;": "\u22d8\u0338", + "nLt;": "\u226a\u20d2", + "nLtv;": "\u226a\u0338", + "nRightarrow;": "\u21cf", + "nVDash;": "\u22af", + "nVdash;": "\u22ae", + "nabla;": "\u2207", + "nacute;": "\u0144", + "nang;": "\u2220\u20d2", + "nap;": "\u2249", + "napE;": "\u2a70\u0338", + "napid;": "\u224b\u0338", + "napos;": "\u0149", + "napprox;": "\u2249", + "natur;": "\u266e", + "natural;": "\u266e", + "naturals;": "\u2115", + "nbsp": "\xa0", + "nbsp;": "\xa0", + "nbump;": "\u224e\u0338", + "nbumpe;": "\u224f\u0338", + "ncap;": "\u2a43", + "ncaron;": "\u0148", + "ncedil;": "\u0146", + "ncong;": "\u2247", + "ncongdot;": "\u2a6d\u0338", + "ncup;": "\u2a42", + "ncy;": "\u043d", + "ndash;": "\u2013", + "ne;": "\u2260", + "neArr;": "\u21d7", + "nearhk;": "\u2924", + "nearr;": "\u2197", + "nearrow;": "\u2197", + "nedot;": "\u2250\u0338", + "nequiv;": "\u2262", + "nesear;": "\u2928", + "nesim;": "\u2242\u0338", + "nexist;": "\u2204", + "nexists;": "\u2204", + "nfr;": "\U0001d52b", + "ngE;": "\u2267\u0338", + "nge;": "\u2271", + "ngeq;": "\u2271", + "ngeqq;": "\u2267\u0338", + "ngeqslant;": "\u2a7e\u0338", + "nges;": "\u2a7e\u0338", + "ngsim;": "\u2275", + "ngt;": "\u226f", + "ngtr;": "\u226f", + "nhArr;": "\u21ce", + "nharr;": "\u21ae", + "nhpar;": "\u2af2", + "ni;": "\u220b", + "nis;": "\u22fc", + "nisd;": "\u22fa", + "niv;": "\u220b", + "njcy;": "\u045a", + "nlArr;": "\u21cd", + "nlE;": "\u2266\u0338", + "nlarr;": "\u219a", + "nldr;": "\u2025", + "nle;": "\u2270", + "nleftarrow;": "\u219a", + "nleftrightarrow;": "\u21ae", + "nleq;": "\u2270", + "nleqq;": "\u2266\u0338", + "nleqslant;": "\u2a7d\u0338", + "nles;": "\u2a7d\u0338", + "nless;": "\u226e", + "nlsim;": "\u2274", + "nlt;": "\u226e", + "nltri;": "\u22ea", + "nltrie;": "\u22ec", + "nmid;": "\u2224", + "nopf;": "\U0001d55f", + "not": "\xac", + "not;": "\xac", + "notin;": "\u2209", + "notinE;": "\u22f9\u0338", + "notindot;": "\u22f5\u0338", + "notinva;": "\u2209", + "notinvb;": "\u22f7", + "notinvc;": "\u22f6", + "notni;": "\u220c", + "notniva;": "\u220c", + "notnivb;": "\u22fe", + "notnivc;": "\u22fd", + "npar;": "\u2226", + "nparallel;": "\u2226", + "nparsl;": "\u2afd\u20e5", + "npart;": "\u2202\u0338", + "npolint;": "\u2a14", + "npr;": "\u2280", + "nprcue;": "\u22e0", + "npre;": "\u2aaf\u0338", + "nprec;": "\u2280", + "npreceq;": "\u2aaf\u0338", + "nrArr;": "\u21cf", + "nrarr;": "\u219b", + "nrarrc;": "\u2933\u0338", + "nrarrw;": "\u219d\u0338", + "nrightarrow;": "\u219b", + "nrtri;": "\u22eb", + "nrtrie;": "\u22ed", + "nsc;": "\u2281", + "nsccue;": "\u22e1", + "nsce;": "\u2ab0\u0338", + "nscr;": "\U0001d4c3", + "nshortmid;": "\u2224", + "nshortparallel;": "\u2226", + "nsim;": "\u2241", + "nsime;": "\u2244", + "nsimeq;": "\u2244", + "nsmid;": "\u2224", + "nspar;": "\u2226", + "nsqsube;": "\u22e2", + "nsqsupe;": "\u22e3", + "nsub;": "\u2284", + "nsubE;": "\u2ac5\u0338", + "nsube;": "\u2288", + "nsubset;": "\u2282\u20d2", + "nsubseteq;": "\u2288", + "nsubseteqq;": "\u2ac5\u0338", + "nsucc;": "\u2281", + "nsucceq;": "\u2ab0\u0338", + "nsup;": "\u2285", + "nsupE;": "\u2ac6\u0338", + "nsupe;": "\u2289", + "nsupset;": "\u2283\u20d2", + "nsupseteq;": "\u2289", + "nsupseteqq;": "\u2ac6\u0338", + "ntgl;": "\u2279", + "ntilde": "\xf1", + "ntilde;": "\xf1", + "ntlg;": "\u2278", + "ntriangleleft;": "\u22ea", + "ntrianglelefteq;": "\u22ec", + "ntriangleright;": "\u22eb", + "ntrianglerighteq;": "\u22ed", + "nu;": "\u03bd", + "num;": "#", + "numero;": "\u2116", + "numsp;": "\u2007", + "nvDash;": "\u22ad", + "nvHarr;": "\u2904", + "nvap;": "\u224d\u20d2", + "nvdash;": "\u22ac", + "nvge;": "\u2265\u20d2", + "nvgt;": ">\u20d2", + "nvinfin;": "\u29de", + "nvlArr;": "\u2902", + "nvle;": "\u2264\u20d2", + "nvlt;": "<\u20d2", + "nvltrie;": "\u22b4\u20d2", + "nvrArr;": "\u2903", + "nvrtrie;": "\u22b5\u20d2", + "nvsim;": "\u223c\u20d2", + "nwArr;": "\u21d6", + "nwarhk;": "\u2923", + "nwarr;": "\u2196", + "nwarrow;": "\u2196", + "nwnear;": "\u2927", + "oS;": "\u24c8", + "oacute": "\xf3", + "oacute;": "\xf3", + "oast;": "\u229b", + "ocir;": "\u229a", + "ocirc": "\xf4", + "ocirc;": "\xf4", + "ocy;": "\u043e", + "odash;": "\u229d", + "odblac;": "\u0151", + "odiv;": "\u2a38", + "odot;": "\u2299", + "odsold;": "\u29bc", + "oelig;": "\u0153", + "ofcir;": "\u29bf", + "ofr;": "\U0001d52c", + "ogon;": "\u02db", + "ograve": "\xf2", + "ograve;": "\xf2", + "ogt;": "\u29c1", + "ohbar;": "\u29b5", + "ohm;": "\u03a9", + "oint;": "\u222e", + "olarr;": "\u21ba", + "olcir;": "\u29be", + "olcross;": "\u29bb", + "oline;": "\u203e", + "olt;": "\u29c0", + "omacr;": "\u014d", + "omega;": "\u03c9", + "omicron;": "\u03bf", + "omid;": "\u29b6", + "ominus;": "\u2296", + "oopf;": "\U0001d560", + "opar;": "\u29b7", + "operp;": "\u29b9", + "oplus;": "\u2295", + "or;": "\u2228", + "orarr;": "\u21bb", + "ord;": "\u2a5d", + "order;": "\u2134", + "orderof;": "\u2134", + "ordf": "\xaa", + "ordf;": "\xaa", + "ordm": "\xba", + "ordm;": "\xba", + "origof;": "\u22b6", + "oror;": "\u2a56", + "orslope;": "\u2a57", + "orv;": "\u2a5b", + "oscr;": "\u2134", + "oslash": "\xf8", + "oslash;": "\xf8", + "osol;": "\u2298", + "otilde": "\xf5", + "otilde;": "\xf5", + "otimes;": "\u2297", + "otimesas;": "\u2a36", + "ouml": "\xf6", + "ouml;": "\xf6", + "ovbar;": "\u233d", + "par;": "\u2225", + "para": "\xb6", + "para;": "\xb6", + "parallel;": "\u2225", + "parsim;": "\u2af3", + "parsl;": "\u2afd", + "part;": "\u2202", + "pcy;": "\u043f", + "percnt;": "%", + "period;": ".", + "permil;": "\u2030", + "perp;": "\u22a5", + "pertenk;": "\u2031", + "pfr;": "\U0001d52d", + "phi;": "\u03c6", + "phiv;": "\u03d5", + "phmmat;": "\u2133", + "phone;": "\u260e", + "pi;": "\u03c0", + "pitchfork;": "\u22d4", + "piv;": "\u03d6", + "planck;": "\u210f", + "planckh;": "\u210e", + "plankv;": "\u210f", + "plus;": "+", + "plusacir;": "\u2a23", + "plusb;": "\u229e", + "pluscir;": "\u2a22", + "plusdo;": "\u2214", + "plusdu;": "\u2a25", + "pluse;": "\u2a72", + "plusmn": "\xb1", + "plusmn;": "\xb1", + "plussim;": "\u2a26", + "plustwo;": "\u2a27", + "pm;": "\xb1", + "pointint;": "\u2a15", + "popf;": "\U0001d561", + "pound": "\xa3", + "pound;": "\xa3", + "pr;": "\u227a", + "prE;": "\u2ab3", + "prap;": "\u2ab7", + "prcue;": "\u227c", + "pre;": "\u2aaf", + "prec;": "\u227a", + "precapprox;": "\u2ab7", + "preccurlyeq;": "\u227c", + "preceq;": "\u2aaf", + "precnapprox;": "\u2ab9", + "precneqq;": "\u2ab5", + "precnsim;": "\u22e8", + "precsim;": "\u227e", + "prime;": "\u2032", + "primes;": "\u2119", + "prnE;": "\u2ab5", + "prnap;": "\u2ab9", + "prnsim;": "\u22e8", + "prod;": "\u220f", + "profalar;": "\u232e", + "profline;": "\u2312", + "profsurf;": "\u2313", + "prop;": "\u221d", + "propto;": "\u221d", + "prsim;": "\u227e", + "prurel;": "\u22b0", + "pscr;": "\U0001d4c5", + "psi;": "\u03c8", + "puncsp;": "\u2008", + "qfr;": "\U0001d52e", + "qint;": "\u2a0c", + "qopf;": "\U0001d562", + "qprime;": "\u2057", + "qscr;": "\U0001d4c6", + "quaternions;": "\u210d", + "quatint;": "\u2a16", + "quest;": "?", + "questeq;": "\u225f", + "quot": "\"", + "quot;": "\"", + "rAarr;": "\u21db", + "rArr;": "\u21d2", + "rAtail;": "\u291c", + "rBarr;": "\u290f", + "rHar;": "\u2964", + "race;": "\u223d\u0331", + "racute;": "\u0155", + "radic;": "\u221a", + "raemptyv;": "\u29b3", + "rang;": "\u27e9", + "rangd;": "\u2992", + "range;": "\u29a5", + "rangle;": "\u27e9", + "raquo": "\xbb", + "raquo;": "\xbb", + "rarr;": "\u2192", + "rarrap;": "\u2975", + "rarrb;": "\u21e5", + "rarrbfs;": "\u2920", + "rarrc;": "\u2933", + "rarrfs;": "\u291e", + "rarrhk;": "\u21aa", + "rarrlp;": "\u21ac", + "rarrpl;": "\u2945", + "rarrsim;": "\u2974", + "rarrtl;": "\u21a3", + "rarrw;": "\u219d", + "ratail;": "\u291a", + "ratio;": "\u2236", + "rationals;": "\u211a", + "rbarr;": "\u290d", + "rbbrk;": "\u2773", + "rbrace;": "}", + "rbrack;": "]", + "rbrke;": "\u298c", + "rbrksld;": "\u298e", + "rbrkslu;": "\u2990", + "rcaron;": "\u0159", + "rcedil;": "\u0157", + "rceil;": "\u2309", + "rcub;": "}", + "rcy;": "\u0440", + "rdca;": "\u2937", + "rdldhar;": "\u2969", + "rdquo;": "\u201d", + "rdquor;": "\u201d", + "rdsh;": "\u21b3", + "real;": "\u211c", + "realine;": "\u211b", + "realpart;": "\u211c", + "reals;": "\u211d", + "rect;": "\u25ad", + "reg": "\xae", + "reg;": "\xae", + "rfisht;": "\u297d", + "rfloor;": "\u230b", + "rfr;": "\U0001d52f", + "rhard;": "\u21c1", + "rharu;": "\u21c0", + "rharul;": "\u296c", + "rho;": "\u03c1", + "rhov;": "\u03f1", + "rightarrow;": "\u2192", + "rightarrowtail;": "\u21a3", + "rightharpoondown;": "\u21c1", + "rightharpoonup;": "\u21c0", + "rightleftarrows;": "\u21c4", + "rightleftharpoons;": "\u21cc", + "rightrightarrows;": "\u21c9", + "rightsquigarrow;": "\u219d", + "rightthreetimes;": "\u22cc", + "ring;": "\u02da", + "risingdotseq;": "\u2253", + "rlarr;": "\u21c4", + "rlhar;": "\u21cc", + "rlm;": "\u200f", + "rmoust;": "\u23b1", + "rmoustache;": "\u23b1", + "rnmid;": "\u2aee", + "roang;": "\u27ed", + "roarr;": "\u21fe", + "robrk;": "\u27e7", + "ropar;": "\u2986", + "ropf;": "\U0001d563", + "roplus;": "\u2a2e", + "rotimes;": "\u2a35", + "rpar;": ")", + "rpargt;": "\u2994", + "rppolint;": "\u2a12", + "rrarr;": "\u21c9", + "rsaquo;": "\u203a", + "rscr;": "\U0001d4c7", + "rsh;": "\u21b1", + "rsqb;": "]", + "rsquo;": "\u2019", + "rsquor;": "\u2019", + "rthree;": "\u22cc", + "rtimes;": "\u22ca", + "rtri;": "\u25b9", + "rtrie;": "\u22b5", + "rtrif;": "\u25b8", + "rtriltri;": "\u29ce", + "ruluhar;": "\u2968", + "rx;": "\u211e", + "sacute;": "\u015b", + "sbquo;": "\u201a", + "sc;": "\u227b", + "scE;": "\u2ab4", + "scap;": "\u2ab8", + "scaron;": "\u0161", + "sccue;": "\u227d", + "sce;": "\u2ab0", + "scedil;": "\u015f", + "scirc;": "\u015d", + "scnE;": "\u2ab6", + "scnap;": "\u2aba", + "scnsim;": "\u22e9", + "scpolint;": "\u2a13", + "scsim;": "\u227f", + "scy;": "\u0441", + "sdot;": "\u22c5", + "sdotb;": "\u22a1", + "sdote;": "\u2a66", + "seArr;": "\u21d8", + "searhk;": "\u2925", + "searr;": "\u2198", + "searrow;": "\u2198", + "sect": "\xa7", + "sect;": "\xa7", + "semi;": ";", + "seswar;": "\u2929", + "setminus;": "\u2216", + "setmn;": "\u2216", + "sext;": "\u2736", + "sfr;": "\U0001d530", + "sfrown;": "\u2322", + "sharp;": "\u266f", + "shchcy;": "\u0449", + "shcy;": "\u0448", + "shortmid;": "\u2223", + "shortparallel;": "\u2225", + "shy": "\xad", + "shy;": "\xad", + "sigma;": "\u03c3", + "sigmaf;": "\u03c2", + "sigmav;": "\u03c2", + "sim;": "\u223c", + "simdot;": "\u2a6a", + "sime;": "\u2243", + "simeq;": "\u2243", + "simg;": "\u2a9e", + "simgE;": "\u2aa0", + "siml;": "\u2a9d", + "simlE;": "\u2a9f", + "simne;": "\u2246", + "simplus;": "\u2a24", + "simrarr;": "\u2972", + "slarr;": "\u2190", + "smallsetminus;": "\u2216", + "smashp;": "\u2a33", + "smeparsl;": "\u29e4", + "smid;": "\u2223", + "smile;": "\u2323", + "smt;": "\u2aaa", + "smte;": "\u2aac", + "smtes;": "\u2aac\ufe00", + "softcy;": "\u044c", + "sol;": "/", + "solb;": "\u29c4", + "solbar;": "\u233f", + "sopf;": "\U0001d564", + "spades;": "\u2660", + "spadesuit;": "\u2660", + "spar;": "\u2225", + "sqcap;": "\u2293", + "sqcaps;": "\u2293\ufe00", + "sqcup;": "\u2294", + "sqcups;": "\u2294\ufe00", + "sqsub;": "\u228f", + "sqsube;": "\u2291", + "sqsubset;": "\u228f", + "sqsubseteq;": "\u2291", + "sqsup;": "\u2290", + "sqsupe;": "\u2292", + "sqsupset;": "\u2290", + "sqsupseteq;": "\u2292", + "squ;": "\u25a1", + "square;": "\u25a1", + "squarf;": "\u25aa", + "squf;": "\u25aa", + "srarr;": "\u2192", + "sscr;": "\U0001d4c8", + "ssetmn;": "\u2216", + "ssmile;": "\u2323", + "sstarf;": "\u22c6", + "star;": "\u2606", + "starf;": "\u2605", + "straightepsilon;": "\u03f5", + "straightphi;": "\u03d5", + "strns;": "\xaf", + "sub;": "\u2282", + "subE;": "\u2ac5", + "subdot;": "\u2abd", + "sube;": "\u2286", + "subedot;": "\u2ac3", + "submult;": "\u2ac1", + "subnE;": "\u2acb", + "subne;": "\u228a", + "subplus;": "\u2abf", + "subrarr;": "\u2979", + "subset;": "\u2282", + "subseteq;": "\u2286", + "subseteqq;": "\u2ac5", + "subsetneq;": "\u228a", + "subsetneqq;": "\u2acb", + "subsim;": "\u2ac7", + "subsub;": "\u2ad5", + "subsup;": "\u2ad3", + "succ;": "\u227b", + "succapprox;": "\u2ab8", + "succcurlyeq;": "\u227d", + "succeq;": "\u2ab0", + "succnapprox;": "\u2aba", + "succneqq;": "\u2ab6", + "succnsim;": "\u22e9", + "succsim;": "\u227f", + "sum;": "\u2211", + "sung;": "\u266a", + "sup1": "\xb9", + "sup1;": "\xb9", + "sup2": "\xb2", + "sup2;": "\xb2", + "sup3": "\xb3", + "sup3;": "\xb3", + "sup;": "\u2283", + "supE;": "\u2ac6", + "supdot;": "\u2abe", + "supdsub;": "\u2ad8", + "supe;": "\u2287", + "supedot;": "\u2ac4", + "suphsol;": "\u27c9", + "suphsub;": "\u2ad7", + "suplarr;": "\u297b", + "supmult;": "\u2ac2", + "supnE;": "\u2acc", + "supne;": "\u228b", + "supplus;": "\u2ac0", + "supset;": "\u2283", + "supseteq;": "\u2287", + "supseteqq;": "\u2ac6", + "supsetneq;": "\u228b", + "supsetneqq;": "\u2acc", + "supsim;": "\u2ac8", + "supsub;": "\u2ad4", + "supsup;": "\u2ad6", + "swArr;": "\u21d9", + "swarhk;": "\u2926", + "swarr;": "\u2199", + "swarrow;": "\u2199", + "swnwar;": "\u292a", + "szlig": "\xdf", + "szlig;": "\xdf", + "target;": "\u2316", + "tau;": "\u03c4", + "tbrk;": "\u23b4", + "tcaron;": "\u0165", + "tcedil;": "\u0163", + "tcy;": "\u0442", + "tdot;": "\u20db", + "telrec;": "\u2315", + "tfr;": "\U0001d531", + "there4;": "\u2234", + "therefore;": "\u2234", + "theta;": "\u03b8", + "thetasym;": "\u03d1", + "thetav;": "\u03d1", + "thickapprox;": "\u2248", + "thicksim;": "\u223c", + "thinsp;": "\u2009", + "thkap;": "\u2248", + "thksim;": "\u223c", + "thorn": "\xfe", + "thorn;": "\xfe", + "tilde;": "\u02dc", + "times": "\xd7", + "times;": "\xd7", + "timesb;": "\u22a0", + "timesbar;": "\u2a31", + "timesd;": "\u2a30", + "tint;": "\u222d", + "toea;": "\u2928", + "top;": "\u22a4", + "topbot;": "\u2336", + "topcir;": "\u2af1", + "topf;": "\U0001d565", + "topfork;": "\u2ada", + "tosa;": "\u2929", + "tprime;": "\u2034", + "trade;": "\u2122", + "triangle;": "\u25b5", + "triangledown;": "\u25bf", + "triangleleft;": "\u25c3", + "trianglelefteq;": "\u22b4", + "triangleq;": "\u225c", + "triangleright;": "\u25b9", + "trianglerighteq;": "\u22b5", + "tridot;": "\u25ec", + "trie;": "\u225c", + "triminus;": "\u2a3a", + "triplus;": "\u2a39", + "trisb;": "\u29cd", + "tritime;": "\u2a3b", + "trpezium;": "\u23e2", + "tscr;": "\U0001d4c9", + "tscy;": "\u0446", + "tshcy;": "\u045b", + "tstrok;": "\u0167", + "twixt;": "\u226c", + "twoheadleftarrow;": "\u219e", + "twoheadrightarrow;": "\u21a0", + "uArr;": "\u21d1", + "uHar;": "\u2963", + "uacute": "\xfa", + "uacute;": "\xfa", + "uarr;": "\u2191", + "ubrcy;": "\u045e", + "ubreve;": "\u016d", + "ucirc": "\xfb", + "ucirc;": "\xfb", + "ucy;": "\u0443", + "udarr;": "\u21c5", + "udblac;": "\u0171", + "udhar;": "\u296e", + "ufisht;": "\u297e", + "ufr;": "\U0001d532", + "ugrave": "\xf9", + "ugrave;": "\xf9", + "uharl;": "\u21bf", + "uharr;": "\u21be", + "uhblk;": "\u2580", + "ulcorn;": "\u231c", + "ulcorner;": "\u231c", + "ulcrop;": "\u230f", + "ultri;": "\u25f8", + "umacr;": "\u016b", + "uml": "\xa8", + "uml;": "\xa8", + "uogon;": "\u0173", + "uopf;": "\U0001d566", + "uparrow;": "\u2191", + "updownarrow;": "\u2195", + "upharpoonleft;": "\u21bf", + "upharpoonright;": "\u21be", + "uplus;": "\u228e", + "upsi;": "\u03c5", + "upsih;": "\u03d2", + "upsilon;": "\u03c5", + "upuparrows;": "\u21c8", + "urcorn;": "\u231d", + "urcorner;": "\u231d", + "urcrop;": "\u230e", + "uring;": "\u016f", + "urtri;": "\u25f9", + "uscr;": "\U0001d4ca", + "utdot;": "\u22f0", + "utilde;": "\u0169", + "utri;": "\u25b5", + "utrif;": "\u25b4", + "uuarr;": "\u21c8", + "uuml": "\xfc", + "uuml;": "\xfc", + "uwangle;": "\u29a7", + "vArr;": "\u21d5", + "vBar;": "\u2ae8", + "vBarv;": "\u2ae9", + "vDash;": "\u22a8", + "vangrt;": "\u299c", + "varepsilon;": "\u03f5", + "varkappa;": "\u03f0", + "varnothing;": "\u2205", + "varphi;": "\u03d5", + "varpi;": "\u03d6", + "varpropto;": "\u221d", + "varr;": "\u2195", + "varrho;": "\u03f1", + "varsigma;": "\u03c2", + "varsubsetneq;": "\u228a\ufe00", + "varsubsetneqq;": "\u2acb\ufe00", + "varsupsetneq;": "\u228b\ufe00", + "varsupsetneqq;": "\u2acc\ufe00", + "vartheta;": "\u03d1", + "vartriangleleft;": "\u22b2", + "vartriangleright;": "\u22b3", + "vcy;": "\u0432", + "vdash;": "\u22a2", + "vee;": "\u2228", + "veebar;": "\u22bb", + "veeeq;": "\u225a", + "vellip;": "\u22ee", + "verbar;": "|", + "vert;": "|", + "vfr;": "\U0001d533", + "vltri;": "\u22b2", + "vnsub;": "\u2282\u20d2", + "vnsup;": "\u2283\u20d2", + "vopf;": "\U0001d567", + "vprop;": "\u221d", + "vrtri;": "\u22b3", + "vscr;": "\U0001d4cb", + "vsubnE;": "\u2acb\ufe00", + "vsubne;": "\u228a\ufe00", + "vsupnE;": "\u2acc\ufe00", + "vsupne;": "\u228b\ufe00", + "vzigzag;": "\u299a", + "wcirc;": "\u0175", + "wedbar;": "\u2a5f", + "wedge;": "\u2227", + "wedgeq;": "\u2259", + "weierp;": "\u2118", + "wfr;": "\U0001d534", + "wopf;": "\U0001d568", + "wp;": "\u2118", + "wr;": "\u2240", + "wreath;": "\u2240", + "wscr;": "\U0001d4cc", + "xcap;": "\u22c2", + "xcirc;": "\u25ef", + "xcup;": "\u22c3", + "xdtri;": "\u25bd", + "xfr;": "\U0001d535", + "xhArr;": "\u27fa", + "xharr;": "\u27f7", + "xi;": "\u03be", + "xlArr;": "\u27f8", + "xlarr;": "\u27f5", + "xmap;": "\u27fc", + "xnis;": "\u22fb", + "xodot;": "\u2a00", + "xopf;": "\U0001d569", + "xoplus;": "\u2a01", + "xotime;": "\u2a02", + "xrArr;": "\u27f9", + "xrarr;": "\u27f6", + "xscr;": "\U0001d4cd", + "xsqcup;": "\u2a06", + "xuplus;": "\u2a04", + "xutri;": "\u25b3", + "xvee;": "\u22c1", + "xwedge;": "\u22c0", + "yacute": "\xfd", + "yacute;": "\xfd", + "yacy;": "\u044f", + "ycirc;": "\u0177", + "ycy;": "\u044b", + "yen": "\xa5", + "yen;": "\xa5", + "yfr;": "\U0001d536", + "yicy;": "\u0457", + "yopf;": "\U0001d56a", + "yscr;": "\U0001d4ce", + "yucy;": "\u044e", + "yuml": "\xff", + "yuml;": "\xff", + "zacute;": "\u017a", + "zcaron;": "\u017e", + "zcy;": "\u0437", + "zdot;": "\u017c", + "zeetrf;": "\u2128", + "zeta;": "\u03b6", + "zfr;": "\U0001d537", + "zhcy;": "\u0436", + "zigrarr;": "\u21dd", + "zopf;": "\U0001d56b", + "zscr;": "\U0001d4cf", + "zwj;": "\u200d", + "zwnj;": "\u200c", +} + +replacementCharacters = { + 0x0: "\uFFFD", + 0x0d: "\u000D", + 0x80: "\u20AC", + 0x81: "\u0081", + 0x81: "\u0081", + 0x82: "\u201A", + 0x83: "\u0192", + 0x84: "\u201E", + 0x85: "\u2026", + 0x86: "\u2020", + 0x87: "\u2021", + 0x88: "\u02C6", + 0x89: "\u2030", + 0x8A: "\u0160", + 0x8B: "\u2039", + 0x8C: "\u0152", + 0x8D: "\u008D", + 0x8E: "\u017D", + 0x8F: "\u008F", + 0x90: "\u0090", + 0x91: "\u2018", + 0x92: "\u2019", + 0x93: "\u201C", + 0x94: "\u201D", + 0x95: "\u2022", + 0x96: "\u2013", + 0x97: "\u2014", + 0x98: "\u02DC", + 0x99: "\u2122", + 0x9A: "\u0161", + 0x9B: "\u203A", + 0x9C: "\u0153", + 0x9D: "\u009D", + 0x9E: "\u017E", + 0x9F: "\u0178", +} + +encodings = { + '437': 'cp437', + '850': 'cp850', + '852': 'cp852', + '855': 'cp855', + '857': 'cp857', + '860': 'cp860', + '861': 'cp861', + '862': 'cp862', + '863': 'cp863', + '865': 'cp865', + '866': 'cp866', + '869': 'cp869', + 'ansix341968': 'ascii', + 'ansix341986': 'ascii', + 'arabic': 'iso8859-6', + 'ascii': 'ascii', + 'asmo708': 'iso8859-6', + 'big5': 'big5', + 'big5hkscs': 'big5hkscs', + 'chinese': 'gbk', + 'cp037': 'cp037', + 'cp1026': 'cp1026', + 'cp154': 'ptcp154', + 'cp367': 'ascii', + 'cp424': 'cp424', + 'cp437': 'cp437', + 'cp500': 'cp500', + 'cp775': 'cp775', + 'cp819': 'windows-1252', + 'cp850': 'cp850', + 'cp852': 'cp852', + 'cp855': 'cp855', + 'cp857': 'cp857', + 'cp860': 'cp860', + 'cp861': 'cp861', + 'cp862': 'cp862', + 'cp863': 'cp863', + 'cp864': 'cp864', + 'cp865': 'cp865', + 'cp866': 'cp866', + 'cp869': 'cp869', + 'cp936': 'gbk', + 'cpgr': 'cp869', + 'cpis': 'cp861', + 'csascii': 'ascii', + 'csbig5': 'big5', + 'cseuckr': 'cp949', + 'cseucpkdfmtjapanese': 'euc_jp', + 'csgb2312': 'gbk', + 'cshproman8': 'hp-roman8', + 'csibm037': 'cp037', + 'csibm1026': 'cp1026', + 'csibm424': 'cp424', + 'csibm500': 'cp500', + 'csibm855': 'cp855', + 'csibm857': 'cp857', + 'csibm860': 'cp860', + 'csibm861': 'cp861', + 'csibm863': 'cp863', + 'csibm864': 'cp864', + 'csibm865': 'cp865', + 'csibm866': 'cp866', + 'csibm869': 'cp869', + 'csiso2022jp': 'iso2022_jp', + 'csiso2022jp2': 'iso2022_jp_2', + 'csiso2022kr': 'iso2022_kr', + 'csiso58gb231280': 'gbk', + 'csisolatin1': 'windows-1252', + 'csisolatin2': 'iso8859-2', + 'csisolatin3': 'iso8859-3', + 'csisolatin4': 'iso8859-4', + 'csisolatin5': 'windows-1254', + 'csisolatin6': 'iso8859-10', + 'csisolatinarabic': 'iso8859-6', + 'csisolatincyrillic': 'iso8859-5', + 'csisolatingreek': 'iso8859-7', + 'csisolatinhebrew': 'iso8859-8', + 'cskoi8r': 'koi8-r', + 'csksc56011987': 'cp949', + 'cspc775baltic': 'cp775', + 'cspc850multilingual': 'cp850', + 'cspc862latinhebrew': 'cp862', + 'cspc8codepage437': 'cp437', + 'cspcp852': 'cp852', + 'csptcp154': 'ptcp154', + 'csshiftjis': 'shift_jis', + 'csunicode11utf7': 'utf-7', + 'cyrillic': 'iso8859-5', + 'cyrillicasian': 'ptcp154', + 'ebcdiccpbe': 'cp500', + 'ebcdiccpca': 'cp037', + 'ebcdiccpch': 'cp500', + 'ebcdiccphe': 'cp424', + 'ebcdiccpnl': 'cp037', + 'ebcdiccpus': 'cp037', + 'ebcdiccpwt': 'cp037', + 'ecma114': 'iso8859-6', + 'ecma118': 'iso8859-7', + 'elot928': 'iso8859-7', + 'eucjp': 'euc_jp', + 'euckr': 'cp949', + 'extendedunixcodepackedformatforjapanese': 'euc_jp', + 'gb18030': 'gb18030', + 'gb2312': 'gbk', + 'gb231280': 'gbk', + 'gbk': 'gbk', + 'greek': 'iso8859-7', + 'greek8': 'iso8859-7', + 'hebrew': 'iso8859-8', + 'hproman8': 'hp-roman8', + 'hzgb2312': 'hz', + 'ibm037': 'cp037', + 'ibm1026': 'cp1026', + 'ibm367': 'ascii', + 'ibm424': 'cp424', + 'ibm437': 'cp437', + 'ibm500': 'cp500', + 'ibm775': 'cp775', + 'ibm819': 'windows-1252', + 'ibm850': 'cp850', + 'ibm852': 'cp852', + 'ibm855': 'cp855', + 'ibm857': 'cp857', + 'ibm860': 'cp860', + 'ibm861': 'cp861', + 'ibm862': 'cp862', + 'ibm863': 'cp863', + 'ibm864': 'cp864', + 'ibm865': 'cp865', + 'ibm866': 'cp866', + 'ibm869': 'cp869', + 'iso2022jp': 'iso2022_jp', + 'iso2022jp2': 'iso2022_jp_2', + 'iso2022kr': 'iso2022_kr', + 'iso646irv1991': 'ascii', + 'iso646us': 'ascii', + 'iso88591': 'windows-1252', + 'iso885910': 'iso8859-10', + 'iso8859101992': 'iso8859-10', + 'iso885911987': 'windows-1252', + 'iso885913': 'iso8859-13', + 'iso885914': 'iso8859-14', + 'iso8859141998': 'iso8859-14', + 'iso885915': 'iso8859-15', + 'iso885916': 'iso8859-16', + 'iso8859162001': 'iso8859-16', + 'iso88592': 'iso8859-2', + 'iso885921987': 'iso8859-2', + 'iso88593': 'iso8859-3', + 'iso885931988': 'iso8859-3', + 'iso88594': 'iso8859-4', + 'iso885941988': 'iso8859-4', + 'iso88595': 'iso8859-5', + 'iso885951988': 'iso8859-5', + 'iso88596': 'iso8859-6', + 'iso885961987': 'iso8859-6', + 'iso88597': 'iso8859-7', + 'iso885971987': 'iso8859-7', + 'iso88598': 'iso8859-8', + 'iso885981988': 'iso8859-8', + 'iso88599': 'windows-1254', + 'iso885991989': 'windows-1254', + 'isoceltic': 'iso8859-14', + 'isoir100': 'windows-1252', + 'isoir101': 'iso8859-2', + 'isoir109': 'iso8859-3', + 'isoir110': 'iso8859-4', + 'isoir126': 'iso8859-7', + 'isoir127': 'iso8859-6', + 'isoir138': 'iso8859-8', + 'isoir144': 'iso8859-5', + 'isoir148': 'windows-1254', + 'isoir149': 'cp949', + 'isoir157': 'iso8859-10', + 'isoir199': 'iso8859-14', + 'isoir226': 'iso8859-16', + 'isoir58': 'gbk', + 'isoir6': 'ascii', + 'koi8r': 'koi8-r', + 'koi8u': 'koi8-u', + 'korean': 'cp949', + 'ksc5601': 'cp949', + 'ksc56011987': 'cp949', + 'ksc56011989': 'cp949', + 'l1': 'windows-1252', + 'l10': 'iso8859-16', + 'l2': 'iso8859-2', + 'l3': 'iso8859-3', + 'l4': 'iso8859-4', + 'l5': 'windows-1254', + 'l6': 'iso8859-10', + 'l8': 'iso8859-14', + 'latin1': 'windows-1252', + 'latin10': 'iso8859-16', + 'latin2': 'iso8859-2', + 'latin3': 'iso8859-3', + 'latin4': 'iso8859-4', + 'latin5': 'windows-1254', + 'latin6': 'iso8859-10', + 'latin8': 'iso8859-14', + 'latin9': 'iso8859-15', + 'ms936': 'gbk', + 'mskanji': 'shift_jis', + 'pt154': 'ptcp154', + 'ptcp154': 'ptcp154', + 'r8': 'hp-roman8', + 'roman8': 'hp-roman8', + 'shiftjis': 'shift_jis', + 'tis620': 'cp874', + 'unicode11utf7': 'utf-7', + 'us': 'ascii', + 'usascii': 'ascii', + 'utf16': 'utf-16', + 'utf16be': 'utf-16-be', + 'utf16le': 'utf-16-le', + 'utf8': 'utf-8', + 'windows1250': 'cp1250', + 'windows1251': 'cp1251', + 'windows1252': 'cp1252', + 'windows1253': 'cp1253', + 'windows1254': 'cp1254', + 'windows1255': 'cp1255', + 'windows1256': 'cp1256', + 'windows1257': 'cp1257', + 'windows1258': 'cp1258', + 'windows936': 'gbk', + 'x-x-big5': 'big5'} + +tokenTypes = { + "Doctype": 0, + "Characters": 1, + "SpaceCharacters": 2, + "StartTag": 3, + "EndTag": 4, + "EmptyTag": 5, + "Comment": 6, + "ParseError": 7 +} + +tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"], + tokenTypes["EmptyTag"]]) + + +prefixes = dict([(v, k) for k, v in namespaces.items()]) +prefixes["http://www.w3.org/1998/Math/MathML"] = "math" + + +class DataLossWarning(UserWarning): + pass + + +class ReparseException(Exception): + pass diff --git a/lib/html5lib/filters/__init__.py b/lib/html5lib/filters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/html5lib/filters/_base.py b/lib/html5lib/filters/_base.py new file mode 100644 index 00000000..c7dbaed0 --- /dev/null +++ b/lib/html5lib/filters/_base.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import, division, unicode_literals + + +class Filter(object): + def __init__(self, source): + self.source = source + + def __iter__(self): + return iter(self.source) + + def __getattr__(self, name): + return getattr(self.source, name) diff --git a/lib/html5lib/filters/alphabeticalattributes.py b/lib/html5lib/filters/alphabeticalattributes.py new file mode 100644 index 00000000..fed6996c --- /dev/null +++ b/lib/html5lib/filters/alphabeticalattributes.py @@ -0,0 +1,20 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import _base + +try: + from collections import OrderedDict +except ImportError: + from ordereddict import OrderedDict + + +class Filter(_base.Filter): + def __iter__(self): + for token in _base.Filter.__iter__(self): + if token["type"] in ("StartTag", "EmptyTag"): + attrs = OrderedDict() + for name, value in sorted(token["data"].items(), + key=lambda x: x[0]): + attrs[name] = value + token["data"] = attrs + yield token diff --git a/lib/html5lib/filters/inject_meta_charset.py b/lib/html5lib/filters/inject_meta_charset.py new file mode 100644 index 00000000..ca33b70b --- /dev/null +++ b/lib/html5lib/filters/inject_meta_charset.py @@ -0,0 +1,65 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import _base + + +class Filter(_base.Filter): + def __init__(self, source, encoding): + _base.Filter.__init__(self, source) + self.encoding = encoding + + def __iter__(self): + state = "pre_head" + meta_found = (self.encoding is None) + pending = [] + + for token in _base.Filter.__iter__(self): + type = token["type"] + if type == "StartTag": + if token["name"].lower() == "head": + state = "in_head" + + elif type == "EmptyTag": + if token["name"].lower() == "meta": + # replace charset with actual encoding + has_http_equiv_content_type = False + for (namespace, name), value in token["data"].items(): + if namespace is not None: + continue + elif name.lower() == 'charset': + token["data"][(namespace, name)] = self.encoding + meta_found = True + break + elif name == 'http-equiv' and value.lower() == 'content-type': + has_http_equiv_content_type = True + else: + if has_http_equiv_content_type and (None, "content") in token["data"]: + token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding + meta_found = True + + elif token["name"].lower() == "head" and not meta_found: + # insert meta into empty head + yield {"type": "StartTag", "name": "head", + "data": token["data"]} + yield {"type": "EmptyTag", "name": "meta", + "data": {(None, "charset"): self.encoding}} + yield {"type": "EndTag", "name": "head"} + meta_found = True + continue + + elif type == "EndTag": + if token["name"].lower() == "head" and pending: + # insert meta into head (if necessary) and flush pending queue + yield pending.pop(0) + if not meta_found: + yield {"type": "EmptyTag", "name": "meta", + "data": {(None, "charset"): self.encoding}} + while pending: + yield pending.pop(0) + meta_found = True + state = "post_head" + + if state == "in_head": + pending.append(token) + else: + yield token diff --git a/lib/html5lib/filters/lint.py b/lib/html5lib/filters/lint.py new file mode 100644 index 00000000..8884696d --- /dev/null +++ b/lib/html5lib/filters/lint.py @@ -0,0 +1,90 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import _base +from ..constants import cdataElements, rcdataElements, voidElements + +from ..constants import spaceCharacters +spaceCharacters = "".join(spaceCharacters) + + +class LintError(Exception): + pass + + +class Filter(_base.Filter): + def __iter__(self): + open_elements = [] + contentModelFlag = "PCDATA" + for token in _base.Filter.__iter__(self): + type = token["type"] + if type in ("StartTag", "EmptyTag"): + name = token["name"] + if contentModelFlag != "PCDATA": + raise LintError("StartTag not in PCDATA content model flag: %(tag)s" % {"tag": name}) + if not isinstance(name, str): + raise LintError("Tag name is not a string: %(tag)r" % {"tag": name}) + if not name: + raise LintError("Empty tag name") + if type == "StartTag" and name in voidElements: + raise LintError("Void element reported as StartTag token: %(tag)s" % {"tag": name}) + elif type == "EmptyTag" and name not in voidElements: + raise LintError("Non-void element reported as EmptyTag token: %(tag)s" % {"tag": token["name"]}) + if type == "StartTag": + open_elements.append(name) + for name, value in token["data"]: + if not isinstance(name, str): + raise LintError("Attribute name is not a string: %(name)r" % {"name": name}) + if not name: + raise LintError("Empty attribute name") + if not isinstance(value, str): + raise LintError("Attribute value is not a string: %(value)r" % {"value": value}) + if name in cdataElements: + contentModelFlag = "CDATA" + elif name in rcdataElements: + contentModelFlag = "RCDATA" + elif name == "plaintext": + contentModelFlag = "PLAINTEXT" + + elif type == "EndTag": + name = token["name"] + if not isinstance(name, str): + raise LintError("Tag name is not a string: %(tag)r" % {"tag": name}) + if not name: + raise LintError("Empty tag name") + if name in voidElements: + raise LintError("Void element reported as EndTag token: %(tag)s" % {"tag": name}) + start_name = open_elements.pop() + if start_name != name: + raise LintError("EndTag (%(end)s) does not match StartTag (%(start)s)" % {"end": name, "start": start_name}) + contentModelFlag = "PCDATA" + + elif type == "Comment": + if contentModelFlag != "PCDATA": + raise LintError("Comment not in PCDATA content model flag") + + elif type in ("Characters", "SpaceCharacters"): + data = token["data"] + if not isinstance(data, str): + raise LintError("Attribute name is not a string: %(name)r" % {"name": data}) + if not data: + raise LintError("%(type)s token with empty data" % {"type": type}) + if type == "SpaceCharacters": + data = data.strip(spaceCharacters) + if data: + raise LintError("Non-space character(s) found in SpaceCharacters token: %(token)r" % {"token": data}) + + elif type == "Doctype": + name = token["name"] + if contentModelFlag != "PCDATA": + raise LintError("Doctype not in PCDATA content model flag: %(name)s" % {"name": name}) + if not isinstance(name, str): + raise LintError("Tag name is not a string: %(tag)r" % {"tag": name}) + # XXX: what to do with token["data"] ? + + elif type in ("ParseError", "SerializeError"): + pass + + else: + raise LintError("Unknown token type: %(type)s" % {"type": type}) + + yield token diff --git a/lib/html5lib/filters/optionaltags.py b/lib/html5lib/filters/optionaltags.py new file mode 100644 index 00000000..fefe0b30 --- /dev/null +++ b/lib/html5lib/filters/optionaltags.py @@ -0,0 +1,205 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import _base + + +class Filter(_base.Filter): + def slider(self): + previous1 = previous2 = None + for token in self.source: + if previous1 is not None: + yield previous2, previous1, token + previous2 = previous1 + previous1 = token + yield previous2, previous1, None + + def __iter__(self): + for previous, token, next in self.slider(): + type = token["type"] + if type == "StartTag": + if (token["data"] or + not self.is_optional_start(token["name"], previous, next)): + yield token + elif type == "EndTag": + if not self.is_optional_end(token["name"], next): + yield token + else: + yield token + + def is_optional_start(self, tagname, previous, next): + type = next and next["type"] or None + if tagname in 'html': + # An html element's start tag may be omitted if the first thing + # inside the html element is not a space character or a comment. + return type not in ("Comment", "SpaceCharacters") + elif tagname == 'head': + # A head element's start tag may be omitted if the first thing + # inside the head element is an element. + # XXX: we also omit the start tag if the head element is empty + if type in ("StartTag", "EmptyTag"): + return True + elif type == "EndTag": + return next["name"] == "head" + elif tagname == 'body': + # A body element's start tag may be omitted if the first thing + # inside the body element is not a space character or a comment, + # except if the first thing inside the body element is a script + # or style element and the node immediately preceding the body + # element is a head element whose end tag has been omitted. + if type in ("Comment", "SpaceCharacters"): + return False + elif type == "StartTag": + # XXX: we do not look at the preceding event, so we never omit + # the body element's start tag if it's followed by a script or + # a style element. + return next["name"] not in ('script', 'style') + else: + return True + elif tagname == 'colgroup': + # A colgroup element's start tag may be omitted if the first thing + # inside the colgroup element is a col element, and if the element + # is not immediately preceeded by another colgroup element whose + # end tag has been omitted. + if type in ("StartTag", "EmptyTag"): + # XXX: we do not look at the preceding event, so instead we never + # omit the colgroup element's end tag when it is immediately + # followed by another colgroup element. See is_optional_end. + return next["name"] == "col" + else: + return False + elif tagname == 'tbody': + # A tbody element's start tag may be omitted if the first thing + # inside the tbody element is a tr element, and if the element is + # not immediately preceeded by a tbody, thead, or tfoot element + # whose end tag has been omitted. + if type == "StartTag": + # omit the thead and tfoot elements' end tag when they are + # immediately followed by a tbody element. See is_optional_end. + if previous and previous['type'] == 'EndTag' and \ + previous['name'] in ('tbody', 'thead', 'tfoot'): + return False + return next["name"] == 'tr' + else: + return False + return False + + def is_optional_end(self, tagname, next): + type = next and next["type"] or None + if tagname in ('html', 'head', 'body'): + # An html element's end tag may be omitted if the html element + # is not immediately followed by a space character or a comment. + return type not in ("Comment", "SpaceCharacters") + elif tagname in ('li', 'optgroup', 'tr'): + # A li element's end tag may be omitted if the li element is + # immediately followed by another li element or if there is + # no more content in the parent element. + # An optgroup element's end tag may be omitted if the optgroup + # element is immediately followed by another optgroup element, + # or if there is no more content in the parent element. + # A tr element's end tag may be omitted if the tr element is + # immediately followed by another tr element, or if there is + # no more content in the parent element. + if type == "StartTag": + return next["name"] == tagname + else: + return type == "EndTag" or type is None + elif tagname in ('dt', 'dd'): + # A dt element's end tag may be omitted if the dt element is + # immediately followed by another dt element or a dd element. + # A dd element's end tag may be omitted if the dd element is + # immediately followed by another dd element or a dt element, + # or if there is no more content in the parent element. + if type == "StartTag": + return next["name"] in ('dt', 'dd') + elif tagname == 'dd': + return type == "EndTag" or type is None + else: + return False + elif tagname == 'p': + # A p element's end tag may be omitted if the p element is + # immediately followed by an address, article, aside, + # blockquote, datagrid, dialog, dir, div, dl, fieldset, + # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu, + # nav, ol, p, pre, section, table, or ul, element, or if + # there is no more content in the parent element. + if type in ("StartTag", "EmptyTag"): + return next["name"] in ('address', 'article', 'aside', + 'blockquote', 'datagrid', 'dialog', + 'dir', 'div', 'dl', 'fieldset', 'footer', + 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', + 'header', 'hr', 'menu', 'nav', 'ol', + 'p', 'pre', 'section', 'table', 'ul') + else: + return type == "EndTag" or type is None + elif tagname == 'option': + # An option element's end tag may be omitted if the option + # element is immediately followed by another option element, + # or if it is immediately followed by an optgroup + # element, or if there is no more content in the parent + # element. + if type == "StartTag": + return next["name"] in ('option', 'optgroup') + else: + return type == "EndTag" or type is None + elif tagname in ('rt', 'rp'): + # An rt element's end tag may be omitted if the rt element is + # immediately followed by an rt or rp element, or if there is + # no more content in the parent element. + # An rp element's end tag may be omitted if the rp element is + # immediately followed by an rt or rp element, or if there is + # no more content in the parent element. + if type == "StartTag": + return next["name"] in ('rt', 'rp') + else: + return type == "EndTag" or type is None + elif tagname == 'colgroup': + # A colgroup element's end tag may be omitted if the colgroup + # element is not immediately followed by a space character or + # a comment. + if type in ("Comment", "SpaceCharacters"): + return False + elif type == "StartTag": + # XXX: we also look for an immediately following colgroup + # element. See is_optional_start. + return next["name"] != 'colgroup' + else: + return True + elif tagname in ('thead', 'tbody'): + # A thead element's end tag may be omitted if the thead element + # is immediately followed by a tbody or tfoot element. + # A tbody element's end tag may be omitted if the tbody element + # is immediately followed by a tbody or tfoot element, or if + # there is no more content in the parent element. + # A tfoot element's end tag may be omitted if the tfoot element + # is immediately followed by a tbody element, or if there is no + # more content in the parent element. + # XXX: we never omit the end tag when the following element is + # a tbody. See is_optional_start. + if type == "StartTag": + return next["name"] in ['tbody', 'tfoot'] + elif tagname == 'tbody': + return type == "EndTag" or type is None + else: + return False + elif tagname == 'tfoot': + # A tfoot element's end tag may be omitted if the tfoot element + # is immediately followed by a tbody element, or if there is no + # more content in the parent element. + # XXX: we never omit the end tag when the following element is + # a tbody. See is_optional_start. + if type == "StartTag": + return next["name"] == 'tbody' + else: + return type == "EndTag" or type is None + elif tagname in ('td', 'th'): + # A td element's end tag may be omitted if the td element is + # immediately followed by a td or th element, or if there is + # no more content in the parent element. + # A th element's end tag may be omitted if the th element is + # immediately followed by a td or th element, or if there is + # no more content in the parent element. + if type == "StartTag": + return next["name"] in ('td', 'th') + else: + return type == "EndTag" or type is None + return False diff --git a/lib/html5lib/filters/sanitizer.py b/lib/html5lib/filters/sanitizer.py new file mode 100644 index 00000000..b206b54e --- /dev/null +++ b/lib/html5lib/filters/sanitizer.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import, division, unicode_literals + +from . import _base +from ..sanitizer import HTMLSanitizerMixin + + +class Filter(_base.Filter, HTMLSanitizerMixin): + def __iter__(self): + for token in _base.Filter.__iter__(self): + token = self.sanitize_token(token) + if token: + yield token diff --git a/lib/html5lib/filters/whitespace.py b/lib/html5lib/filters/whitespace.py new file mode 100644 index 00000000..dfc60eeb --- /dev/null +++ b/lib/html5lib/filters/whitespace.py @@ -0,0 +1,38 @@ +from __future__ import absolute_import, division, unicode_literals + +import re + +from . import _base +from ..constants import rcdataElements, spaceCharacters +spaceCharacters = "".join(spaceCharacters) + +SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) + + +class Filter(_base.Filter): + + spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) + + def __iter__(self): + preserve = 0 + for token in _base.Filter.__iter__(self): + type = token["type"] + if type == "StartTag" \ + and (preserve or token["name"] in self.spacePreserveElements): + preserve += 1 + + elif type == "EndTag" and preserve: + preserve -= 1 + + elif not preserve and type == "SpaceCharacters" and token["data"]: + # Test on token["data"] above to not introduce spaces where there were not + token["data"] = " " + + elif not preserve and type == "Characters": + token["data"] = collapse_spaces(token["data"]) + + yield token + + +def collapse_spaces(text): + return SPACES_REGEX.sub(' ', text) diff --git a/lib/html5lib/html5parser.py b/lib/html5lib/html5parser.py new file mode 100644 index 00000000..12aa6a35 --- /dev/null +++ b/lib/html5lib/html5parser.py @@ -0,0 +1,2724 @@ +from __future__ import absolute_import, division, unicode_literals +from six import with_metaclass + +import types + +from . import inputstream +from . import tokenizer + +from . import treebuilders +from .treebuilders._base import Marker + +from . import utils +from . import constants +from .constants import spaceCharacters, asciiUpper2Lower +from .constants import specialElements +from .constants import headingElements +from .constants import cdataElements, rcdataElements +from .constants import tokenTypes, ReparseException, namespaces +from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements +from .constants import adjustForeignAttributes as adjustForeignAttributesMap +from .constants import E + + +def parse(doc, treebuilder="etree", encoding=None, + namespaceHTMLElements=True): + """Parse a string or file-like object into a tree""" + tb = treebuilders.getTreeBuilder(treebuilder) + p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) + return p.parse(doc, encoding=encoding) + + +def parseFragment(doc, container="div", treebuilder="etree", encoding=None, + namespaceHTMLElements=True): + tb = treebuilders.getTreeBuilder(treebuilder) + p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) + return p.parseFragment(doc, container=container, encoding=encoding) + + +def method_decorator_metaclass(function): + class Decorated(type): + def __new__(meta, classname, bases, classDict): + for attributeName, attribute in classDict.items(): + if isinstance(attribute, types.FunctionType): + attribute = function(attribute) + + classDict[attributeName] = attribute + return type.__new__(meta, classname, bases, classDict) + return Decorated + + +class HTMLParser(object): + """HTML parser. Generates a tree structure from a stream of (possibly + malformed) HTML""" + + def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer, + strict=False, namespaceHTMLElements=True, debug=False): + """ + strict - raise an exception when a parse error is encountered + + tree - a treebuilder class controlling the type of tree that will be + returned. Built in treebuilders can be accessed through + html5lib.treebuilders.getTreeBuilder(treeType) + + tokenizer - a class that provides a stream of tokens to the treebuilder. + This may be replaced for e.g. a sanitizer which converts some tags to + text + """ + + # Raise an exception on the first error encountered + self.strict = strict + + if tree is None: + tree = treebuilders.getTreeBuilder("etree") + self.tree = tree(namespaceHTMLElements) + self.tokenizer_class = tokenizer + self.errors = [] + + self.phases = dict([(name, cls(self, self.tree)) for name, cls in + getPhases(debug).items()]) + + def _parse(self, stream, innerHTML=False, container="div", + encoding=None, parseMeta=True, useChardet=True, **kwargs): + + self.innerHTMLMode = innerHTML + self.container = container + self.tokenizer = self.tokenizer_class(stream, encoding=encoding, + parseMeta=parseMeta, + useChardet=useChardet, + parser=self, **kwargs) + self.reset() + + while True: + try: + self.mainLoop() + break + except ReparseException: + self.reset() + + def reset(self): + self.tree.reset() + self.firstStartTag = False + self.errors = [] + self.log = [] # only used with debug mode + # "quirks" / "limited quirks" / "no quirks" + self.compatMode = "no quirks" + + if self.innerHTMLMode: + self.innerHTML = self.container.lower() + + if self.innerHTML in cdataElements: + self.tokenizer.state = self.tokenizer.rcdataState + elif self.innerHTML in rcdataElements: + self.tokenizer.state = self.tokenizer.rawtextState + elif self.innerHTML == 'plaintext': + self.tokenizer.state = self.tokenizer.plaintextState + else: + # state already is data state + # self.tokenizer.state = self.tokenizer.dataState + pass + self.phase = self.phases["beforeHtml"] + self.phase.insertHtmlElement() + self.resetInsertionMode() + else: + self.innerHTML = False + self.phase = self.phases["initial"] + + self.lastPhase = None + + self.beforeRCDataPhase = None + + self.framesetOK = True + + @property + def documentEncoding(self): + """The name of the character encoding + that was used to decode the input stream, + or :obj:`None` if that is not determined yet. + + """ + if not hasattr(self, 'tokenizer'): + return None + return self.tokenizer.stream.charEncoding[0] + + def isHTMLIntegrationPoint(self, element): + if (element.name == "annotation-xml" and + element.namespace == namespaces["mathml"]): + return ("encoding" in element.attributes and + element.attributes["encoding"].translate( + asciiUpper2Lower) in + ("text/html", "application/xhtml+xml")) + else: + return (element.namespace, element.name) in htmlIntegrationPointElements + + def isMathMLTextIntegrationPoint(self, element): + return (element.namespace, element.name) in mathmlTextIntegrationPointElements + + def mainLoop(self): + CharactersToken = tokenTypes["Characters"] + SpaceCharactersToken = tokenTypes["SpaceCharacters"] + StartTagToken = tokenTypes["StartTag"] + EndTagToken = tokenTypes["EndTag"] + CommentToken = tokenTypes["Comment"] + DoctypeToken = tokenTypes["Doctype"] + ParseErrorToken = tokenTypes["ParseError"] + + for token in self.normalizedTokens(): + new_token = token + while new_token is not None: + currentNode = self.tree.openElements[-1] if self.tree.openElements else None + currentNodeNamespace = currentNode.namespace if currentNode else None + currentNodeName = currentNode.name if currentNode else None + + type = new_token["type"] + + if type == ParseErrorToken: + self.parseError(new_token["data"], new_token.get("datavars", {})) + new_token = None + else: + if (len(self.tree.openElements) == 0 or + currentNodeNamespace == self.tree.defaultNamespace or + (self.isMathMLTextIntegrationPoint(currentNode) and + ((type == StartTagToken and + token["name"] not in frozenset(["mglyph", "malignmark"])) or + type in (CharactersToken, SpaceCharactersToken))) or + (currentNodeNamespace == namespaces["mathml"] and + currentNodeName == "annotation-xml" and + token["name"] == "svg") or + (self.isHTMLIntegrationPoint(currentNode) and + type in (StartTagToken, CharactersToken, SpaceCharactersToken))): + phase = self.phase + else: + phase = self.phases["inForeignContent"] + + if type == CharactersToken: + new_token = phase.processCharacters(new_token) + elif type == SpaceCharactersToken: + new_token = phase.processSpaceCharacters(new_token) + elif type == StartTagToken: + new_token = phase.processStartTag(new_token) + elif type == EndTagToken: + new_token = phase.processEndTag(new_token) + elif type == CommentToken: + new_token = phase.processComment(new_token) + elif type == DoctypeToken: + new_token = phase.processDoctype(new_token) + + if (type == StartTagToken and token["selfClosing"] + and not token["selfClosingAcknowledged"]): + self.parseError("non-void-element-with-trailing-solidus", + {"name": token["name"]}) + + # When the loop finishes it's EOF + reprocess = True + phases = [] + while reprocess: + phases.append(self.phase) + reprocess = self.phase.processEOF() + if reprocess: + assert self.phase not in phases + + def normalizedTokens(self): + for token in self.tokenizer: + yield self.normalizeToken(token) + + def parse(self, stream, encoding=None, parseMeta=True, useChardet=True): + """Parse a HTML document into a well-formed tree + + stream - a filelike object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element) + """ + self._parse(stream, innerHTML=False, encoding=encoding, + parseMeta=parseMeta, useChardet=useChardet) + return self.tree.getDocument() + + def parseFragment(self, stream, container="div", encoding=None, + parseMeta=False, useChardet=True): + """Parse a HTML fragment into a well-formed tree fragment + + container - name of the element we're setting the innerHTML property + if set to None, default to 'div' + + stream - a filelike object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element) + """ + self._parse(stream, True, container=container, encoding=encoding) + return self.tree.getFragment() + + def parseError(self, errorcode="XXX-undefined-error", datavars={}): + # XXX The idea is to make errorcode mandatory. + self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) + if self.strict: + raise ParseError(E[errorcode] % datavars) + + def normalizeToken(self, token): + """ HTML5 specific normalizations to the token stream """ + + if token["type"] == tokenTypes["StartTag"]: + token["data"] = dict(token["data"][::-1]) + + return token + + def adjustMathMLAttributes(self, token): + replacements = {"definitionurl": "definitionURL"} + for k, v in replacements.items(): + if k in token["data"]: + token["data"][v] = token["data"][k] + del token["data"][k] + + def adjustSVGAttributes(self, token): + replacements = { + "attributename": "attributeName", + "attributetype": "attributeType", + "basefrequency": "baseFrequency", + "baseprofile": "baseProfile", + "calcmode": "calcMode", + "clippathunits": "clipPathUnits", + "contentscripttype": "contentScriptType", + "contentstyletype": "contentStyleType", + "diffuseconstant": "diffuseConstant", + "edgemode": "edgeMode", + "externalresourcesrequired": "externalResourcesRequired", + "filterres": "filterRes", + "filterunits": "filterUnits", + "glyphref": "glyphRef", + "gradienttransform": "gradientTransform", + "gradientunits": "gradientUnits", + "kernelmatrix": "kernelMatrix", + "kernelunitlength": "kernelUnitLength", + "keypoints": "keyPoints", + "keysplines": "keySplines", + "keytimes": "keyTimes", + "lengthadjust": "lengthAdjust", + "limitingconeangle": "limitingConeAngle", + "markerheight": "markerHeight", + "markerunits": "markerUnits", + "markerwidth": "markerWidth", + "maskcontentunits": "maskContentUnits", + "maskunits": "maskUnits", + "numoctaves": "numOctaves", + "pathlength": "pathLength", + "patterncontentunits": "patternContentUnits", + "patterntransform": "patternTransform", + "patternunits": "patternUnits", + "pointsatx": "pointsAtX", + "pointsaty": "pointsAtY", + "pointsatz": "pointsAtZ", + "preservealpha": "preserveAlpha", + "preserveaspectratio": "preserveAspectRatio", + "primitiveunits": "primitiveUnits", + "refx": "refX", + "refy": "refY", + "repeatcount": "repeatCount", + "repeatdur": "repeatDur", + "requiredextensions": "requiredExtensions", + "requiredfeatures": "requiredFeatures", + "specularconstant": "specularConstant", + "specularexponent": "specularExponent", + "spreadmethod": "spreadMethod", + "startoffset": "startOffset", + "stddeviation": "stdDeviation", + "stitchtiles": "stitchTiles", + "surfacescale": "surfaceScale", + "systemlanguage": "systemLanguage", + "tablevalues": "tableValues", + "targetx": "targetX", + "targety": "targetY", + "textlength": "textLength", + "viewbox": "viewBox", + "viewtarget": "viewTarget", + "xchannelselector": "xChannelSelector", + "ychannelselector": "yChannelSelector", + "zoomandpan": "zoomAndPan" + } + for originalName in list(token["data"].keys()): + if originalName in replacements: + svgName = replacements[originalName] + token["data"][svgName] = token["data"][originalName] + del token["data"][originalName] + + def adjustForeignAttributes(self, token): + replacements = adjustForeignAttributesMap + + for originalName in token["data"].keys(): + if originalName in replacements: + foreignName = replacements[originalName] + token["data"][foreignName] = token["data"][originalName] + del token["data"][originalName] + + def reparseTokenNormal(self, token): + self.parser.phase() + + def resetInsertionMode(self): + # The name of this method is mostly historical. (It's also used in the + # specification.) + last = False + newModes = { + "select": "inSelect", + "td": "inCell", + "th": "inCell", + "tr": "inRow", + "tbody": "inTableBody", + "thead": "inTableBody", + "tfoot": "inTableBody", + "caption": "inCaption", + "colgroup": "inColumnGroup", + "table": "inTable", + "head": "inBody", + "body": "inBody", + "frameset": "inFrameset", + "html": "beforeHead" + } + for node in self.tree.openElements[::-1]: + nodeName = node.name + new_phase = None + if node == self.tree.openElements[0]: + assert self.innerHTML + last = True + nodeName = self.innerHTML + # Check for conditions that should only happen in the innerHTML + # case + if nodeName in ("select", "colgroup", "head", "html"): + assert self.innerHTML + + if not last and node.namespace != self.tree.defaultNamespace: + continue + + if nodeName in newModes: + new_phase = self.phases[newModes[nodeName]] + break + elif last: + new_phase = self.phases["inBody"] + break + + self.phase = new_phase + + def parseRCDataRawtext(self, token, contentType): + """Generic RCDATA/RAWTEXT Parsing algorithm + contentType - RCDATA or RAWTEXT + """ + assert contentType in ("RAWTEXT", "RCDATA") + + self.tree.insertElement(token) + + if contentType == "RAWTEXT": + self.tokenizer.state = self.tokenizer.rawtextState + else: + self.tokenizer.state = self.tokenizer.rcdataState + + self.originalPhase = self.phase + + self.phase = self.phases["text"] + + +def getPhases(debug): + def log(function): + """Logger that records which phase processes each token""" + type_names = dict((value, key) for key, value in + constants.tokenTypes.items()) + + def wrapped(self, *args, **kwargs): + if function.__name__.startswith("process") and len(args) > 0: + token = args[0] + try: + info = {"type": type_names[token['type']]} + except: + raise + if token['type'] in constants.tagTokenTypes: + info["name"] = token['name'] + + self.parser.log.append((self.parser.tokenizer.state.__name__, + self.parser.phase.__class__.__name__, + self.__class__.__name__, + function.__name__, + info)) + return function(self, *args, **kwargs) + else: + return function(self, *args, **kwargs) + return wrapped + + def getMetaclass(use_metaclass, metaclass_func): + if use_metaclass: + return method_decorator_metaclass(metaclass_func) + else: + return type + + class Phase(with_metaclass(getMetaclass(debug, log))): + """Base class for helper object that implements each phase of processing + """ + + def __init__(self, parser, tree): + self.parser = parser + self.tree = tree + + def processEOF(self): + raise NotImplementedError + + def processComment(self, token): + # For most phases the following is correct. Where it's not it will be + # overridden. + self.tree.insertComment(token, self.tree.openElements[-1]) + + def processDoctype(self, token): + self.parser.parseError("unexpected-doctype") + + def processCharacters(self, token): + self.tree.insertText(token["data"]) + + def processSpaceCharacters(self, token): + self.tree.insertText(token["data"]) + + def processStartTag(self, token): + return self.startTagHandler[token["name"]](token) + + def startTagHtml(self, token): + if not self.parser.firstStartTag and token["name"] == "html": + self.parser.parseError("non-html-root") + # XXX Need a check here to see if the first start tag token emitted is + # this token... If it's not, invoke self.parser.parseError(). + for attr, value in token["data"].items(): + if attr not in self.tree.openElements[0].attributes: + self.tree.openElements[0].attributes[attr] = value + self.parser.firstStartTag = False + + def processEndTag(self, token): + return self.endTagHandler[token["name"]](token) + + class InitialPhase(Phase): + def processSpaceCharacters(self, token): + pass + + def processComment(self, token): + self.tree.insertComment(token, self.tree.document) + + def processDoctype(self, token): + name = token["name"] + publicId = token["publicId"] + systemId = token["systemId"] + correct = token["correct"] + + if (name != "html" or publicId is not None or + systemId is not None and systemId != "about:legacy-compat"): + self.parser.parseError("unknown-doctype") + + if publicId is None: + publicId = "" + + self.tree.insertDoctype(token) + + if publicId != "": + publicId = publicId.translate(asciiUpper2Lower) + + if (not correct or token["name"] != "html" + or publicId.startswith( + ("+//silmaril//dtd html pro v0r11 19970101//", + "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", + "-//as//dtd html 3.0 aswedit + extensions//", + "-//ietf//dtd html 2.0 level 1//", + "-//ietf//dtd html 2.0 level 2//", + "-//ietf//dtd html 2.0 strict level 1//", + "-//ietf//dtd html 2.0 strict level 2//", + "-//ietf//dtd html 2.0 strict//", + "-//ietf//dtd html 2.0//", + "-//ietf//dtd html 2.1e//", + "-//ietf//dtd html 3.0//", + "-//ietf//dtd html 3.2 final//", + "-//ietf//dtd html 3.2//", + "-//ietf//dtd html 3//", + "-//ietf//dtd html level 0//", + "-//ietf//dtd html level 1//", + "-//ietf//dtd html level 2//", + "-//ietf//dtd html level 3//", + "-//ietf//dtd html strict level 0//", + "-//ietf//dtd html strict level 1//", + "-//ietf//dtd html strict level 2//", + "-//ietf//dtd html strict level 3//", + "-//ietf//dtd html strict//", + "-//ietf//dtd html//", + "-//metrius//dtd metrius presentational//", + "-//microsoft//dtd internet explorer 2.0 html strict//", + "-//microsoft//dtd internet explorer 2.0 html//", + "-//microsoft//dtd internet explorer 2.0 tables//", + "-//microsoft//dtd internet explorer 3.0 html strict//", + "-//microsoft//dtd internet explorer 3.0 html//", + "-//microsoft//dtd internet explorer 3.0 tables//", + "-//netscape comm. corp.//dtd html//", + "-//netscape comm. corp.//dtd strict html//", + "-//o'reilly and associates//dtd html 2.0//", + "-//o'reilly and associates//dtd html extended 1.0//", + "-//o'reilly and associates//dtd html extended relaxed 1.0//", + "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", + "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", + "-//spyglass//dtd html 2.0 extended//", + "-//sq//dtd html 2.0 hotmetal + extensions//", + "-//sun microsystems corp.//dtd hotjava html//", + "-//sun microsystems corp.//dtd hotjava strict html//", + "-//w3c//dtd html 3 1995-03-24//", + "-//w3c//dtd html 3.2 draft//", + "-//w3c//dtd html 3.2 final//", + "-//w3c//dtd html 3.2//", + "-//w3c//dtd html 3.2s draft//", + "-//w3c//dtd html 4.0 frameset//", + "-//w3c//dtd html 4.0 transitional//", + "-//w3c//dtd html experimental 19960712//", + "-//w3c//dtd html experimental 970421//", + "-//w3c//dtd w3 html//", + "-//w3o//dtd w3 html 3.0//", + "-//webtechs//dtd mozilla html 2.0//", + "-//webtechs//dtd mozilla html//")) + or publicId in + ("-//w3o//dtd w3 html strict 3.0//en//", + "-/w3c/dtd html 4.0 transitional/en", + "html") + or publicId.startswith( + ("-//w3c//dtd html 4.01 frameset//", + "-//w3c//dtd html 4.01 transitional//")) and + systemId is None + or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): + self.parser.compatMode = "quirks" + elif (publicId.startswith( + ("-//w3c//dtd xhtml 1.0 frameset//", + "-//w3c//dtd xhtml 1.0 transitional//")) + or publicId.startswith( + ("-//w3c//dtd html 4.01 frameset//", + "-//w3c//dtd html 4.01 transitional//")) and + systemId is not None): + self.parser.compatMode = "limited quirks" + + self.parser.phase = self.parser.phases["beforeHtml"] + + def anythingElse(self): + self.parser.compatMode = "quirks" + self.parser.phase = self.parser.phases["beforeHtml"] + + def processCharacters(self, token): + self.parser.parseError("expected-doctype-but-got-chars") + self.anythingElse() + return token + + def processStartTag(self, token): + self.parser.parseError("expected-doctype-but-got-start-tag", + {"name": token["name"]}) + self.anythingElse() + return token + + def processEndTag(self, token): + self.parser.parseError("expected-doctype-but-got-end-tag", + {"name": token["name"]}) + self.anythingElse() + return token + + def processEOF(self): + self.parser.parseError("expected-doctype-but-got-eof") + self.anythingElse() + return True + + class BeforeHtmlPhase(Phase): + # helper methods + def insertHtmlElement(self): + self.tree.insertRoot(impliedTagToken("html", "StartTag")) + self.parser.phase = self.parser.phases["beforeHead"] + + # other + def processEOF(self): + self.insertHtmlElement() + return True + + def processComment(self, token): + self.tree.insertComment(token, self.tree.document) + + def processSpaceCharacters(self, token): + pass + + def processCharacters(self, token): + self.insertHtmlElement() + return token + + def processStartTag(self, token): + if token["name"] == "html": + self.parser.firstStartTag = True + self.insertHtmlElement() + return token + + def processEndTag(self, token): + if token["name"] not in ("head", "body", "html", "br"): + self.parser.parseError("unexpected-end-tag-before-html", + {"name": token["name"]}) + else: + self.insertHtmlElement() + return token + + class BeforeHeadPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("head", self.startTagHead) + ]) + self.startTagHandler.default = self.startTagOther + + self.endTagHandler = utils.MethodDispatcher([ + (("head", "body", "html", "br"), self.endTagImplyHead) + ]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + self.startTagHead(impliedTagToken("head", "StartTag")) + return True + + def processSpaceCharacters(self, token): + pass + + def processCharacters(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagHead(self, token): + self.tree.insertElement(token) + self.tree.headPointer = self.tree.openElements[-1] + self.parser.phase = self.parser.phases["inHead"] + + def startTagOther(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def endTagImplyHead(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def endTagOther(self, token): + self.parser.parseError("end-tag-after-implied-root", + {"name": token["name"]}) + + class InHeadPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("title", self.startTagTitle), + (("noscript", "noframes", "style"), self.startTagNoScriptNoFramesStyle), + ("script", self.startTagScript), + (("base", "basefont", "bgsound", "command", "link"), + self.startTagBaseLinkCommand), + ("meta", self.startTagMeta), + ("head", self.startTagHead) + ]) + self.startTagHandler.default = self.startTagOther + + self. endTagHandler = utils.MethodDispatcher([ + ("head", self.endTagHead), + (("br", "html", "body"), self.endTagHtmlBodyBr) + ]) + self.endTagHandler.default = self.endTagOther + + # the real thing + def processEOF(self): + self.anythingElse() + return True + + def processCharacters(self, token): + self.anythingElse() + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagHead(self, token): + self.parser.parseError("two-heads-are-not-better-than-one") + + def startTagBaseLinkCommand(self, token): + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + def startTagMeta(self, token): + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + attributes = token["data"] + if self.parser.tokenizer.stream.charEncoding[1] == "tentative": + if "charset" in attributes: + self.parser.tokenizer.stream.changeEncoding(attributes["charset"]) + elif ("content" in attributes and + "http-equiv" in attributes and + attributes["http-equiv"].lower() == "content-type"): + # Encoding it as UTF-8 here is a hack, as really we should pass + # the abstract Unicode string, and just use the + # ContentAttrParser on that, but using UTF-8 allows all chars + # to be encoded and as a ASCII-superset works. + data = inputstream.EncodingBytes(attributes["content"].encode("utf-8")) + parser = inputstream.ContentAttrParser(data) + codec = parser.parse() + self.parser.tokenizer.stream.changeEncoding(codec) + + def startTagTitle(self, token): + self.parser.parseRCDataRawtext(token, "RCDATA") + + def startTagNoScriptNoFramesStyle(self, token): + # Need to decide whether to implement the scripting-disabled case + self.parser.parseRCDataRawtext(token, "RAWTEXT") + + def startTagScript(self, token): + self.tree.insertElement(token) + self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState + self.parser.originalPhase = self.parser.phase + self.parser.phase = self.parser.phases["text"] + + def startTagOther(self, token): + self.anythingElse() + return token + + def endTagHead(self, token): + node = self.parser.tree.openElements.pop() + assert node.name == "head", "Expected head got %s" % node.name + self.parser.phase = self.parser.phases["afterHead"] + + def endTagHtmlBodyBr(self, token): + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + self.endTagHead(impliedTagToken("head")) + + # XXX If we implement a parser for which scripting is disabled we need to + # implement this phase. + # + # class InHeadNoScriptPhase(Phase): + class AfterHeadPhase(Phase): + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + self.startTagHandler = utils.MethodDispatcher([ + ("html", self.startTagHtml), + ("body", self.startTagBody), + ("frameset", self.startTagFrameset), + (("base", "basefont", "bgsound", "link", "meta", "noframes", "script", + "style", "title"), + self.startTagFromHead), + ("head", self.startTagHead) + ]) + self.startTagHandler.default = self.startTagOther + self.endTagHandler = utils.MethodDispatcher([(("body", "html", "br"), + self.endTagHtmlBodyBr)]) + self.endTagHandler.default = self.endTagOther + + def processEOF(self): + self.anythingElse() + return True + + def processCharacters(self, token): + self.anythingElse() + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagBody(self, token): + self.parser.framesetOK = False + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inBody"] + + def startTagFrameset(self, token): + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inFrameset"] + + def startTagFromHead(self, token): + self.parser.parseError("unexpected-start-tag-out-of-my-head", + {"name": token["name"]}) + self.tree.openElements.append(self.tree.headPointer) + self.parser.phases["inHead"].processStartTag(token) + for node in self.tree.openElements[::-1]: + if node.name == "head": + self.tree.openElements.remove(node) + break + + def startTagHead(self, token): + self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) + + def startTagOther(self, token): + self.anythingElse() + return token + + def endTagHtmlBodyBr(self, token): + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + self.tree.insertElement(impliedTagToken("body", "StartTag")) + self.parser.phase = self.parser.phases["inBody"] + self.parser.framesetOK = True + + class InBodyPhase(Phase): + # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody + # the really-really-really-very crazy mode + def __init__(self, parser, tree): + Phase.__init__(self, parser, tree) + + # Keep a ref to this for special handling of whitespace in
+            self.processSpaceCharactersNonPre = self.processSpaceCharacters
+
+            self.startTagHandler = utils.MethodDispatcher([
+                ("html", self.startTagHtml),
+                (("base", "basefont", "bgsound", "command", "link", "meta",
+                  "script", "style", "title"),
+                 self.startTagProcessInHead),
+                ("body", self.startTagBody),
+                ("frameset", self.startTagFrameset),
+                (("address", "article", "aside", "blockquote", "center", "details",
+                  "details", "dir", "div", "dl", "fieldset", "figcaption", "figure",
+                  "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
+                  "section", "summary", "ul"),
+                 self.startTagCloseP),
+                (headingElements, self.startTagHeading),
+                (("pre", "listing"), self.startTagPreListing),
+                ("form", self.startTagForm),
+                (("li", "dd", "dt"), self.startTagListItem),
+                ("plaintext", self.startTagPlaintext),
+                ("a", self.startTagA),
+                (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
+                  "strong", "tt", "u"), self.startTagFormatting),
+                ("nobr", self.startTagNobr),
+                ("button", self.startTagButton),
+                (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
+                ("xmp", self.startTagXmp),
+                ("table", self.startTagTable),
+                (("area", "br", "embed", "img", "keygen", "wbr"),
+                 self.startTagVoidFormatting),
+                (("param", "source", "track"), self.startTagParamSource),
+                ("input", self.startTagInput),
+                ("hr", self.startTagHr),
+                ("image", self.startTagImage),
+                ("isindex", self.startTagIsIndex),
+                ("textarea", self.startTagTextarea),
+                ("iframe", self.startTagIFrame),
+                (("noembed", "noframes", "noscript"), self.startTagRawtext),
+                ("select", self.startTagSelect),
+                (("rp", "rt"), self.startTagRpRt),
+                (("option", "optgroup"), self.startTagOpt),
+                (("math"), self.startTagMath),
+                (("svg"), self.startTagSvg),
+                (("caption", "col", "colgroup", "frame", "head",
+                  "tbody", "td", "tfoot", "th", "thead",
+                  "tr"), self.startTagMisplaced)
+            ])
+            self.startTagHandler.default = self.startTagOther
+
+            self.endTagHandler = utils.MethodDispatcher([
+                ("body", self.endTagBody),
+                ("html", self.endTagHtml),
+                (("address", "article", "aside", "blockquote", "button", "center",
+                  "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
+                  "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
+                  "section", "summary", "ul"), self.endTagBlock),
+                ("form", self.endTagForm),
+                ("p", self.endTagP),
+                (("dd", "dt", "li"), self.endTagListItem),
+                (headingElements, self.endTagHeading),
+                (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
+                  "strike", "strong", "tt", "u"), self.endTagFormatting),
+                (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
+                ("br", self.endTagBr),
+            ])
+            self.endTagHandler.default = self.endTagOther
+
+        def isMatchingFormattingElement(self, node1, node2):
+            if node1.name != node2.name or node1.namespace != node2.namespace:
+                return False
+            elif len(node1.attributes) != len(node2.attributes):
+                return False
+            else:
+                attributes1 = sorted(node1.attributes.items())
+                attributes2 = sorted(node2.attributes.items())
+                for attr1, attr2 in zip(attributes1, attributes2):
+                    if attr1 != attr2:
+                        return False
+            return True
+
+        # helper
+        def addFormattingElement(self, token):
+            self.tree.insertElement(token)
+            element = self.tree.openElements[-1]
+
+            matchingElements = []
+            for node in self.tree.activeFormattingElements[::-1]:
+                if node is Marker:
+                    break
+                elif self.isMatchingFormattingElement(node, element):
+                    matchingElements.append(node)
+
+            assert len(matchingElements) <= 3
+            if len(matchingElements) == 3:
+                self.tree.activeFormattingElements.remove(matchingElements[-1])
+            self.tree.activeFormattingElements.append(element)
+
+        # the real deal
+        def processEOF(self):
+            allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td",
+                                          "tfoot", "th", "thead", "tr", "body",
+                                          "html"))
+            for node in self.tree.openElements[::-1]:
+                if node.name not in allowed_elements:
+                    self.parser.parseError("expected-closing-tag-but-got-eof")
+                    break
+            # Stop parsing
+
+        def processSpaceCharactersDropNewline(self, token):
+            # Sometimes (start of 
, , and