From 045b5638b21a968c40a7600ec79c1a4ce117df80 Mon Sep 17 00:00:00 2001 From: Remy Date: Wed, 13 Jul 2011 22:41:25 -0700 Subject: [PATCH] Big Update: Automatic updating, Show what albums/songs you already have, Config fixes, Fixed restart & shutdown buttons --- .gitignore | 17 +- apscheduler/__init__.py | 3 - apscheduler/events.py | 64 - apscheduler/job.py | 134 - apscheduler/jobstores/__init__.py | 0 apscheduler/jobstores/base.py | 25 - apscheduler/jobstores/mongodb_store.py | 84 - apscheduler/jobstores/ram_store.py | 25 - apscheduler/jobstores/shelve_store.py | 65 - apscheduler/jobstores/sqlalchemy_store.py | 87 - apscheduler/scheduler.py | 559 --- apscheduler/threadpool.py | 133 - apscheduler/triggers/__init__.py | 3 - apscheduler/triggers/cron/__init__.py | 135 - apscheduler/triggers/cron/expressions.py | 178 - apscheduler/triggers/cron/fields.py | 99 - apscheduler/triggers/interval.py | 39 - apscheduler/triggers/simple.py | 17 - apscheduler/util.py | 204 -- config.py | 290 -- configcreate.py | 41 - configobj.py | 2468 ------------- data/css/style.css | 18 +- feedparser.py | 3909 --------------------- headphones.py | 212 +- helpers.py | 11 - itunesimport.py | 118 - logger.py | 177 - mb.py | 84 - mover.py | 21 - musicbrainz2/__init__.py | 26 - musicbrainz2/data/__init__.py | 10 - musicbrainz2/data/countrynames.py | 253 -- musicbrainz2/data/languagenames.py | 400 --- musicbrainz2/data/releasetypenames.py | 24 - musicbrainz2/data/scriptnames.py | 59 - musicbrainz2/disc.py | 221 -- musicbrainz2/model.py | 2488 ------------- musicbrainz2/utils.py | 204 -- musicbrainz2/webservice.py | 1519 -------- musicbrainz2/wsxml.py | 1675 --------- pyItunes/Library.py | 41 - pyItunes/Library.pyc | Bin 1990 -> 0 bytes pyItunes/Song.py | 46 - pyItunes/Song.pyc | Bin 1236 -> 0 bytes pyItunes/XMLLibraryParser.py | 42 - pyItunes/XMLLibraryParser.pyc | Bin 2010 -> 0 bytes pyItunes/__init__.py | 3 - pyItunes/__init__.pyc | Bin 305 -> 0 bytes searcher.py | 212 -- templates.py | 43 - threadtools.py | 41 - updater.py | 76 - webServer.py | 541 --- 54 files changed, 124 insertions(+), 17020 deletions(-) delete mode 100644 apscheduler/__init__.py delete mode 100644 apscheduler/events.py delete mode 100644 apscheduler/job.py delete mode 100644 apscheduler/jobstores/__init__.py delete mode 100644 apscheduler/jobstores/base.py delete mode 100644 apscheduler/jobstores/mongodb_store.py delete mode 100644 apscheduler/jobstores/ram_store.py delete mode 100644 apscheduler/jobstores/shelve_store.py delete mode 100644 apscheduler/jobstores/sqlalchemy_store.py delete mode 100644 apscheduler/scheduler.py delete mode 100644 apscheduler/threadpool.py delete mode 100644 apscheduler/triggers/__init__.py delete mode 100644 apscheduler/triggers/cron/__init__.py delete mode 100644 apscheduler/triggers/cron/expressions.py delete mode 100644 apscheduler/triggers/cron/fields.py delete mode 100644 apscheduler/triggers/interval.py delete mode 100644 apscheduler/triggers/simple.py delete mode 100644 apscheduler/util.py delete mode 100644 config.py delete mode 100644 configcreate.py delete mode 100644 configobj.py delete mode 100644 feedparser.py delete mode 100644 helpers.py delete mode 100644 itunesimport.py delete mode 100644 logger.py delete mode 100644 mb.py delete mode 100644 mover.py delete mode 100644 musicbrainz2/__init__.py delete mode 100644 musicbrainz2/data/__init__.py delete mode 100644 musicbrainz2/data/countrynames.py delete mode 100644 musicbrainz2/data/languagenames.py delete mode 100644 musicbrainz2/data/releasetypenames.py delete mode 100644 musicbrainz2/data/scriptnames.py delete mode 100644 musicbrainz2/disc.py delete mode 100644 musicbrainz2/model.py delete mode 100644 musicbrainz2/utils.py delete mode 100644 musicbrainz2/webservice.py delete mode 100644 musicbrainz2/wsxml.py delete mode 100644 pyItunes/Library.py delete mode 100644 pyItunes/Library.pyc delete mode 100644 pyItunes/Song.py delete mode 100644 pyItunes/Song.pyc delete mode 100644 pyItunes/XMLLibraryParser.py delete mode 100644 pyItunes/XMLLibraryParser.pyc delete mode 100644 pyItunes/__init__.py delete mode 100644 pyItunes/__init__.pyc delete mode 100644 searcher.py delete mode 100644 templates.py delete mode 100644 threadtools.py delete mode 100644 updater.py delete mode 100644 webServer.py diff --git a/.gitignore b/.gitignore index 939db295..f1b2e7cb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,17 @@ +# Compiled source # +################### *.pyc -*.log \ No newline at end of file + +# Logs and databases # +###################### +*.log +*.db +*.ini +logs/* + +# OS generated files # +###################### +.DS_Store? +ehthumbs.db +Icon? +Thumbs.db \ No newline at end of file diff --git a/apscheduler/__init__.py b/apscheduler/__init__.py deleted file mode 100644 index 6b502147..00000000 --- a/apscheduler/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -version_info = (2, 0, 0, 'rc', 2) -version = '.'.join(str(n) for n in version_info[:3]) -release = version + ''.join(str(n) for n in version_info[3:]) diff --git a/apscheduler/events.py b/apscheduler/events.py deleted file mode 100644 index 80bde8e6..00000000 --- a/apscheduler/events.py +++ /dev/null @@ -1,64 +0,0 @@ -__all__ = ('EVENT_SCHEDULER_START', 'EVENT_SCHEDULER_SHUTDOWN', - 'EVENT_JOBSTORE_ADDED', 'EVENT_JOBSTORE_REMOVED', - 'EVENT_JOBSTORE_JOB_ADDED', 'EVENT_JOBSTORE_JOB_REMOVED', - 'EVENT_JOB_EXECUTED', 'EVENT_JOB_ERROR', 'EVENT_JOB_MISSED', - 'EVENT_ALL', 'SchedulerEvent', 'JobStoreEvent', 'JobEvent') - - -EVENT_SCHEDULER_START = 1 # The scheduler was started -EVENT_SCHEDULER_SHUTDOWN = 2 # The scheduler was shut down -EVENT_JOBSTORE_ADDED = 4 # A job store was added to the scheduler -EVENT_JOBSTORE_REMOVED = 8 # A job store was removed from the scheduler -EVENT_JOBSTORE_JOB_ADDED = 16 # A job was added to a job store -EVENT_JOBSTORE_JOB_REMOVED = 32 # A job was removed from a job store -EVENT_JOB_EXECUTED = 64 # A job was executed successfully -EVENT_JOB_ERROR = 128 # A job raised an exception during execution -EVENT_JOB_MISSED = 256 # A job's execution was missed -EVENT_ALL = (EVENT_SCHEDULER_START | EVENT_SCHEDULER_SHUTDOWN | - EVENT_JOBSTORE_ADDED | EVENT_JOBSTORE_REMOVED | - EVENT_JOBSTORE_JOB_ADDED | EVENT_JOBSTORE_JOB_REMOVED | - EVENT_JOB_EXECUTED | EVENT_JOB_ERROR | EVENT_JOB_MISSED) - - -class SchedulerEvent(object): - """ - An event that concerns the scheduler itself. - - :var code: the type code of this event - """ - def __init__(self, code): - self.code = code - - -class JobStoreEvent(SchedulerEvent): - """ - An event that concerns job stores. - - :var alias: the alias of the job store involved - :var job: the new job if a job was added - """ - def __init__(self, code, alias, job=None): - SchedulerEvent.__init__(self, code) - self.alias = alias - if job: - self.job = job - - -class JobEvent(SchedulerEvent): - """ - An event that concerns the execution of individual jobs. - - :var job: the job instance in question - :var scheduled_run_time: the time when the job was scheduled to be run - :var retval: the return value of the successfully executed job - :var exception: the exception raised by the job - :var traceback: the traceback object associated with the exception - """ - def __init__(self, code, job, scheduled_run_time, retval=None, - exception=None, traceback=None): - SchedulerEvent.__init__(self, code) - self.job = job - self.scheduled_run_time = scheduled_run_time - self.retval = retval - self.exception = exception - self.traceback = traceback diff --git a/apscheduler/job.py b/apscheduler/job.py deleted file mode 100644 index 868e7234..00000000 --- a/apscheduler/job.py +++ /dev/null @@ -1,134 +0,0 @@ -""" -Jobs represent scheduled tasks. -""" - -from threading import Lock -from datetime import timedelta - -from apscheduler.util import to_unicode, ref_to_obj, get_callable_name,\ - obj_to_ref - - -class MaxInstancesReachedError(Exception): - pass - - -class Job(object): - """ - Encapsulates the actual Job along with its metadata. Job instances - are created by the scheduler when adding jobs, and it should not be - directly instantiated. - - :param trigger: trigger that determines the execution times - :param func: callable to call when the trigger is triggered - :param args: list of positional arguments to call func with - :param kwargs: dict of keyword arguments to call func with - :param name: name of the job (optional) - :param misfire_grace_time: seconds after the designated run time that - the job is still allowed to be run - :param coalesce: run once instead of many times if the scheduler determines - that the job should be run more than once in succession - :param max_runs: maximum number of times this job is allowed to be - triggered - :param max_instances: maximum number of concurrently running - instances allowed for this job - """ - id = None - next_run_time = None - - def __init__(self, trigger, func, args, kwargs, misfire_grace_time, - coalesce, name=None, max_runs=None, max_instances=1): - if not trigger: - raise ValueError('The trigger must not be None') - if not hasattr(func, '__call__'): - raise TypeError('func must be callable') - if not hasattr(args, '__getitem__'): - raise TypeError('args must be a list-like object') - if not hasattr(kwargs, '__getitem__'): - raise TypeError('kwargs must be a dict-like object') - if misfire_grace_time <= 0: - raise ValueError('misfire_grace_time must be a positive value') - if max_runs is not None and max_runs <= 0: - raise ValueError('max_runs must be a positive value') - if max_instances <= 0: - raise ValueError('max_instances must be a positive value') - - self._lock = Lock() - - self.trigger = trigger - self.func = func - self.args = args - self.kwargs = kwargs - self.name = to_unicode(name or get_callable_name(func)) - self.misfire_grace_time = misfire_grace_time - self.coalesce = coalesce - self.max_runs = max_runs - self.max_instances = max_instances - self.runs = 0 - self.instances = 0 - - def compute_next_run_time(self, now): - if self.runs == self.max_runs: - self.next_run_time = None - else: - self.next_run_time = self.trigger.get_next_fire_time(now) - - return self.next_run_time - - def get_run_times(self, now): - """ - Computes the scheduled run times between ``next_run_time`` and ``now``. - """ - run_times = [] - run_time = self.next_run_time - increment = timedelta(microseconds=1) - while ((not self.max_runs or self.runs < self.max_runs) and - run_time and run_time <= now): - run_times.append(run_time) - run_time = self.trigger.get_next_fire_time(run_time + increment) - - return run_times - - def add_instance(self): - self._lock.acquire() - try: - if self.instances == self.max_instances: - raise MaxInstancesReachedError - self.instances += 1 - finally: - self._lock.release() - - def remove_instance(self): - self._lock.acquire() - try: - assert self.instances > 0, 'Already at 0 instances' - self.instances -= 1 - finally: - self._lock.release() - - def __getstate__(self): - # Prevents the unwanted pickling of transient or unpicklable variables - state = self.__dict__.copy() - state.pop('instances', None) - state.pop('func', None) - state.pop('_lock', None) - state['func_ref'] = obj_to_ref(self.func) - return state - - def __setstate__(self, state): - state['instances'] = 0 - state['func'] = ref_to_obj(state.pop('func_ref')) - state['_lock'] = Lock() - self.__dict__ = state - - def __eq__(self, other): - if isinstance(other, Job): - return self.id is not None and other.id == self.id or self is other - return NotImplemented - - def __repr__(self): - return '' % (self.name, repr(self.trigger)) - - def __str__(self): - return '%s (trigger: %s, next run at: %s)' % (self.name, - str(self.trigger), str(self.next_run_time)) diff --git a/apscheduler/jobstores/__init__.py b/apscheduler/jobstores/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/apscheduler/jobstores/base.py b/apscheduler/jobstores/base.py deleted file mode 100644 index f0a16ddb..00000000 --- a/apscheduler/jobstores/base.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Abstract base class that provides the interface needed by all job stores. -Job store methods are also documented here. -""" - - -class JobStore(object): - def add_job(self, job): - """Adds the given job from this store.""" - raise NotImplementedError - - def update_job(self, job): - """Persists the running state of the given job.""" - raise NotImplementedError - - def remove_job(self, job): - """Removes the given jobs from this store.""" - raise NotImplementedError - - def load_jobs(self): - """Loads jobs from this store into memory.""" - raise NotImplementedError - - def close(self): - """Frees any resources still bound to this job store.""" diff --git a/apscheduler/jobstores/mongodb_store.py b/apscheduler/jobstores/mongodb_store.py deleted file mode 100644 index 3f522c25..00000000 --- a/apscheduler/jobstores/mongodb_store.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -Stores jobs in a MongoDB database. -""" -import logging - -from apscheduler.jobstores.base import JobStore -from apscheduler.job import Job - -try: - import cPickle as pickle -except ImportError: # pragma: nocover - import pickle - -try: - from bson.binary import Binary - from pymongo.connection import Connection -except ImportError: # pragma: nocover - raise ImportError('MongoDBJobStore requires PyMongo installed') - -logger = logging.getLogger(__name__) - - -class MongoDBJobStore(JobStore): - def __init__(self, database='apscheduler', collection='jobs', - connection=None, pickle_protocol=pickle.HIGHEST_PROTOCOL, - **connect_args): - self.jobs = [] - self.pickle_protocol = pickle_protocol - - if not database: - raise ValueError('The "database" parameter must not be empty') - if not collection: - raise ValueError('The "collection" parameter must not be empty') - - if connection: - self.connection = connection - else: - self.connection = Connection(**connect_args) - - self.collection = self.connection[database][collection] - - def add_job(self, job): - job_dict = job.__getstate__() - job_dict['trigger'] = Binary(pickle.dumps(job.trigger, - self.pickle_protocol)) - job_dict['args'] = Binary(pickle.dumps(job.args, - self.pickle_protocol)) - job_dict['kwargs'] = Binary(pickle.dumps(job.kwargs, - self.pickle_protocol)) - job.id = self.collection.insert(job_dict) - self.jobs.append(job) - - def remove_job(self, job): - self.collection.remove(job.id) - self.jobs.remove(job) - - def load_jobs(self): - jobs = [] - for job_dict in self.collection.find(): - try: - job = Job.__new__(Job) - job_dict['id'] = job_dict.pop('_id') - job_dict['trigger'] = pickle.loads(job_dict['trigger']) - job_dict['args'] = pickle.loads(job_dict['args']) - job_dict['kwargs'] = pickle.loads(job_dict['kwargs']) - job.__setstate__(job_dict) - jobs.append(job) - except Exception: - job_name = job_dict.get('name', '(unknown)') - logger.exception('Unable to restore job "%s"', job_name) - self.jobs = jobs - - def update_job(self, job): - spec = {'_id': job.id} - document = {'$set': {'next_run_time': job.next_run_time}, - '$inc': {'runs': 1}} - self.collection.update(spec, document) - - def close(self): - self.connection.disconnect() - - def __repr__(self): - connection = self.collection.database.connection - return '<%s (connection=%s)>' % (self.__class__.__name__, connection) diff --git a/apscheduler/jobstores/ram_store.py b/apscheduler/jobstores/ram_store.py deleted file mode 100644 index 85091fe8..00000000 --- a/apscheduler/jobstores/ram_store.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Stores jobs in an array in RAM. Provides no persistence support. -""" - -from apscheduler.jobstores.base import JobStore - - -class RAMJobStore(JobStore): - def __init__(self): - self.jobs = [] - - def add_job(self, job): - self.jobs.append(job) - - def update_job(self, job): - pass - - def remove_job(self, job): - self.jobs.remove(job) - - def load_jobs(self): - pass - - def __repr__(self): - return '<%s>' % (self.__class__.__name__) diff --git a/apscheduler/jobstores/shelve_store.py b/apscheduler/jobstores/shelve_store.py deleted file mode 100644 index 87c95f8f..00000000 --- a/apscheduler/jobstores/shelve_store.py +++ /dev/null @@ -1,65 +0,0 @@ -""" -Stores jobs in a file governed by the :mod:`shelve` module. -""" - -import shelve -import pickle -import random -import logging - -from apscheduler.jobstores.base import JobStore -from apscheduler.job import Job -from apscheduler.util import itervalues - -logger = logging.getLogger(__name__) - - -class ShelveJobStore(JobStore): - MAX_ID = 1000000 - - def __init__(self, path, pickle_protocol=pickle.HIGHEST_PROTOCOL): - self.jobs = [] - self.path = path - self.pickle_protocol = pickle_protocol - self.store = shelve.open(path, 'c', self.pickle_protocol) - - def _generate_id(self): - id = None - while not id: - id = str(random.randint(1, self.MAX_ID)) - if not id in self.store: - return id - - def add_job(self, job): - job.id = self._generate_id() - self.jobs.append(job) - self.store[job.id] = job.__getstate__() - - def update_job(self, job): - job_dict = self.store[job.id] - job_dict['next_run_time'] = job.next_run_time - job_dict['runs'] = job.runs - self.store[job.id] = job_dict - - def remove_job(self, job): - del self.store[job.id] - self.jobs.remove(job) - - def load_jobs(self): - jobs = [] - for job_dict in itervalues(self.store): - try: - job = Job.__new__(Job) - job.__setstate__(job_dict) - jobs.append(job) - except Exception: - job_name = job_dict.get('name', '(unknown)') - logger.exception('Unable to restore job "%s"', job_name) - - self.jobs = jobs - - def close(self): - self.store.close() - - def __repr__(self): - return '<%s (path=%s)>' % (self.__class__.__name__, self.path) diff --git a/apscheduler/jobstores/sqlalchemy_store.py b/apscheduler/jobstores/sqlalchemy_store.py deleted file mode 100644 index 8ece7e24..00000000 --- a/apscheduler/jobstores/sqlalchemy_store.py +++ /dev/null @@ -1,87 +0,0 @@ -""" -Stores jobs in a database table using SQLAlchemy. -""" -import pickle -import logging - -from apscheduler.jobstores.base import JobStore -from apscheduler.job import Job - -try: - from sqlalchemy import * -except ImportError: # pragma: nocover - raise ImportError('SQLAlchemyJobStore requires SQLAlchemy installed') - -logger = logging.getLogger(__name__) - - -class SQLAlchemyJobStore(JobStore): - def __init__(self, url=None, engine=None, tablename='apscheduler_jobs', - metadata=None, pickle_protocol=pickle.HIGHEST_PROTOCOL): - self.jobs = [] - self.pickle_protocol = pickle_protocol - - if engine: - self.engine = engine - elif url: - self.engine = create_engine(url) - else: - raise ValueError('Need either "engine" or "url" defined') - - self.jobs_t = Table(tablename, metadata or MetaData(), - Column('id', Integer, - Sequence(tablename + '_id_seq', optional=True), - primary_key=True), - Column('trigger', PickleType(pickle_protocol, mutable=False), - nullable=False), - Column('func_ref', String(1024), nullable=False), - Column('args', PickleType(pickle_protocol, mutable=False), - nullable=False), - Column('kwargs', PickleType(pickle_protocol, mutable=False), - nullable=False), - Column('name', Unicode(1024), unique=True), - Column('misfire_grace_time', Integer, nullable=False), - Column('coalesce', Boolean, nullable=False), - Column('max_runs', Integer), - Column('max_instances', Integer), - Column('next_run_time', DateTime, nullable=False), - Column('runs', BigInteger)) - - self.jobs_t.create(self.engine, True) - - def add_job(self, job): - job_dict = job.__getstate__() - result = self.engine.execute(self.jobs_t.insert().values(**job_dict)) - job.id = result.inserted_primary_key[0] - self.jobs.append(job) - - def remove_job(self, job): - delete = self.jobs_t.delete().where(self.jobs_t.c.id == job.id) - self.engine.execute(delete) - self.jobs.remove(job) - - def load_jobs(self): - jobs = [] - for row in self.engine.execute(select([self.jobs_t])): - try: - job = Job.__new__(Job) - job_dict = dict(row.items()) - job.__setstate__(job_dict) - jobs.append(job) - except Exception: - job_name = job_dict.get('name', '(unknown)') - logger.exception('Unable to restore job "%s"', job_name) - self.jobs = jobs - - def update_job(self, job): - job_dict = job.__getstate__() - update = self.jobs_t.update().where(self.jobs_t.c.id == job.id).\ - values(next_run_time=job_dict['next_run_time'], - runs=job_dict['runs']) - self.engine.execute(update) - - def close(self): - self.engine.dispose() - - def __repr__(self): - return '<%s (url=%s)>' % (self.__class__.__name__, self.engine.url) diff --git a/apscheduler/scheduler.py b/apscheduler/scheduler.py deleted file mode 100644 index ee08ad8b..00000000 --- a/apscheduler/scheduler.py +++ /dev/null @@ -1,559 +0,0 @@ -""" -This module is the main part of the library. It houses the Scheduler class -and related exceptions. -""" - -from threading import Thread, Event, Lock -from datetime import datetime, timedelta -from logging import getLogger -import os -import sys - -from apscheduler.util import * -from apscheduler.triggers import SimpleTrigger, IntervalTrigger, CronTrigger -from apscheduler.jobstores.ram_store import RAMJobStore -from apscheduler.job import Job, MaxInstancesReachedError -from apscheduler.events import * -from apscheduler.threadpool import ThreadPool - -logger = getLogger(__name__) - - -class SchedulerAlreadyRunningError(Exception): - """ - Raised when attempting to start or configure the scheduler when it's - already running. - """ - - def __str__(self): - return 'Scheduler is already running' - - -class Scheduler(object): - """ - This class is responsible for scheduling jobs and triggering - their execution. - """ - - _stopped = False - _thread = None - - def __init__(self, gconfig={}, **options): - self._wakeup = Event() - self._jobstores = {} - self._jobstores_lock = Lock() - self._listeners = [] - self._listeners_lock = Lock() - self._pending_jobs = [] - self.configure(gconfig, **options) - - def configure(self, gconfig={}, **options): - """ - Reconfigures the scheduler with the given options. Can only be done - when the scheduler isn't running. - """ - if self.running: - raise SchedulerAlreadyRunningError - - # Set general options - config = combine_opts(gconfig, 'apscheduler.', options) - self.misfire_grace_time = int(config.pop('misfire_grace_time', 1)) - self.coalesce = asbool(config.pop('coalesce', True)) - self.daemonic = asbool(config.pop('daemonic', True)) - - # Configure the thread pool - if 'threadpool' in config: - self._threadpool = maybe_ref(config['threadpool']) - else: - threadpool_opts = combine_opts(config, 'threadpool.') - self._threadpool = ThreadPool(**threadpool_opts) - - # Configure job stores - jobstore_opts = combine_opts(config, 'jobstore.') - jobstores = {} - for key, value in jobstore_opts.items(): - store_name, option = key.split('.', 1) - opts_dict = jobstores.setdefault(store_name, {}) - opts_dict[option] = value - - for alias, opts in jobstores.items(): - classname = opts.pop('class') - cls = maybe_ref(classname) - jobstore = cls(**opts) - self.add_jobstore(jobstore, alias, True) - - def start(self): - """ - Starts the scheduler in a new thread. - """ - if self.running: - raise SchedulerAlreadyRunningError - - # Create a RAMJobStore as the default if there is no default job store - if not 'default' in self._jobstores: - self.add_jobstore(RAMJobStore(), 'default', True) - - # Schedule all pending jobs - for job, jobstore in self._pending_jobs: - self._real_add_job(job, jobstore, False) - del self._pending_jobs[:] - - self._stopped = False - self._thread = Thread(target=self._main_loop, name='APScheduler') - self._thread.setDaemon(self.daemonic) - self._thread.start() - - def shutdown(self, wait=True, shutdown_threadpool=True): - """ - Shuts down the scheduler and terminates the thread. - Does not interrupt any currently running jobs. - - :param wait: ``True`` to wait until all currently executing jobs have - finished (if ``shutdown_threadpool`` is also ``True``) - :param shutdown_threadpool: ``True`` to shut down the thread pool - """ - if not self.running: - return - - self._stopped = True - self._wakeup.set() - - # Shut down the thread pool - if shutdown_threadpool: - self._threadpool.shutdown(wait) - - # Wait until the scheduler thread terminates - self._thread.join() - - @property - def running(self): - return not self._stopped and self._thread and self._thread.isAlive() - - def add_jobstore(self, jobstore, alias, quiet=False): - """ - Adds a job store to this scheduler. - - :param jobstore: job store to be added - :param alias: alias for the job store - :param quiet: True to suppress scheduler thread wakeup - :type jobstore: instance of - :class:`~apscheduler.jobstores.base.JobStore` - :type alias: str - """ - self._jobstores_lock.acquire() - try: - if alias in self._jobstores: - raise KeyError('Alias "%s" is already in use' % alias) - self._jobstores[alias] = jobstore - jobstore.load_jobs() - finally: - self._jobstores_lock.release() - - # Notify listeners that a new job store has been added - self._notify_listeners(JobStoreEvent(EVENT_JOBSTORE_ADDED, alias)) - - # Notify the scheduler so it can scan the new job store for jobs - if not quiet: - self._wakeup.set() - - def remove_jobstore(self, alias): - """ - Removes the job store by the given alias from this scheduler. - - :type alias: str - """ - self._jobstores_lock.acquire() - try: - try: - del self._jobstores[alias] - except KeyError: - raise KeyError('No such job store: %s' % alias) - finally: - self._jobstores_lock.release() - - # Notify listeners that a job store has been removed - self._notify_listeners(JobStoreEvent(EVENT_JOBSTORE_REMOVED, alias)) - - def add_listener(self, callback, mask=EVENT_ALL): - """ - Adds a listener for scheduler events. When a matching event occurs, - ``callback`` is executed with the event object as its sole argument. - If the ``mask`` parameter is not provided, the callback will receive - events of all types. - - :param callback: any callable that takes one argument - :param mask: bitmask that indicates which events should be listened to - """ - self._listeners_lock.acquire() - try: - self._listeners.append((callback, mask)) - finally: - self._listeners_lock.release() - - def remove_listener(self, callback): - """ - Removes a previously added event listener. - """ - self._listeners_lock.acquire() - try: - for i, (cb, _) in enumerate(self._listeners): - if callback == cb: - del self._listeners[i] - finally: - self._listeners_lock.release() - - def _notify_listeners(self, event): - self._listeners_lock.acquire() - try: - listeners = tuple(self._listeners) - finally: - self._listeners_lock.release() - - for cb, mask in listeners: - if event.code & mask: - try: - cb(event) - except: - logger.exception('Error notifying listener') - - def _real_add_job(self, job, jobstore, wakeup): - job.compute_next_run_time(datetime.now()) - if not job.next_run_time: - raise ValueError('Not adding job since it would never be run') - - self._jobstores_lock.acquire() - try: - try: - store = self._jobstores[jobstore] - except KeyError: - raise KeyError('No such job store: %s' % jobstore) - store.add_job(job) - finally: - self._jobstores_lock.release() - - # Notify listeners that a new job has been added - event = JobStoreEvent(EVENT_JOBSTORE_JOB_ADDED, jobstore, job) - self._notify_listeners(event) - - logger.info('Added job "%s" to job store "%s"', job, jobstore) - - # Notify the scheduler about the new job - if wakeup: - self._wakeup.set() - - def add_job(self, trigger, func, args, kwargs, jobstore='default', - **options): - """ - Adds the given job to the job list and notifies the scheduler thread. - - :param trigger: alias of the job store to store the job in - :param func: callable to run at the given time - :param args: list of positional arguments to call func with - :param kwargs: dict of keyword arguments to call func with - :param jobstore: alias of the job store to store the job in - :rtype: :class:`~apscheduler.job.Job` - """ - job = Job(trigger, func, args or [], kwargs or {}, - options.pop('misfire_grace_time', self.misfire_grace_time), - options.pop('coalesce', self.coalesce), **options) - if not self.running: - self._pending_jobs.append((job, jobstore)) - logger.info('Adding job tentatively -- it will be properly ' - 'scheduled when the scheduler starts') - else: - self._real_add_job(job, jobstore, True) - return job - - def _remove_job(self, job, alias, jobstore): - jobstore.remove_job(job) - - # Notify listeners that a job has been removed - event = JobStoreEvent(EVENT_JOBSTORE_JOB_REMOVED, alias, job) - self._notify_listeners(event) - - logger.info('Removed job "%s"', job) - - def add_date_job(self, func, date, args=None, kwargs=None, **options): - """ - Schedules a job to be completed on a specific date and time. - - :param func: callable to run at the given time - :param date: the date/time to run the job at - :param name: name of the job - :param jobstore: stored the job in the named (or given) job store - :param misfire_grace_time: seconds after the designated run time that - the job is still allowed to be run - :type date: :class:`datetime.date` - :rtype: :class:`~apscheduler.job.Job` - """ - trigger = SimpleTrigger(date) - return self.add_job(trigger, func, args, kwargs, **options) - - def add_interval_job(self, func, weeks=0, days=0, hours=0, minutes=0, - seconds=0, start_date=None, args=None, kwargs=None, - **options): - """ - Schedules a job to be completed on specified intervals. - - :param func: callable to run - :param weeks: number of weeks to wait - :param days: number of days to wait - :param hours: number of hours to wait - :param minutes: number of minutes to wait - :param seconds: number of seconds to wait - :param start_date: when to first execute the job and start the - counter (default is after the given interval) - :param args: list of positional arguments to call func with - :param kwargs: dict of keyword arguments to call func with - :param name: name of the job - :param jobstore: alias of the job store to add the job to - :param misfire_grace_time: seconds after the designated run time that - the job is still allowed to be run - :rtype: :class:`~apscheduler.job.Job` - """ - interval = timedelta(weeks=weeks, days=days, hours=hours, - minutes=minutes, seconds=seconds) - trigger = IntervalTrigger(interval, start_date) - return self.add_job(trigger, func, args, kwargs, **options) - - def add_cron_job(self, func, year='*', month='*', day='*', week='*', - day_of_week='*', hour='*', minute='*', second='*', - start_date=None, args=None, kwargs=None, **options): - """ - Schedules a job to be completed on times that match the given - expressions. - - :param func: callable to run - :param year: year to run on - :param month: month to run on (0 = January) - :param day: day of month to run on - :param week: week of the year to run on - :param day_of_week: weekday to run on (0 = Monday) - :param hour: hour to run on - :param second: second to run on - :param args: list of positional arguments to call func with - :param kwargs: dict of keyword arguments to call func with - :param name: name of the job - :param jobstore: alias of the job store to add the job to - :param misfire_grace_time: seconds after the designated run time that - the job is still allowed to be run - :return: the scheduled job - :rtype: :class:`~apscheduler.job.Job` - """ - trigger = CronTrigger(year=year, month=month, day=day, week=week, - day_of_week=day_of_week, hour=hour, - minute=minute, second=second, - start_date=start_date) - return self.add_job(trigger, func, args, kwargs, **options) - - def cron_schedule(self, **options): - """ - Decorator version of :meth:`add_cron_job`. - This decorator does not wrap its host function. - Unscheduling decorated functions is possible by passing the ``job`` - attribute of the scheduled function to :meth:`unschedule_job`. - """ - def inner(func): - func.job = self.add_cron_job(func, **options) - return func - return inner - - def interval_schedule(self, **options): - """ - Decorator version of :meth:`add_interval_job`. - This decorator does not wrap its host function. - Unscheduling decorated functions is possible by passing the ``job`` - attribute of the scheduled function to :meth:`unschedule_job`. - """ - def inner(func): - func.job = self.add_interval_job(func, **options) - return func - return inner - - def get_jobs(self): - """ - Returns a list of all scheduled jobs. - - :return: list of :class:`~apscheduler.job.Job` objects - """ - self._jobstores_lock.acquire() - try: - jobs = [] - for jobstore in itervalues(self._jobstores): - jobs.extend(jobstore.jobs) - return jobs - finally: - self._jobstores_lock.release() - - def unschedule_job(self, job): - """ - Removes a job, preventing it from being run any more. - """ - self._jobstores_lock.acquire() - try: - for alias, jobstore in iteritems(self._jobstores): - if job in list(jobstore.jobs): - self._remove_job(job, alias, jobstore) - return - finally: - self._jobstores_lock.release() - - raise KeyError('Job "%s" is not scheduled in any job store' % job) - - def unschedule_func(self, func): - """ - Removes all jobs that would execute the given function. - """ - found = False - self._jobstores_lock.acquire() - try: - for alias, jobstore in iteritems(self._jobstores): - for job in list(jobstore.jobs): - if job.func == func: - self._remove_job(job, alias, jobstore) - found = True - finally: - self._jobstores_lock.release() - - if not found: - raise KeyError('The given function is not scheduled in this ' - 'scheduler') - - def print_jobs(self, out=None): - """ - Prints out a textual listing of all jobs currently scheduled on this - scheduler. - - :param out: a file-like object to print to (defaults to **sys.stdout** - if nothing is given) - """ - out = out or sys.stdout - job_strs = [] - self._jobstores_lock.acquire() - try: - for alias, jobstore in iteritems(self._jobstores): - job_strs.append('Jobstore %s:' % alias) - if jobstore.jobs: - for job in jobstore.jobs: - job_strs.append(' %s' % job) - else: - job_strs.append(' No scheduled jobs') - finally: - self._jobstores_lock.release() - - out.write(os.linesep.join(job_strs)) - - def _run_job(self, job, run_times): - """ - Acts as a harness that runs the actual job code in a thread. - """ - for run_time in run_times: - # See if the job missed its run time window, and handle possible - # misfires accordingly - difference = datetime.now() - run_time - grace_time = timedelta(seconds=job.misfire_grace_time) - if difference > grace_time: - # Notify listeners about a missed run - event = JobEvent(EVENT_JOB_MISSED, job, run_time) - self._notify_listeners(event) - logger.warning('Run time of job "%s" was missed by %s', - job, difference) - else: - try: - job.add_instance() - except MaxInstancesReachedError: - event = JobEvent(EVENT_JOB_MISSED, job, run_time) - self._notify_listeners(event) - logger.warning('Execution of job "%s" skipped: ' - 'maximum number of running instances ' - 'reached (%d)', job, job.max_instances) - break - - logger.info('Running job "%s" (scheduled at %s)', job, - run_time) - - try: - retval = job.func(*job.args, **job.kwargs) - except: - # Notify listeners about the exception - exc, tb = sys.exc_info()[1:] - event = JobEvent(EVENT_JOB_ERROR, job, run_time, - exception=exc, traceback=tb) - self._notify_listeners(event) - - logger.exception('Job "%s" raised an exception', job) - else: - # Notify listeners about successful execution - event = JobEvent(EVENT_JOB_EXECUTED, job, run_time, - retval=retval) - self._notify_listeners(event) - - logger.info('Job "%s" executed successfully', job) - - job.remove_instance() - - # If coalescing is enabled, don't attempt any further runs - if job.coalesce: - break - - def _process_jobs(self, now): - """ - Iterates through jobs in every jobstore, starts pending jobs - and figures out the next wakeup time. - """ - next_wakeup_time = None - self._jobstores_lock.acquire() - try: - for alias, jobstore in iteritems(self._jobstores): - for job in tuple(jobstore.jobs): - run_times = job.get_run_times(now) - if run_times: - self._threadpool.submit(self._run_job, job, run_times) - - # Increase the job's run count - if job.coalesce: - job.runs += 1 - else: - job.runs += len(run_times) - - # Update the job, but don't keep finished jobs around - if job.compute_next_run_time(now + timedelta(microseconds=1)): - jobstore.update_job(job) - else: - self._remove_job(job, alias, jobstore) - - if not next_wakeup_time: - next_wakeup_time = job.next_run_time - elif job.next_run_time: - next_wakeup_time = min(next_wakeup_time, - job.next_run_time) - return next_wakeup_time - finally: - self._jobstores_lock.release() - - def _main_loop(self): - """Executes jobs on schedule.""" - - logger.info('Scheduler started') - self._notify_listeners(SchedulerEvent(EVENT_SCHEDULER_START)) - - self._wakeup.clear() - while not self._stopped: - logger.debug('Looking for jobs to run') - now = datetime.now() - next_wakeup_time = self._process_jobs(now) - - # Sleep until the next job is scheduled to be run, - # a new job is added or the scheduler is stopped - if next_wakeup_time is not None: - wait_seconds = time_difference(next_wakeup_time, now) - logger.debug('Next wakeup is due at %s (in %f seconds)', - next_wakeup_time, wait_seconds) - self._wakeup.wait(wait_seconds) - else: - logger.debug('No jobs; waiting until a job is added') - self._wakeup.wait() - self._wakeup.clear() - - logger.info('Scheduler has been shut down') - self._notify_listeners(SchedulerEvent(EVENT_SCHEDULER_SHUTDOWN)) diff --git a/apscheduler/threadpool.py b/apscheduler/threadpool.py deleted file mode 100644 index 8ec47da0..00000000 --- a/apscheduler/threadpool.py +++ /dev/null @@ -1,133 +0,0 @@ -""" -Generic thread pool class. Modeled after Java's ThreadPoolExecutor. -Please note that this ThreadPool does *not* fully implement the PEP 3148 -ThreadPool! -""" - -from threading import Thread, Lock, currentThread -from weakref import ref -import logging -import atexit - -try: - from queue import Queue, Empty -except ImportError: - from Queue import Queue, Empty - -logger = logging.getLogger(__name__) -_threadpools = set() - - -# Worker threads are daemonic in order to let the interpreter exit without -# an explicit shutdown of the thread pool. The following trick is necessary -# to allow worker threads to finish cleanly. -def _shutdown_all(): - for pool_ref in tuple(_threadpools): - pool = pool_ref() - if pool: - pool.shutdown() - -atexit.register(_shutdown_all) - - -class ThreadPool(object): - def __init__(self, core_threads=0, max_threads=20, keepalive=1): - """ - :param core_threads: maximum number of persistent threads in the pool - :param max_threads: maximum number of total threads in the pool - :param thread_class: callable that creates a Thread object - :param keepalive: seconds to keep non-core worker threads waiting - for new tasks - """ - self.core_threads = core_threads - self.max_threads = max(max_threads, core_threads, 1) - self.keepalive = keepalive - self._queue = Queue() - self._threads_lock = Lock() - self._threads = set() - self._shutdown = False - - _threadpools.add(ref(self)) - logger.info('Started thread pool with %d core threads and %s maximum ' - 'threads', core_threads, max_threads or 'unlimited') - - def _adjust_threadcount(self): - self._threads_lock.acquire() - try: - if self.num_threads < self.max_threads: - self._add_thread(self.num_threads < self.core_threads) - finally: - self._threads_lock.release() - - def _add_thread(self, core): - t = Thread(target=self._run_jobs, args=(core,)) - t.setDaemon(True) - t.start() - self._threads.add(t) - - def _run_jobs(self, core): - logger.debug('Started worker thread') - block = True - timeout = None - if not core: - block = self.keepalive > 0 - timeout = self.keepalive - - while True: - try: - func, args, kwargs = self._queue.get(block, timeout) - except Empty: - break - - if self._shutdown: - break - - try: - func(*args, **kwargs) - except: - logger.exception('Error in worker thread') - - self._threads_lock.acquire() - self._threads.remove(currentThread()) - self._threads_lock.release() - - logger.debug('Exiting worker thread') - - @property - def num_threads(self): - return len(self._threads) - - def submit(self, func, *args, **kwargs): - if self._shutdown: - raise RuntimeError('Cannot schedule new tasks after shutdown') - - self._queue.put((func, args, kwargs)) - self._adjust_threadcount() - - def shutdown(self, wait=True): - if self._shutdown: - return - - logging.info('Shutting down thread pool') - self._shutdown = True - _threadpools.remove(ref(self)) - - self._threads_lock.acquire() - for _ in range(self.num_threads): - self._queue.put((None, None, None)) - self._threads_lock.release() - - if wait: - self._threads_lock.acquire() - threads = tuple(self._threads) - self._threads_lock.release() - for thread in threads: - thread.join() - - def __repr__(self): - if self.max_threads: - threadcount = '%d/%d' % (self.num_threads, self.max_threads) - else: - threadcount = '%d' % self.num_threads - - return '' % (id(self), threadcount) diff --git a/apscheduler/triggers/__init__.py b/apscheduler/triggers/__init__.py deleted file mode 100644 index 74a97884..00000000 --- a/apscheduler/triggers/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from apscheduler.triggers.cron import CronTrigger -from apscheduler.triggers.interval import IntervalTrigger -from apscheduler.triggers.simple import SimpleTrigger diff --git a/apscheduler/triggers/cron/__init__.py b/apscheduler/triggers/cron/__init__.py deleted file mode 100644 index 3f8d9a8f..00000000 --- a/apscheduler/triggers/cron/__init__.py +++ /dev/null @@ -1,135 +0,0 @@ -from datetime import date, datetime - -from apscheduler.triggers.cron.fields import * -from apscheduler.util import datetime_ceil, convert_to_datetime - - -class CronTrigger(object): - FIELD_NAMES = ('year', 'month', 'day', 'week', 'day_of_week', 'hour', - 'minute', 'second') - FIELDS_MAP = {'year': BaseField, - 'month': BaseField, - 'week': WeekField, - 'day': DayOfMonthField, - 'day_of_week': DayOfWeekField, - 'hour': BaseField, - 'minute': BaseField, - 'second': BaseField} - - def __init__(self, **values): - self.start_date = values.pop('start_date', None) - if self.start_date: - self.start_date = convert_to_datetime(self.start_date) - - self.fields = [] - for field_name in self.FIELD_NAMES: - if field_name in values: - exprs = values.pop(field_name) - is_default = False - elif not values: - exprs = DEFAULT_VALUES[field_name] - is_default = True - else: - exprs = '*' - is_default = True - - field_class = self.FIELDS_MAP[field_name] - field = field_class(field_name, exprs, is_default) - self.fields.append(field) - - def _increment_field_value(self, dateval, fieldnum): - """ - Increments the designated field and resets all less significant fields - to their minimum values. - - :type dateval: datetime - :type fieldnum: int - :type amount: int - :rtype: tuple - :return: a tuple containing the new date, and the number of the field - that was actually incremented - """ - i = 0 - values = {} - while i < len(self.fields): - field = self.fields[i] - if not field.REAL: - if i == fieldnum: - fieldnum -= 1 - i -= 1 - else: - i += 1 - continue - - if i < fieldnum: - values[field.name] = field.get_value(dateval) - i += 1 - elif i > fieldnum: - values[field.name] = field.get_min(dateval) - i += 1 - else: - value = field.get_value(dateval) - maxval = field.get_max(dateval) - if value == maxval: - fieldnum -= 1 - i -= 1 - else: - values[field.name] = value + 1 - i += 1 - - return datetime(**values), fieldnum - - def _set_field_value(self, dateval, fieldnum, new_value): - values = {} - for i, field in enumerate(self.fields): - if field.REAL: - if i < fieldnum: - values[field.name] = field.get_value(dateval) - elif i > fieldnum: - values[field.name] = field.get_min(dateval) - else: - values[field.name] = new_value - - return datetime(**values) - - def get_next_fire_time(self, start_date): - if self.start_date: - start_date = max(start_date, self.start_date) - next_date = datetime_ceil(start_date) - fieldnum = 0 - while 0 <= fieldnum < len(self.fields): - field = self.fields[fieldnum] - curr_value = field.get_value(next_date) - next_value = field.get_next_value(next_date) - - if next_value is None: - # No valid value was found - next_date, fieldnum = self._increment_field_value(next_date, - fieldnum - 1) - elif next_value > curr_value: - # A valid, but higher than the starting value, was found - if field.REAL: - next_date = self._set_field_value(next_date, fieldnum, - next_value) - fieldnum += 1 - else: - next_date, fieldnum = self._increment_field_value(next_date, - fieldnum) - else: - # A valid value was found, no changes necessary - fieldnum += 1 - - if fieldnum >= 0: - return next_date - - def __str__(self): - options = ["%s='%s'" % (f.name, str(f)) for f in self.fields - if not f.is_default] - return 'cron[%s]' % (', '.join(options)) - - def __repr__(self): - options = ["%s='%s'" % (f.name, str(f)) for f in self.fields - if not f.is_default] - if self.start_date: - options.append("start_date='%s'" % self.start_date.isoformat(' ')) - return '<%s (%s)>' % (self.__class__.__name__, ', '.join(options)) diff --git a/apscheduler/triggers/cron/expressions.py b/apscheduler/triggers/cron/expressions.py deleted file mode 100644 index 018c7a30..00000000 --- a/apscheduler/triggers/cron/expressions.py +++ /dev/null @@ -1,178 +0,0 @@ -""" -This module contains the expressions applicable for CronTrigger's fields. -""" - -from calendar import monthrange -import re - -from apscheduler.util import asint - -__all__ = ('AllExpression', 'RangeExpression', 'WeekdayRangeExpression', - 'WeekdayPositionExpression') - - -WEEKDAYS = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] - - -class AllExpression(object): - value_re = re.compile(r'\*(?:/(?P\d+))?$') - - def __init__(self, step=None): - self.step = asint(step) - if self.step == 0: - raise ValueError('Increment must be higher than 0') - - def get_next_value(self, date, field): - start = field.get_value(date) - minval = field.get_min(date) - maxval = field.get_max(date) - start = max(start, minval) - - if not self.step: - next = start - else: - distance_to_next = (self.step - (start - minval)) % self.step - next = start + distance_to_next - - if next <= maxval: - return next - - def __str__(self): - if self.step: - return '*/%d' % self.step - return '*' - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, self.step) - - -class RangeExpression(AllExpression): - value_re = re.compile( - r'(?P\d+)(?:-(?P\d+))?(?:/(?P\d+))?$') - - def __init__(self, first, last=None, step=None): - AllExpression.__init__(self, step) - first = asint(first) - last = asint(last) - if last is None and step is None: - last = first - if last is not None and first > last: - raise ValueError('The minimum value in a range must not be ' - 'higher than the maximum') - self.first = first - self.last = last - - def get_next_value(self, date, field): - start = field.get_value(date) - minval = field.get_min(date) - maxval = field.get_max(date) - - # Apply range limits - minval = max(minval, self.first) - if self.last is not None: - maxval = min(maxval, self.last) - start = max(start, minval) - - if not self.step: - next = start - else: - distance_to_next = (self.step - (start - minval)) % self.step - next = start + distance_to_next - - if next <= maxval: - return next - - def __str__(self): - if self.last != self.first and self.last is not None: - range = '%d-%d' % (self.first, self.last) - else: - range = str(self.first) - - if self.step: - return '%s/%d' % (range, self.step) - return range - - def __repr__(self): - args = [str(self.first)] - if self.last != self.first and self.last is not None or self.step: - args.append(str(self.last)) - if self.step: - args.append(str(self.step)) - return "%s(%s)" % (self.__class__.__name__, ', '.join(args)) - - -class WeekdayRangeExpression(RangeExpression): - value_re = re.compile(r'(?P[a-z]+)(?:-(?P[a-z]+))?', - re.IGNORECASE) - - def __init__(self, first, last=None): - try: - first_num = WEEKDAYS.index(first.lower()) - except ValueError: - raise ValueError('Invalid weekday name "%s"' % first) - - if last: - try: - last_num = WEEKDAYS.index(last.lower()) - except ValueError: - raise ValueError('Invalid weekday name "%s"' % last) - else: - last_num = None - - RangeExpression.__init__(self, first_num, last_num) - - def __str__(self): - if self.last != self.first and self.last is not None: - return '%s-%s' % (WEEKDAYS[self.first], WEEKDAYS[self.last]) - return WEEKDAYS[self.first] - - def __repr__(self): - args = ["'%s'" % WEEKDAYS[self.first]] - if self.last != self.first and self.last is not None: - args.append("'%s'" % WEEKDAYS[self.last]) - return "%s(%s)" % (self.__class__.__name__, ', '.join(args)) - - -class WeekdayPositionExpression(AllExpression): - options = ['1st', '2nd', '3rd', '4th', '5th', 'last'] - value_re = re.compile(r'(?P%s) +(?P(?:\d+|\w+))' - % '|'.join(options), re.IGNORECASE) - - def __init__(self, option_name, weekday_name): - try: - self.option_num = self.options.index(option_name.lower()) - except ValueError: - raise ValueError('Invalid weekday position "%s"' % option_name) - - try: - self.weekday = WEEKDAYS.index(weekday_name.lower()) - except ValueError: - raise ValueError('Invalid weekday name "%s"' % weekday_name) - - def get_next_value(self, date, field): - # Figure out the weekday of the month's first day and the number - # of days in that month - first_day_wday, last_day = monthrange(date.year, date.month) - - # Calculate which day of the month is the first of the target weekdays - first_hit_day = self.weekday - first_day_wday + 1 - if first_hit_day <= 0: - first_hit_day += 7 - - # Calculate what day of the month the target weekday would be - if self.option_num < 5: - target_day = first_hit_day + self.option_num * 7 - else: - target_day = first_hit_day + ((last_day - first_hit_day) / 7) * 7 - - if target_day <= last_day and target_day >= date.day: - return target_day - - def __str__(self): - return '%s %s' % (self.options[self.option_num], - WEEKDAYS[self.weekday]) - - def __repr__(self): - return "%s('%s', '%s')" % (self.__class__.__name__, - self.options[self.option_num], - WEEKDAYS[self.weekday]) diff --git a/apscheduler/triggers/cron/fields.py b/apscheduler/triggers/cron/fields.py deleted file mode 100644 index ef970cc9..00000000 --- a/apscheduler/triggers/cron/fields.py +++ /dev/null @@ -1,99 +0,0 @@ -""" -Fields represent CronTrigger options which map to :class:`~datetime.datetime` -fields. -""" - -from calendar import monthrange - -from apscheduler.triggers.cron.expressions import * - -__all__ = ('MIN_VALUES', 'MAX_VALUES', 'DEFAULT_VALUES', 'BaseField', - 'WeekField', 'DayOfMonthField', 'DayOfWeekField') - - -MIN_VALUES = {'year': 1970, 'month': 1, 'day': 1, 'week': 1, - 'day_of_week': 0, 'hour': 0, 'minute': 0, 'second': 0} -MAX_VALUES = {'year': 2 ** 63, 'month': 12, 'day:': 31, 'week': 53, - 'day_of_week': 6, 'hour': 23, 'minute': 59, 'second': 59} -DEFAULT_VALUES = {'year': '*', 'month': 1, 'day': 1, 'week': '*', - 'day_of_week': '*', 'hour': 0, 'minute': 0, 'second': 0} - - -class BaseField(object): - REAL = True - COMPILERS = [AllExpression, RangeExpression] - - def __init__(self, name, exprs, is_default=False): - self.name = name - self.is_default = is_default - self.compile_expressions(exprs) - - def get_min(self, dateval): - return MIN_VALUES[self.name] - - def get_max(self, dateval): - return MAX_VALUES[self.name] - - def get_value(self, dateval): - return getattr(dateval, self.name) - - def get_next_value(self, dateval): - smallest = None - for expr in self.expressions: - value = expr.get_next_value(dateval, self) - if smallest is None or (value is not None and value < smallest): - smallest = value - - return smallest - - def compile_expressions(self, exprs): - self.expressions = [] - - # Split a comma-separated expression list, if any - exprs = str(exprs).strip() - if ',' in exprs: - for expr in exprs.split(','): - self.compile_expression(expr) - else: - self.compile_expression(exprs) - - def compile_expression(self, expr): - for compiler in self.COMPILERS: - match = compiler.value_re.match(expr) - if match: - compiled_expr = compiler(**match.groupdict()) - self.expressions.append(compiled_expr) - return - - raise ValueError('Unrecognized expression "%s" for field "%s"' % - (expr, self.name)) - - def __str__(self): - expr_strings = (str(e) for e in self.expressions) - return ','.join(expr_strings) - - def __repr__(self): - return "%s('%s', '%s')" % (self.__class__.__name__, self.name, - str(self)) - - -class WeekField(BaseField): - REAL = False - - def get_value(self, dateval): - return dateval.isocalendar()[1] - - -class DayOfMonthField(BaseField): - COMPILERS = BaseField.COMPILERS + [WeekdayPositionExpression] - - def get_max(self, dateval): - return monthrange(dateval.year, dateval.month)[1] - - -class DayOfWeekField(BaseField): - REAL = False - COMPILERS = BaseField.COMPILERS + [WeekdayRangeExpression] - - def get_value(self, dateval): - return dateval.weekday() diff --git a/apscheduler/triggers/interval.py b/apscheduler/triggers/interval.py deleted file mode 100644 index dd16d777..00000000 --- a/apscheduler/triggers/interval.py +++ /dev/null @@ -1,39 +0,0 @@ -from datetime import datetime, timedelta -from math import ceil - -from apscheduler.util import convert_to_datetime, timedelta_seconds - - -class IntervalTrigger(object): - def __init__(self, interval, start_date=None): - if not isinstance(interval, timedelta): - raise TypeError('interval must be a timedelta') - if start_date: - start_date = convert_to_datetime(start_date) - - self.interval = interval - self.interval_length = timedelta_seconds(self.interval) - if self.interval_length == 0: - self.interval = timedelta(seconds=1) - self.interval_length = 1 - - if start_date is None: - self.start_date = datetime.now() + self.interval - else: - self.start_date = convert_to_datetime(start_date) - - def get_next_fire_time(self, start_date): - if start_date < self.start_date: - return self.start_date - - timediff_seconds = timedelta_seconds(start_date - self.start_date) - next_interval_num = int(ceil(timediff_seconds / self.interval_length)) - return self.start_date + self.interval * next_interval_num - - def __str__(self): - return 'interval[%s]' % str(self.interval) - - def __repr__(self): - return "<%s (interval=%s, start_date=%s)>" % ( - self.__class__.__name__, repr(self.interval), - repr(self.start_date)) diff --git a/apscheduler/triggers/simple.py b/apscheduler/triggers/simple.py deleted file mode 100644 index ea61b3f1..00000000 --- a/apscheduler/triggers/simple.py +++ /dev/null @@ -1,17 +0,0 @@ -from apscheduler.util import convert_to_datetime - - -class SimpleTrigger(object): - def __init__(self, run_date): - self.run_date = convert_to_datetime(run_date) - - def get_next_fire_time(self, start_date): - if self.run_date >= start_date: - return self.run_date - - def __str__(self): - return 'date[%s]' % str(self.run_date) - - def __repr__(self): - return '<%s (run_date=%s)>' % ( - self.__class__.__name__, repr(self.run_date)) diff --git a/apscheduler/util.py b/apscheduler/util.py deleted file mode 100644 index af28ae49..00000000 --- a/apscheduler/util.py +++ /dev/null @@ -1,204 +0,0 @@ -""" -This module contains several handy functions primarily meant for internal use. -""" - -from datetime import date, datetime, timedelta -from time import mktime -import re -import sys - -__all__ = ('asint', 'asbool', 'convert_to_datetime', 'timedelta_seconds', - 'time_difference', 'datetime_ceil', 'combine_opts', - 'get_callable_name', 'obj_to_ref', 'ref_to_obj', 'maybe_ref', - 'to_unicode', 'iteritems', 'itervalues', 'xrange') - - -def asint(text): - """ - Safely converts a string to an integer, returning None if the string - is None. - - :type text: str - :rtype: int - """ - if text is not None: - return int(text) - - -def asbool(obj): - """ - Interprets an object as a boolean value. - - :rtype: bool - """ - if isinstance(obj, str): - obj = obj.strip().lower() - if obj in ('true', 'yes', 'on', 'y', 't', '1'): - return True - if obj in ('false', 'no', 'off', 'n', 'f', '0'): - return False - raise ValueError('Unable to interpret value "%s" as boolean' % obj) - return bool(obj) - - -_DATE_REGEX = re.compile( - r'(?P\d{4})-(?P\d{1,2})-(?P\d{1,2})' - r'(?: (?P\d{1,2}):(?P\d{1,2}):(?P\d{1,2})' - r'(?:\.(?P\d{1,6}))?)?') - - -def convert_to_datetime(input): - """ - Converts the given object to a datetime object, if possible. - If an actual datetime object is passed, it is returned unmodified. - If the input is a string, it is parsed as a datetime. - - Date strings are accepted in three different forms: date only (Y-m-d), - date with time (Y-m-d H:M:S) or with date+time with microseconds - (Y-m-d H:M:S.micro). - - :rtype: datetime - """ - if isinstance(input, datetime): - return input - elif isinstance(input, date): - return datetime.fromordinal(input.toordinal()) - elif isinstance(input, str): - m = _DATE_REGEX.match(input) - if not m: - raise ValueError('Invalid date string') - values = [(k, int(v or 0)) for k, v in m.groupdict().items()] - values = dict(values) - return datetime(**values) - raise TypeError('Unsupported input type: %s' % type(input)) - - -def timedelta_seconds(delta): - """ - Converts the given timedelta to seconds. - - :type delta: timedelta - :rtype: float - """ - return delta.days * 24 * 60 * 60 + delta.seconds + \ - delta.microseconds / 1000000.0 - - -def time_difference(date1, date2): - """ - Returns the time difference in seconds between the given two - datetime objects. The difference is calculated as: date1 - date2. - - :param date1: the later datetime - :type date1: datetime - :param date2: the earlier datetime - :type date2: datetime - :rtype: float - """ - later = mktime(date1.timetuple()) + date1.microsecond / 1000000.0 - earlier = mktime(date2.timetuple()) + date2.microsecond / 1000000.0 - return later - earlier - - -def datetime_ceil(dateval): - """ - Rounds the given datetime object upwards. - - :type dateval: datetime - """ - if dateval.microsecond > 0: - return dateval + timedelta(seconds=1, - microseconds=-dateval.microsecond) - return dateval - - -def combine_opts(global_config, prefix, local_config={}): - """ - Returns a subdictionary from keys and values of ``global_config`` where - the key starts with the given prefix, combined with options from - local_config. The keys in the subdictionary have the prefix removed. - - :type global_config: dict - :type prefix: str - :type local_config: dict - :rtype: dict - """ - prefixlen = len(prefix) - subconf = {} - for key, value in global_config.items(): - if key.startswith(prefix): - key = key[prefixlen:] - subconf[key] = value - subconf.update(local_config) - return subconf - - -def get_callable_name(func): - """ - Returns the best available display name for the given function/callable. - """ - name = func.__module__ - if hasattr(func, '__self__') and func.__self__: - name += '.' + func.__self__.__name__ - elif hasattr(func, 'im_self') and func.im_self: # py2.4, 2.5 - name += '.' + func.im_self.__name__ - if hasattr(func, '__name__'): - name += '.' + func.__name__ - return name - - -def obj_to_ref(obj): - """ - Returns the path to the given object. - """ - ref = '%s:%s' % (obj.__module__, obj.__name__) - try: - obj2 = ref_to_obj(ref) - except AttributeError: - pass - else: - if obj2 == obj: - return ref - - raise ValueError('Only module level objects are supported') - - -def ref_to_obj(ref): - """ - Returns the object pointed to by ``ref``. - """ - modulename, rest = ref.split(':', 1) - obj = __import__(modulename) - for name in modulename.split('.')[1:] + rest.split('.'): - obj = getattr(obj, name) - return obj - - -def maybe_ref(ref): - """ - Returns the object that the given reference points to, if it is indeed - a reference. If it is not a reference, the object is returned as-is. - """ - if not isinstance(ref, str): - return ref - return ref_to_obj(ref) - - -def to_unicode(string, encoding='ascii'): - """ - Safely converts a string to a unicode representation on any - Python version. - """ - if hasattr(string, 'decode'): - return string.decode(encoding, 'ignore') - return string - - -if sys.version_info < (3, 0): # pragma: nocover - iteritems = lambda d: d.iteritems() - itervalues = lambda d: d.itervalues() - xrange = xrange -else: # pragma: nocover - iteritems = lambda d: d.items() - itervalues = lambda d: d.values() - xrange = range diff --git a/config.py b/config.py deleted file mode 100644 index 3b95eb78..00000000 --- a/config.py +++ /dev/null @@ -1,290 +0,0 @@ -import os -from configobj import ConfigObj -from headphones import config_file - -config = ConfigObj(config_file) - -General = config['General'] -http_host = General['http_host'] -http_port = General['http_port'] -http_username = General['http_username'] -http_password = General['http_password'] -try: - http_root = General['http_root'] -except KeyError: - General['http_root'] = '' - config.write() -launch_browser = General['launch_browser'] -usenet_retention = General['usenet_retention'] -include_lossless = General['include_lossless'] -flac_to_mp3 = General['flac_to_mp3'] -move_to_itunes = General['move_to_itunes'] -path_to_itunes = General['path_to_itunes'] -rename_mp3s = General['rename_mp3s'] -cleanup = General['cleanup'] -add_album_art = General['add_album_art'] -music_download_dir = General['music_download_dir'] -NZBMatrix = config['NZBMatrix'] -nzbmatrix = NZBMatrix['nzbmatrix'] -nzbmatrix_username = NZBMatrix['nzbmatrix_username'] -nzbmatrix_apikey = NZBMatrix['nzbmatrix_apikey'] -Newznab = config['Newznab'] -newznab = Newznab['newznab'] -newznab_host = Newznab['newznab_host'] -newznab_apikey = Newznab['newznab_apikey'] -NZBsorg = config['NZBsorg'] -nzbsorg = NZBsorg['nzbsorg'] -nzbsorg_uid = NZBsorg['nzbsorg_uid'] -nzbsorg_hash = NZBsorg['nzbsorg_hash'] -SABnzbd = config['SABnzbd'] -sab_username = SABnzbd['sab_username'] -sab_password = SABnzbd['sab_password'] -sab_apikey = SABnzbd['sab_apikey'] -sab_category = SABnzbd['sab_category'] -sab_host = SABnzbd['sab_host'] - -def var_to_chk(variable): - if variable == '1': - return 'Checked' - else: - return '' - -form = ''' -
-
- -
-
-
-
-

Web Interface

- - - - - - - - - - - - - - - - -
-

- HTTP Host:

-
- i.e. localhost or 0.0.0.0 -

-
-

- HTTP Username:

- -

-
-

- HTTP Port:

- -

-
-

- HTTP Password:

- -

-
-

Launch Browser on Startup:

-
- -

Download Settings

- - - - - - - - - - - - - - - - - - - - - - - -
-

SABnzbd Host:


- - usually localhost:8080 -
-

SABnzbd Username:

-
-
- -

SABnzbd API:

-
-
- -

SABnzbd Password:

-
-
- -

SABnzbd Category:

-
-
- -

Music Download Directory:


- - Absolute or relative path to the dir where SAB downloads your music
- i.e. Downloads/music or /Users/name/Downloads/music
-
-
- -

Usenet Retention:

-
- -

Search Providers

- - - - - - - - - - - - - - - - - - - - - - - - - -
-

NZBMatrix:

-
-

- NZBMatrix Username:
- -

-
-

- NZBMatrix API:
- -

-
-
- -

Newznab:

-
-
- -

- Newznab Host:
-
- i.e. http://nzb.su -

-
-
- -

- Newznab API:
- -

-
-
- -

NZBs.org:

-
-
- -

- NZBs.org UID:
- -

-
-
- -

- NZBs.org Hash:
- -

-
- -

Quality & Post Processing

- - - - - - - - - - - - - - - - -
-

Album Quality:

- Include lossless
- Convert lossless to mp3 -
-

-

iTunes:

- Move downloads to iTunes -

-
-
- -

Path to Music folder:
-
- i.e. /Users/name/Music/iTunes or /Volumes/share/music -

-
- Renaming & Metadata: -

- Rename & add metadata -
- Delete leftover files -

-
-
-

Album Art:

- Add album art -
- -


- (For now, all changes require a restart to take effect)

-
-
-
''' % (http_host, http_username, http_port, http_password, var_to_chk(launch_browser), sab_host, sab_username, sab_apikey, sab_password, sab_category, music_download_dir, usenet_retention, var_to_chk(nzbmatrix), nzbmatrix_username, nzbmatrix_apikey, var_to_chk(newznab), newznab_host, newznab_apikey, var_to_chk(nzbsorg), nzbsorg_uid, nzbsorg_hash, var_to_chk(include_lossless), var_to_chk(flac_to_mp3), var_to_chk(move_to_itunes), path_to_itunes, var_to_chk(rename_mp3s), var_to_chk(cleanup), var_to_chk(add_album_art)) - diff --git a/configcreate.py b/configcreate.py deleted file mode 100644 index df8cc035..00000000 --- a/configcreate.py +++ /dev/null @@ -1,41 +0,0 @@ -from configobj import ConfigObj - -def configCreate(path): - config = ConfigObj() - config.filename = path - config['General'] = {} - config['General']['http_host'] = '0.0.0.0' - config['General']['http_port'] = 8181 - config['General']['http_username'] = '' - config['General']['http_password'] = '' - config['General']['http_root'] = '' - config['General']['launch_browser'] = 1 - config['General']['include_lossless'] = 0 - config['General']['flac_to_mp3'] = 0 - config['General']['move_to_itunes'] = 0 - config['General']['path_to_itunes'] = '' - config['General']['rename_mp3s'] = 0 - config['General']['cleanup'] = 0 - config['General']['add_album_art'] = 0 - config['General']['music_download_dir'] = '' - config['General']['usenet_retention'] = 500 - config['SABnzbd'] = {} - config['SABnzbd']['sab_host'] = '' - config['SABnzbd']['sab_username'] = '' - config['SABnzbd']['sab_password'] = '' - config['SABnzbd']['sab_apikey'] = '' - config['SABnzbd']['sab_category'] = '' - config['NZBMatrix'] = {} - config['NZBMatrix']['nzbmatrix'] = 0 - config['NZBMatrix']['nzbmatrix_username'] = '' - config['NZBMatrix']['nzbmatrix_apikey'] = '' - config['Newznab'] = {} - config['Newznab']['newznab'] = 0 - config['Newznab']['newznab_host'] = '' - config['Newznab']['newznab_apikey'] = '' - config['NZBsorg'] = {} - config['NZBsorg']['nzbsorg'] = 0 - config['NZBsorg']['nzbsorg_uid'] = '' - config['NZBsorg']['nzbsorg_hash'] = '' - - config.write() \ No newline at end of file diff --git a/configobj.py b/configobj.py deleted file mode 100644 index c1f6e6df..00000000 --- a/configobj.py +++ /dev/null @@ -1,2468 +0,0 @@ -# configobj.py -# A config file reader/writer that supports nested sections in config files. -# Copyright (C) 2005-2010 Michael Foord, Nicola Larosa -# E-mail: fuzzyman AT voidspace DOT org DOT uk -# nico AT tekNico DOT net - -# ConfigObj 4 -# http://www.voidspace.org.uk/python/configobj.html - -# Released subject to the BSD License -# Please see http://www.voidspace.org.uk/python/license.shtml - -# Scripts maintained at http://www.voidspace.org.uk/python/index.shtml -# For information about bugfixes, updates and support, please join the -# ConfigObj mailing list: -# http://lists.sourceforge.net/lists/listinfo/configobj-develop -# Comments, suggestions and bug reports welcome. - -from __future__ import generators - -import os -import re -import sys - -from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF16_BE, BOM_UTF16_LE - - -# imported lazily to avoid startup performance hit if it isn't used -compiler = None - -# A dictionary mapping BOM to -# the encoding to decode with, and what to set the -# encoding attribute to. -BOMS = { - BOM_UTF8: ('utf_8', None), - BOM_UTF16_BE: ('utf16_be', 'utf_16'), - BOM_UTF16_LE: ('utf16_le', 'utf_16'), - BOM_UTF16: ('utf_16', 'utf_16'), - } -# All legal variants of the BOM codecs. -# TODO: the list of aliases is not meant to be exhaustive, is there a -# better way ? -BOM_LIST = { - 'utf_16': 'utf_16', - 'u16': 'utf_16', - 'utf16': 'utf_16', - 'utf-16': 'utf_16', - 'utf16_be': 'utf16_be', - 'utf_16_be': 'utf16_be', - 'utf-16be': 'utf16_be', - 'utf16_le': 'utf16_le', - 'utf_16_le': 'utf16_le', - 'utf-16le': 'utf16_le', - 'utf_8': 'utf_8', - 'u8': 'utf_8', - 'utf': 'utf_8', - 'utf8': 'utf_8', - 'utf-8': 'utf_8', - } - -# Map of encodings to the BOM to write. -BOM_SET = { - 'utf_8': BOM_UTF8, - 'utf_16': BOM_UTF16, - 'utf16_be': BOM_UTF16_BE, - 'utf16_le': BOM_UTF16_LE, - None: BOM_UTF8 - } - - -def match_utf8(encoding): - return BOM_LIST.get(encoding.lower()) == 'utf_8' - - -# Quote strings used for writing values -squot = "'%s'" -dquot = '"%s"' -noquot = "%s" -wspace_plus = ' \r\n\v\t\'"' -tsquot = '"""%s"""' -tdquot = "'''%s'''" - -# Sentinel for use in getattr calls to replace hasattr -MISSING = object() - -__version__ = '4.7.2' - -try: - any -except NameError: - def any(iterable): - for entry in iterable: - if entry: - return True - return False - - -__all__ = ( - '__version__', - 'DEFAULT_INDENT_TYPE', - 'DEFAULT_INTERPOLATION', - 'ConfigObjError', - 'NestingError', - 'ParseError', - 'DuplicateError', - 'ConfigspecError', - 'ConfigObj', - 'SimpleVal', - 'InterpolationError', - 'InterpolationLoopError', - 'MissingInterpolationOption', - 'RepeatSectionError', - 'ReloadError', - 'UnreprError', - 'UnknownType', - 'flatten_errors', - 'get_extra_values' -) - -DEFAULT_INTERPOLATION = 'configparser' -DEFAULT_INDENT_TYPE = ' ' -MAX_INTERPOL_DEPTH = 10 - -OPTION_DEFAULTS = { - 'interpolation': True, - 'raise_errors': False, - 'list_values': True, - 'create_empty': False, - 'file_error': False, - 'configspec': None, - 'stringify': True, - # option may be set to one of ('', ' ', '\t') - 'indent_type': None, - 'encoding': None, - 'default_encoding': None, - 'unrepr': False, - 'write_empty_values': False, -} - - - -def getObj(s): - global compiler - if compiler is None: - import compiler - s = "a=" + s - p = compiler.parse(s) - return p.getChildren()[1].getChildren()[0].getChildren()[1] - - -class UnknownType(Exception): - pass - - -class Builder(object): - - def build(self, o): - m = getattr(self, 'build_' + o.__class__.__name__, None) - if m is None: - raise UnknownType(o.__class__.__name__) - return m(o) - - def build_List(self, o): - return map(self.build, o.getChildren()) - - def build_Const(self, o): - return o.value - - def build_Dict(self, o): - d = {} - i = iter(map(self.build, o.getChildren())) - for el in i: - d[el] = i.next() - return d - - def build_Tuple(self, o): - return tuple(self.build_List(o)) - - def build_Name(self, o): - if o.name == 'None': - return None - if o.name == 'True': - return True - if o.name == 'False': - return False - - # An undefined Name - raise UnknownType('Undefined Name') - - def build_Add(self, o): - real, imag = map(self.build_Const, o.getChildren()) - try: - real = float(real) - except TypeError: - raise UnknownType('Add') - if not isinstance(imag, complex) or imag.real != 0.0: - raise UnknownType('Add') - return real+imag - - def build_Getattr(self, o): - parent = self.build(o.expr) - return getattr(parent, o.attrname) - - def build_UnarySub(self, o): - return -self.build_Const(o.getChildren()[0]) - - def build_UnaryAdd(self, o): - return self.build_Const(o.getChildren()[0]) - - -_builder = Builder() - - -def unrepr(s): - if not s: - return s - return _builder.build(getObj(s)) - - - -class ConfigObjError(SyntaxError): - """ - This is the base class for all errors that ConfigObj raises. - It is a subclass of SyntaxError. - """ - def __init__(self, message='', line_number=None, line=''): - self.line = line - self.line_number = line_number - SyntaxError.__init__(self, message) - - -class NestingError(ConfigObjError): - """ - This error indicates a level of nesting that doesn't match. - """ - - -class ParseError(ConfigObjError): - """ - This error indicates that a line is badly written. - It is neither a valid ``key = value`` line, - nor a valid section marker line. - """ - - -class ReloadError(IOError): - """ - A 'reload' operation failed. - This exception is a subclass of ``IOError``. - """ - def __init__(self): - IOError.__init__(self, 'reload failed, filename is not set.') - - -class DuplicateError(ConfigObjError): - """ - The keyword or section specified already exists. - """ - - -class ConfigspecError(ConfigObjError): - """ - An error occured whilst parsing a configspec. - """ - - -class InterpolationError(ConfigObjError): - """Base class for the two interpolation errors.""" - - -class InterpolationLoopError(InterpolationError): - """Maximum interpolation depth exceeded in string interpolation.""" - - def __init__(self, option): - InterpolationError.__init__( - self, - 'interpolation loop detected in value "%s".' % option) - - -class RepeatSectionError(ConfigObjError): - """ - This error indicates additional sections in a section with a - ``__many__`` (repeated) section. - """ - - -class MissingInterpolationOption(InterpolationError): - """A value specified for interpolation was missing.""" - def __init__(self, option): - msg = 'missing option "%s" in interpolation.' % option - InterpolationError.__init__(self, msg) - - -class UnreprError(ConfigObjError): - """An error parsing in unrepr mode.""" - - - -class InterpolationEngine(object): - """ - A helper class to help perform string interpolation. - - This class is an abstract base class; its descendants perform - the actual work. - """ - - # compiled regexp to use in self.interpolate() - _KEYCRE = re.compile(r"%\(([^)]*)\)s") - _cookie = '%' - - def __init__(self, section): - # the Section instance that "owns" this engine - self.section = section - - - def interpolate(self, key, value): - # short-cut - if not self._cookie in value: - return value - - def recursive_interpolate(key, value, section, backtrail): - """The function that does the actual work. - - ``value``: the string we're trying to interpolate. - ``section``: the section in which that string was found - ``backtrail``: a dict to keep track of where we've been, - to detect and prevent infinite recursion loops - - This is similar to a depth-first-search algorithm. - """ - # Have we been here already? - if (key, section.name) in backtrail: - # Yes - infinite loop detected - raise InterpolationLoopError(key) - # Place a marker on our backtrail so we won't come back here again - backtrail[(key, section.name)] = 1 - - # Now start the actual work - match = self._KEYCRE.search(value) - while match: - # The actual parsing of the match is implementation-dependent, - # so delegate to our helper function - k, v, s = self._parse_match(match) - if k is None: - # That's the signal that no further interpolation is needed - replacement = v - else: - # Further interpolation may be needed to obtain final value - replacement = recursive_interpolate(k, v, s, backtrail) - # Replace the matched string with its final value - start, end = match.span() - value = ''.join((value[:start], replacement, value[end:])) - new_search_start = start + len(replacement) - # Pick up the next interpolation key, if any, for next time - # through the while loop - match = self._KEYCRE.search(value, new_search_start) - - # Now safe to come back here again; remove marker from backtrail - del backtrail[(key, section.name)] - - return value - - # Back in interpolate(), all we have to do is kick off the recursive - # function with appropriate starting values - value = recursive_interpolate(key, value, self.section, {}) - return value - - - def _fetch(self, key): - """Helper function to fetch values from owning section. - - Returns a 2-tuple: the value, and the section where it was found. - """ - # switch off interpolation before we try and fetch anything ! - save_interp = self.section.main.interpolation - self.section.main.interpolation = False - - # Start at section that "owns" this InterpolationEngine - current_section = self.section - while True: - # try the current section first - val = current_section.get(key) - if val is not None and not isinstance(val, Section): - break - # try "DEFAULT" next - val = current_section.get('DEFAULT', {}).get(key) - if val is not None and not isinstance(val, Section): - break - # move up to parent and try again - # top-level's parent is itself - if current_section.parent is current_section: - # reached top level, time to give up - break - current_section = current_section.parent - - # restore interpolation to previous value before returning - self.section.main.interpolation = save_interp - if val is None: - raise MissingInterpolationOption(key) - return val, current_section - - - def _parse_match(self, match): - """Implementation-dependent helper function. - - Will be passed a match object corresponding to the interpolation - key we just found (e.g., "%(foo)s" or "$foo"). Should look up that - key in the appropriate config file section (using the ``_fetch()`` - helper function) and return a 3-tuple: (key, value, section) - - ``key`` is the name of the key we're looking for - ``value`` is the value found for that key - ``section`` is a reference to the section where it was found - - ``key`` and ``section`` should be None if no further - interpolation should be performed on the resulting value - (e.g., if we interpolated "$$" and returned "$"). - """ - raise NotImplementedError() - - - -class ConfigParserInterpolation(InterpolationEngine): - """Behaves like ConfigParser.""" - _cookie = '%' - _KEYCRE = re.compile(r"%\(([^)]*)\)s") - - def _parse_match(self, match): - key = match.group(1) - value, section = self._fetch(key) - return key, value, section - - - -class TemplateInterpolation(InterpolationEngine): - """Behaves like string.Template.""" - _cookie = '$' - _delimiter = '$' - _KEYCRE = re.compile(r""" - \$(?: - (?P\$) | # Two $ signs - (?P[_a-z][_a-z0-9]*) | # $name format - {(?P[^}]*)} # ${name} format - ) - """, re.IGNORECASE | re.VERBOSE) - - def _parse_match(self, match): - # Valid name (in or out of braces): fetch value from section - key = match.group('named') or match.group('braced') - if key is not None: - value, section = self._fetch(key) - return key, value, section - # Escaped delimiter (e.g., $$): return single delimiter - if match.group('escaped') is not None: - # Return None for key and section to indicate it's time to stop - return None, self._delimiter, None - # Anything else: ignore completely, just return it unchanged - return None, match.group(), None - - -interpolation_engines = { - 'configparser': ConfigParserInterpolation, - 'template': TemplateInterpolation, -} - - -def __newobj__(cls, *args): - # Hack for pickle - return cls.__new__(cls, *args) - -class Section(dict): - """ - A dictionary-like object that represents a section in a config file. - - It does string interpolation if the 'interpolation' attribute - of the 'main' object is set to True. - - Interpolation is tried first from this object, then from the 'DEFAULT' - section of this object, next from the parent and its 'DEFAULT' section, - and so on until the main object is reached. - - A Section will behave like an ordered dictionary - following the - order of the ``scalars`` and ``sections`` attributes. - You can use this to change the order of members. - - Iteration follows the order: scalars, then sections. - """ - - - def __setstate__(self, state): - dict.update(self, state[0]) - self.__dict__.update(state[1]) - - def __reduce__(self): - state = (dict(self), self.__dict__) - return (__newobj__, (self.__class__,), state) - - - def __init__(self, parent, depth, main, indict=None, name=None): - """ - * parent is the section above - * depth is the depth level of this section - * main is the main ConfigObj - * indict is a dictionary to initialise the section with - """ - if indict is None: - indict = {} - dict.__init__(self) - # used for nesting level *and* interpolation - self.parent = parent - # used for the interpolation attribute - self.main = main - # level of nesting depth of this Section - self.depth = depth - # purely for information - self.name = name - # - self._initialise() - # we do this explicitly so that __setitem__ is used properly - # (rather than just passing to ``dict.__init__``) - for entry, value in indict.iteritems(): - self[entry] = value - - - def _initialise(self): - # the sequence of scalar values in this Section - self.scalars = [] - # the sequence of sections in this Section - self.sections = [] - # for comments :-) - self.comments = {} - self.inline_comments = {} - # the configspec - self.configspec = None - # for defaults - self.defaults = [] - self.default_values = {} - self.extra_values = [] - self._created = False - - - def _interpolate(self, key, value): - try: - # do we already have an interpolation engine? - engine = self._interpolation_engine - except AttributeError: - # not yet: first time running _interpolate(), so pick the engine - name = self.main.interpolation - if name == True: # note that "if name:" would be incorrect here - # backwards-compatibility: interpolation=True means use default - name = DEFAULT_INTERPOLATION - name = name.lower() # so that "Template", "template", etc. all work - class_ = interpolation_engines.get(name, None) - if class_ is None: - # invalid value for self.main.interpolation - self.main.interpolation = False - return value - else: - # save reference to engine so we don't have to do this again - engine = self._interpolation_engine = class_(self) - # let the engine do the actual work - return engine.interpolate(key, value) - - - def __getitem__(self, key): - """Fetch the item and do string interpolation.""" - val = dict.__getitem__(self, key) - if self.main.interpolation: - if isinstance(val, basestring): - return self._interpolate(key, val) - if isinstance(val, list): - def _check(entry): - if isinstance(entry, basestring): - return self._interpolate(key, entry) - return entry - new = [_check(entry) for entry in val] - if new != val: - return new - return val - - - def __setitem__(self, key, value, unrepr=False): - """ - Correctly set a value. - - Making dictionary values Section instances. - (We have to special case 'Section' instances - which are also dicts) - - Keys must be strings. - Values need only be strings (or lists of strings) if - ``main.stringify`` is set. - - ``unrepr`` must be set when setting a value to a dictionary, without - creating a new sub-section. - """ - if not isinstance(key, basestring): - raise ValueError('The key "%s" is not a string.' % key) - - # add the comment - if key not in self.comments: - self.comments[key] = [] - self.inline_comments[key] = '' - # remove the entry from defaults - if key in self.defaults: - self.defaults.remove(key) - # - if isinstance(value, Section): - if key not in self: - self.sections.append(key) - dict.__setitem__(self, key, value) - elif isinstance(value, dict) and not unrepr: - # First create the new depth level, - # then create the section - if key not in self: - self.sections.append(key) - new_depth = self.depth + 1 - dict.__setitem__( - self, - key, - Section( - self, - new_depth, - self.main, - indict=value, - name=key)) - else: - if key not in self: - self.scalars.append(key) - if not self.main.stringify: - if isinstance(value, basestring): - pass - elif isinstance(value, (list, tuple)): - for entry in value: - if not isinstance(entry, basestring): - raise TypeError('Value is not a string "%s".' % entry) - else: - raise TypeError('Value is not a string "%s".' % value) - dict.__setitem__(self, key, value) - - - def __delitem__(self, key): - """Remove items from the sequence when deleting.""" - dict. __delitem__(self, key) - if key in self.scalars: - self.scalars.remove(key) - else: - self.sections.remove(key) - del self.comments[key] - del self.inline_comments[key] - - - def get(self, key, default=None): - """A version of ``get`` that doesn't bypass string interpolation.""" - try: - return self[key] - except KeyError: - return default - - - def update(self, indict): - """ - A version of update that uses our ``__setitem__``. - """ - for entry in indict: - self[entry] = indict[entry] - - - def pop(self, key, default=MISSING): - """ - 'D.pop(k[,d]) -> v, remove specified key and return the corresponding value. - If key is not found, d is returned if given, otherwise KeyError is raised' - """ - try: - val = self[key] - except KeyError: - if default is MISSING: - raise - val = default - else: - del self[key] - return val - - - def popitem(self): - """Pops the first (key,val)""" - sequence = (self.scalars + self.sections) - if not sequence: - raise KeyError(": 'popitem(): dictionary is empty'") - key = sequence[0] - val = self[key] - del self[key] - return key, val - - - def clear(self): - """ - A version of clear that also affects scalars/sections - Also clears comments and configspec. - - Leaves other attributes alone : - depth/main/parent are not affected - """ - dict.clear(self) - self.scalars = [] - self.sections = [] - self.comments = {} - self.inline_comments = {} - self.configspec = None - self.defaults = [] - self.extra_values = [] - - - def setdefault(self, key, default=None): - """A version of setdefault that sets sequence if appropriate.""" - try: - return self[key] - except KeyError: - self[key] = default - return self[key] - - - def items(self): - """D.items() -> list of D's (key, value) pairs, as 2-tuples""" - return zip((self.scalars + self.sections), self.values()) - - - def keys(self): - """D.keys() -> list of D's keys""" - return (self.scalars + self.sections) - - - def values(self): - """D.values() -> list of D's values""" - return [self[key] for key in (self.scalars + self.sections)] - - - def iteritems(self): - """D.iteritems() -> an iterator over the (key, value) items of D""" - return iter(self.items()) - - - def iterkeys(self): - """D.iterkeys() -> an iterator over the keys of D""" - return iter((self.scalars + self.sections)) - - __iter__ = iterkeys - - - def itervalues(self): - """D.itervalues() -> an iterator over the values of D""" - return iter(self.values()) - - - def __repr__(self): - """x.__repr__() <==> repr(x)""" - def _getval(key): - try: - return self[key] - except MissingInterpolationOption: - return dict.__getitem__(self, key) - return '{%s}' % ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) - for key in (self.scalars + self.sections)]) - - __str__ = __repr__ - __str__.__doc__ = "x.__str__() <==> str(x)" - - - # Extra methods - not in a normal dictionary - - def dict(self): - """ - Return a deepcopy of self as a dictionary. - - All members that are ``Section`` instances are recursively turned to - ordinary dictionaries - by calling their ``dict`` method. - - >>> n = a.dict() - >>> n == a - 1 - >>> n is a - 0 - """ - newdict = {} - for entry in self: - this_entry = self[entry] - if isinstance(this_entry, Section): - this_entry = this_entry.dict() - elif isinstance(this_entry, list): - # create a copy rather than a reference - this_entry = list(this_entry) - elif isinstance(this_entry, tuple): - # create a copy rather than a reference - this_entry = tuple(this_entry) - newdict[entry] = this_entry - return newdict - - - def merge(self, indict): - """ - A recursive update - useful for merging config files. - - >>> a = '''[section1] - ... option1 = True - ... [[subsection]] - ... more_options = False - ... # end of file'''.splitlines() - >>> b = '''# File is user.ini - ... [section1] - ... option1 = False - ... # end of file'''.splitlines() - >>> c1 = ConfigObj(b) - >>> c2 = ConfigObj(a) - >>> c2.merge(c1) - >>> c2 - ConfigObj({'section1': {'option1': 'False', 'subsection': {'more_options': 'False'}}}) - """ - for key, val in indict.items(): - if (key in self and isinstance(self[key], dict) and - isinstance(val, dict)): - self[key].merge(val) - else: - self[key] = val - - - def rename(self, oldkey, newkey): - """ - Change a keyname to another, without changing position in sequence. - - Implemented so that transformations can be made on keys, - as well as on values. (used by encode and decode) - - Also renames comments. - """ - if oldkey in self.scalars: - the_list = self.scalars - elif oldkey in self.sections: - the_list = self.sections - else: - raise KeyError('Key "%s" not found.' % oldkey) - pos = the_list.index(oldkey) - # - val = self[oldkey] - dict.__delitem__(self, oldkey) - dict.__setitem__(self, newkey, val) - the_list.remove(oldkey) - the_list.insert(pos, newkey) - comm = self.comments[oldkey] - inline_comment = self.inline_comments[oldkey] - del self.comments[oldkey] - del self.inline_comments[oldkey] - self.comments[newkey] = comm - self.inline_comments[newkey] = inline_comment - - - def walk(self, function, raise_errors=True, - call_on_sections=False, **keywargs): - """ - Walk every member and call a function on the keyword and value. - - Return a dictionary of the return values - - If the function raises an exception, raise the errror - unless ``raise_errors=False``, in which case set the return value to - ``False``. - - Any unrecognised keyword arguments you pass to walk, will be pased on - to the function you pass in. - - Note: if ``call_on_sections`` is ``True`` then - on encountering a - subsection, *first* the function is called for the *whole* subsection, - and then recurses into it's members. This means your function must be - able to handle strings, dictionaries and lists. This allows you - to change the key of subsections as well as for ordinary members. The - return value when called on the whole subsection has to be discarded. - - See the encode and decode methods for examples, including functions. - - .. admonition:: caution - - You can use ``walk`` to transform the names of members of a section - but you mustn't add or delete members. - - >>> config = '''[XXXXsection] - ... XXXXkey = XXXXvalue'''.splitlines() - >>> cfg = ConfigObj(config) - >>> cfg - ConfigObj({'XXXXsection': {'XXXXkey': 'XXXXvalue'}}) - >>> def transform(section, key): - ... val = section[key] - ... newkey = key.replace('XXXX', 'CLIENT1') - ... section.rename(key, newkey) - ... if isinstance(val, (tuple, list, dict)): - ... pass - ... else: - ... val = val.replace('XXXX', 'CLIENT1') - ... section[newkey] = val - >>> cfg.walk(transform, call_on_sections=True) - {'CLIENT1section': {'CLIENT1key': None}} - >>> cfg - ConfigObj({'CLIENT1section': {'CLIENT1key': 'CLIENT1value'}}) - """ - out = {} - # scalars first - for i in range(len(self.scalars)): - entry = self.scalars[i] - try: - val = function(self, entry, **keywargs) - # bound again in case name has changed - entry = self.scalars[i] - out[entry] = val - except Exception: - if raise_errors: - raise - else: - entry = self.scalars[i] - out[entry] = False - # then sections - for i in range(len(self.sections)): - entry = self.sections[i] - if call_on_sections: - try: - function(self, entry, **keywargs) - except Exception: - if raise_errors: - raise - else: - entry = self.sections[i] - out[entry] = False - # bound again in case name has changed - entry = self.sections[i] - # previous result is discarded - out[entry] = self[entry].walk( - function, - raise_errors=raise_errors, - call_on_sections=call_on_sections, - **keywargs) - return out - - - def as_bool(self, key): - """ - Accepts a key as input. The corresponding value must be a string or - the objects (``True`` or 1) or (``False`` or 0). We allow 0 and 1 to - retain compatibility with Python 2.2. - - If the string is one of ``True``, ``On``, ``Yes``, or ``1`` it returns - ``True``. - - If the string is one of ``False``, ``Off``, ``No``, or ``0`` it returns - ``False``. - - ``as_bool`` is not case sensitive. - - Any other input will raise a ``ValueError``. - - >>> a = ConfigObj() - >>> a['a'] = 'fish' - >>> a.as_bool('a') - Traceback (most recent call last): - ValueError: Value "fish" is neither True nor False - >>> a['b'] = 'True' - >>> a.as_bool('b') - 1 - >>> a['b'] = 'off' - >>> a.as_bool('b') - 0 - """ - val = self[key] - if val == True: - return True - elif val == False: - return False - else: - try: - if not isinstance(val, basestring): - # TODO: Why do we raise a KeyError here? - raise KeyError() - else: - return self.main._bools[val.lower()] - except KeyError: - raise ValueError('Value "%s" is neither True nor False' % val) - - - def as_int(self, key): - """ - A convenience method which coerces the specified value to an integer. - - If the value is an invalid literal for ``int``, a ``ValueError`` will - be raised. - - >>> a = ConfigObj() - >>> a['a'] = 'fish' - >>> a.as_int('a') - Traceback (most recent call last): - ValueError: invalid literal for int() with base 10: 'fish' - >>> a['b'] = '1' - >>> a.as_int('b') - 1 - >>> a['b'] = '3.2' - >>> a.as_int('b') - Traceback (most recent call last): - ValueError: invalid literal for int() with base 10: '3.2' - """ - return int(self[key]) - - - def as_float(self, key): - """ - A convenience method which coerces the specified value to a float. - - If the value is an invalid literal for ``float``, a ``ValueError`` will - be raised. - - >>> a = ConfigObj() - >>> a['a'] = 'fish' - >>> a.as_float('a') - Traceback (most recent call last): - ValueError: invalid literal for float(): fish - >>> a['b'] = '1' - >>> a.as_float('b') - 1.0 - >>> a['b'] = '3.2' - >>> a.as_float('b') - 3.2000000000000002 - """ - return float(self[key]) - - - def as_list(self, key): - """ - A convenience method which fetches the specified value, guaranteeing - that it is a list. - - >>> a = ConfigObj() - >>> a['a'] = 1 - >>> a.as_list('a') - [1] - >>> a['a'] = (1,) - >>> a.as_list('a') - [1] - >>> a['a'] = [1] - >>> a.as_list('a') - [1] - """ - result = self[key] - if isinstance(result, (tuple, list)): - return list(result) - return [result] - - - def restore_default(self, key): - """ - Restore (and return) default value for the specified key. - - This method will only work for a ConfigObj that was created - with a configspec and has been validated. - - If there is no default value for this key, ``KeyError`` is raised. - """ - default = self.default_values[key] - dict.__setitem__(self, key, default) - if key not in self.defaults: - self.defaults.append(key) - return default - - - def restore_defaults(self): - """ - Recursively restore default values to all members - that have them. - - This method will only work for a ConfigObj that was created - with a configspec and has been validated. - - It doesn't delete or modify entries without default values. - """ - for key in self.default_values: - self.restore_default(key) - - for section in self.sections: - self[section].restore_defaults() - - -class ConfigObj(Section): - """An object to read, create, and write config files.""" - - _keyword = re.compile(r'''^ # line start - (\s*) # indentation - ( # keyword - (?:".*?")| # double quotes - (?:'.*?')| # single quotes - (?:[^'"=].*?) # no quotes - ) - \s*=\s* # divider - (.*) # value (including list values and comments) - $ # line end - ''', - re.VERBOSE) - - _sectionmarker = re.compile(r'''^ - (\s*) # 1: indentation - ((?:\[\s*)+) # 2: section marker open - ( # 3: section name open - (?:"\s*\S.*?\s*")| # at least one non-space with double quotes - (?:'\s*\S.*?\s*')| # at least one non-space with single quotes - (?:[^'"\s].*?) # at least one non-space unquoted - ) # section name close - ((?:\s*\])+) # 4: section marker close - \s*(\#.*)? # 5: optional comment - $''', - re.VERBOSE) - - # this regexp pulls list values out as a single string - # or single values and comments - # FIXME: this regex adds a '' to the end of comma terminated lists - # workaround in ``_handle_value`` - _valueexp = re.compile(r'''^ - (?: - (?: - ( - (?: - (?: - (?:".*?")| # double quotes - (?:'.*?')| # single quotes - (?:[^'",\#][^,\#]*?) # unquoted - ) - \s*,\s* # comma - )* # match all list items ending in a comma (if any) - ) - ( - (?:".*?")| # double quotes - (?:'.*?')| # single quotes - (?:[^'",\#\s][^,]*?)| # unquoted - (?:(? 1: - msg = "Parsing failed with several errors.\nFirst error %s" % info - error = ConfigObjError(msg) - else: - error = self._errors[0] - # set the errors attribute; it's a list of tuples: - # (error_type, message, line_number) - error.errors = self._errors - # set the config attribute - error.config = self - raise error - # delete private attributes - del self._errors - - if configspec is None: - self.configspec = None - else: - self._handle_configspec(configspec) - - - def _initialise(self, options=None): - if options is None: - options = OPTION_DEFAULTS - - # initialise a few variables - self.filename = None - self._errors = [] - self.raise_errors = options['raise_errors'] - self.interpolation = options['interpolation'] - self.list_values = options['list_values'] - self.create_empty = options['create_empty'] - self.file_error = options['file_error'] - self.stringify = options['stringify'] - self.indent_type = options['indent_type'] - self.encoding = options['encoding'] - self.default_encoding = options['default_encoding'] - self.BOM = False - self.newlines = None - self.write_empty_values = options['write_empty_values'] - self.unrepr = options['unrepr'] - - self.initial_comment = [] - self.final_comment = [] - self.configspec = None - - if self._inspec: - self.list_values = False - - # Clear section attributes as well - Section._initialise(self) - - - def __repr__(self): - def _getval(key): - try: - return self[key] - except MissingInterpolationOption: - return dict.__getitem__(self, key) - return ('ConfigObj({%s})' % - ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) - for key in (self.scalars + self.sections)])) - - - def _handle_bom(self, infile): - """ - Handle any BOM, and decode if necessary. - - If an encoding is specified, that *must* be used - but the BOM should - still be removed (and the BOM attribute set). - - (If the encoding is wrongly specified, then a BOM for an alternative - encoding won't be discovered or removed.) - - If an encoding is not specified, UTF8 or UTF16 BOM will be detected and - removed. The BOM attribute will be set. UTF16 will be decoded to - unicode. - - NOTE: This method must not be called with an empty ``infile``. - - Specifying the *wrong* encoding is likely to cause a - ``UnicodeDecodeError``. - - ``infile`` must always be returned as a list of lines, but may be - passed in as a single string. - """ - if ((self.encoding is not None) and - (self.encoding.lower() not in BOM_LIST)): - # No need to check for a BOM - # the encoding specified doesn't have one - # just decode - return self._decode(infile, self.encoding) - - if isinstance(infile, (list, tuple)): - line = infile[0] - else: - line = infile - if self.encoding is not None: - # encoding explicitly supplied - # And it could have an associated BOM - # TODO: if encoding is just UTF16 - we ought to check for both - # TODO: big endian and little endian versions. - enc = BOM_LIST[self.encoding.lower()] - if enc == 'utf_16': - # For UTF16 we try big endian and little endian - for BOM, (encoding, final_encoding) in BOMS.items(): - if not final_encoding: - # skip UTF8 - continue - if infile.startswith(BOM): - ### BOM discovered - ##self.BOM = True - # Don't need to remove BOM - return self._decode(infile, encoding) - - # If we get this far, will *probably* raise a DecodeError - # As it doesn't appear to start with a BOM - return self._decode(infile, self.encoding) - - # Must be UTF8 - BOM = BOM_SET[enc] - if not line.startswith(BOM): - return self._decode(infile, self.encoding) - - newline = line[len(BOM):] - - # BOM removed - if isinstance(infile, (list, tuple)): - infile[0] = newline - else: - infile = newline - self.BOM = True - return self._decode(infile, self.encoding) - - # No encoding specified - so we need to check for UTF8/UTF16 - for BOM, (encoding, final_encoding) in BOMS.items(): - if not line.startswith(BOM): - continue - else: - # BOM discovered - self.encoding = final_encoding - if not final_encoding: - self.BOM = True - # UTF8 - # remove BOM - newline = line[len(BOM):] - if isinstance(infile, (list, tuple)): - infile[0] = newline - else: - infile = newline - # UTF8 - don't decode - if isinstance(infile, basestring): - return infile.splitlines(True) - else: - return infile - # UTF16 - have to decode - return self._decode(infile, encoding) - - # No BOM discovered and no encoding specified, just return - if isinstance(infile, basestring): - # infile read from a file will be a single string - return infile.splitlines(True) - return infile - - - def _a_to_u(self, aString): - """Decode ASCII strings to unicode if a self.encoding is specified.""" - if self.encoding: - return aString.decode('ascii') - else: - return aString - - - def _decode(self, infile, encoding): - """ - Decode infile to unicode. Using the specified encoding. - - if is a string, it also needs converting to a list. - """ - if isinstance(infile, basestring): - # can't be unicode - # NOTE: Could raise a ``UnicodeDecodeError`` - return infile.decode(encoding).splitlines(True) - for i, line in enumerate(infile): - if not isinstance(line, unicode): - # NOTE: The isinstance test here handles mixed lists of unicode/string - # NOTE: But the decode will break on any non-string values - # NOTE: Or could raise a ``UnicodeDecodeError`` - infile[i] = line.decode(encoding) - return infile - - - def _decode_element(self, line): - """Decode element to unicode if necessary.""" - if not self.encoding: - return line - if isinstance(line, str) and self.default_encoding: - return line.decode(self.default_encoding) - return line - - - def _str(self, value): - """ - Used by ``stringify`` within validate, to turn non-string values - into strings. - """ - if not isinstance(value, basestring): - return str(value) - else: - return value - - - def _parse(self, infile): - """Actually parse the config file.""" - temp_list_values = self.list_values - if self.unrepr: - self.list_values = False - - comment_list = [] - done_start = False - this_section = self - maxline = len(infile) - 1 - cur_index = -1 - reset_comment = False - - while cur_index < maxline: - if reset_comment: - comment_list = [] - cur_index += 1 - line = infile[cur_index] - sline = line.strip() - # do we have anything on the line ? - if not sline or sline.startswith('#'): - reset_comment = False - comment_list.append(line) - continue - - if not done_start: - # preserve initial comment - self.initial_comment = comment_list - comment_list = [] - done_start = True - - reset_comment = True - # first we check if it's a section marker - mat = self._sectionmarker.match(line) - if mat is not None: - # is a section line - (indent, sect_open, sect_name, sect_close, comment) = mat.groups() - if indent and (self.indent_type is None): - self.indent_type = indent - cur_depth = sect_open.count('[') - if cur_depth != sect_close.count(']'): - self._handle_error("Cannot compute the section depth at line %s.", - NestingError, infile, cur_index) - continue - - if cur_depth < this_section.depth: - # the new section is dropping back to a previous level - try: - parent = self._match_depth(this_section, - cur_depth).parent - except SyntaxError: - self._handle_error("Cannot compute nesting level at line %s.", - NestingError, infile, cur_index) - continue - elif cur_depth == this_section.depth: - # the new section is a sibling of the current section - parent = this_section.parent - elif cur_depth == this_section.depth + 1: - # the new section is a child the current section - parent = this_section - else: - self._handle_error("Section too nested at line %s.", - NestingError, infile, cur_index) - - sect_name = self._unquote(sect_name) - if sect_name in parent: - self._handle_error('Duplicate section name at line %s.', - DuplicateError, infile, cur_index) - continue - - # create the new section - this_section = Section( - parent, - cur_depth, - self, - name=sect_name) - parent[sect_name] = this_section - parent.inline_comments[sect_name] = comment - parent.comments[sect_name] = comment_list - continue - # - # it's not a section marker, - # so it should be a valid ``key = value`` line - mat = self._keyword.match(line) - if mat is None: - # it neither matched as a keyword - # or a section marker - self._handle_error( - 'Invalid line at line "%s".', - ParseError, infile, cur_index) - else: - # is a keyword value - # value will include any inline comment - (indent, key, value) = mat.groups() - if indent and (self.indent_type is None): - self.indent_type = indent - # check for a multiline value - if value[:3] in ['"""', "'''"]: - try: - value, comment, cur_index = self._multiline( - value, infile, cur_index, maxline) - except SyntaxError: - self._handle_error( - 'Parse error in value at line %s.', - ParseError, infile, cur_index) - continue - else: - if self.unrepr: - comment = '' - try: - value = unrepr(value) - except Exception, e: - if type(e) == UnknownType: - msg = 'Unknown name or type in value at line %s.' - else: - msg = 'Parse error in value at line %s.' - self._handle_error(msg, UnreprError, infile, - cur_index) - continue - else: - if self.unrepr: - comment = '' - try: - value = unrepr(value) - except Exception, e: - if isinstance(e, UnknownType): - msg = 'Unknown name or type in value at line %s.' - else: - msg = 'Parse error in value at line %s.' - self._handle_error(msg, UnreprError, infile, - cur_index) - continue - else: - # extract comment and lists - try: - (value, comment) = self._handle_value(value) - except SyntaxError: - self._handle_error( - 'Parse error in value at line %s.', - ParseError, infile, cur_index) - continue - # - key = self._unquote(key) - if key in this_section: - self._handle_error( - 'Duplicate keyword name at line %s.', - DuplicateError, infile, cur_index) - continue - # add the key. - # we set unrepr because if we have got this far we will never - # be creating a new section - this_section.__setitem__(key, value, unrepr=True) - this_section.inline_comments[key] = comment - this_section.comments[key] = comment_list - continue - # - if self.indent_type is None: - # no indentation used, set the type accordingly - self.indent_type = '' - - # preserve the final comment - if not self and not self.initial_comment: - self.initial_comment = comment_list - elif not reset_comment: - self.final_comment = comment_list - self.list_values = temp_list_values - - - def _match_depth(self, sect, depth): - """ - Given a section and a depth level, walk back through the sections - parents to see if the depth level matches a previous section. - - Return a reference to the right section, - or raise a SyntaxError. - """ - while depth < sect.depth: - if sect is sect.parent: - # we've reached the top level already - raise SyntaxError() - sect = sect.parent - if sect.depth == depth: - return sect - # shouldn't get here - raise SyntaxError() - - - def _handle_error(self, text, ErrorClass, infile, cur_index): - """ - Handle an error according to the error settings. - - Either raise the error or store it. - The error will have occured at ``cur_index`` - """ - line = infile[cur_index] - cur_index += 1 - message = text % cur_index - error = ErrorClass(message, cur_index, line) - if self.raise_errors: - # raise the error - parsing stops here - raise error - # store the error - # reraise when parsing has finished - self._errors.append(error) - - - def _unquote(self, value): - """Return an unquoted version of a value""" - if not value: - # should only happen during parsing of lists - raise SyntaxError - if (value[0] == value[-1]) and (value[0] in ('"', "'")): - value = value[1:-1] - return value - - - def _quote(self, value, multiline=True): - """ - Return a safely quoted version of a value. - - Raise a ConfigObjError if the value cannot be safely quoted. - If multiline is ``True`` (default) then use triple quotes - if necessary. - - * Don't quote values that don't need it. - * Recursively quote members of a list and return a comma joined list. - * Multiline is ``False`` for lists. - * Obey list syntax for empty and single member lists. - - If ``list_values=False`` then the value is only quoted if it contains - a ``\\n`` (is multiline) or '#'. - - If ``write_empty_values`` is set, and the value is an empty string, it - won't be quoted. - """ - if multiline and self.write_empty_values and value == '': - # Only if multiline is set, so that it is used for values not - # keys, and not values that are part of a list - return '' - - if multiline and isinstance(value, (list, tuple)): - if not value: - return ',' - elif len(value) == 1: - return self._quote(value[0], multiline=False) + ',' - return ', '.join([self._quote(val, multiline=False) - for val in value]) - if not isinstance(value, basestring): - if self.stringify: - value = str(value) - else: - raise TypeError('Value "%s" is not a string.' % value) - - if not value: - return '""' - - no_lists_no_quotes = not self.list_values and '\n' not in value and '#' not in value - need_triple = multiline and ((("'" in value) and ('"' in value)) or ('\n' in value )) - hash_triple_quote = multiline and not need_triple and ("'" in value) and ('"' in value) and ('#' in value) - check_for_single = (no_lists_no_quotes or not need_triple) and not hash_triple_quote - - if check_for_single: - if not self.list_values: - # we don't quote if ``list_values=False`` - quot = noquot - # for normal values either single or double quotes will do - elif '\n' in value: - # will only happen if multiline is off - e.g. '\n' in key - raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) - elif ((value[0] not in wspace_plus) and - (value[-1] not in wspace_plus) and - (',' not in value)): - quot = noquot - else: - quot = self._get_single_quote(value) - else: - # if value has '\n' or "'" *and* '"', it will need triple quotes - quot = self._get_triple_quote(value) - - if quot == noquot and '#' in value and self.list_values: - quot = self._get_single_quote(value) - - return quot % value - - - def _get_single_quote(self, value): - if ("'" in value) and ('"' in value): - raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) - elif '"' in value: - quot = squot - else: - quot = dquot - return quot - - - def _get_triple_quote(self, value): - if (value.find('"""') != -1) and (value.find("'''") != -1): - raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) - if value.find('"""') == -1: - quot = tdquot - else: - quot = tsquot - return quot - - - def _handle_value(self, value): - """ - Given a value string, unquote, remove comment, - handle lists. (including empty and single member lists) - """ - if self._inspec: - # Parsing a configspec so don't handle comments - return (value, '') - # do we look for lists in values ? - if not self.list_values: - mat = self._nolistvalue.match(value) - if mat is None: - raise SyntaxError() - # NOTE: we don't unquote here - return mat.groups() - # - mat = self._valueexp.match(value) - if mat is None: - # the value is badly constructed, probably badly quoted, - # or an invalid list - raise SyntaxError() - (list_values, single, empty_list, comment) = mat.groups() - if (list_values == '') and (single is None): - # change this if you want to accept empty values - raise SyntaxError() - # NOTE: note there is no error handling from here if the regex - # is wrong: then incorrect values will slip through - if empty_list is not None: - # the single comma - meaning an empty list - return ([], comment) - if single is not None: - # handle empty values - if list_values and not single: - # FIXME: the '' is a workaround because our regex now matches - # '' at the end of a list if it has a trailing comma - single = None - else: - single = single or '""' - single = self._unquote(single) - if list_values == '': - # not a list value - return (single, comment) - the_list = self._listvalueexp.findall(list_values) - the_list = [self._unquote(val) for val in the_list] - if single is not None: - the_list += [single] - return (the_list, comment) - - - def _multiline(self, value, infile, cur_index, maxline): - """Extract the value, where we are in a multiline situation.""" - quot = value[:3] - newvalue = value[3:] - single_line = self._triple_quote[quot][0] - multi_line = self._triple_quote[quot][1] - mat = single_line.match(value) - if mat is not None: - retval = list(mat.groups()) - retval.append(cur_index) - return retval - elif newvalue.find(quot) != -1: - # somehow the triple quote is missing - raise SyntaxError() - # - while cur_index < maxline: - cur_index += 1 - newvalue += '\n' - line = infile[cur_index] - if line.find(quot) == -1: - newvalue += line - else: - # end of multiline, process it - break - else: - # we've got to the end of the config, oops... - raise SyntaxError() - mat = multi_line.match(line) - if mat is None: - # a badly formed line - raise SyntaxError() - (value, comment) = mat.groups() - return (newvalue + value, comment, cur_index) - - - def _handle_configspec(self, configspec): - """Parse the configspec.""" - # FIXME: Should we check that the configspec was created with the - # correct settings ? (i.e. ``list_values=False``) - if not isinstance(configspec, ConfigObj): - try: - configspec = ConfigObj(configspec, - raise_errors=True, - file_error=True, - _inspec=True) - except ConfigObjError, e: - # FIXME: Should these errors have a reference - # to the already parsed ConfigObj ? - raise ConfigspecError('Parsing configspec failed: %s' % e) - except IOError, e: - raise IOError('Reading configspec failed: %s' % e) - - self.configspec = configspec - - - - def _set_configspec(self, section, copy): - """ - Called by validate. Handles setting the configspec on subsections - including sections to be validated by __many__ - """ - configspec = section.configspec - many = configspec.get('__many__') - if isinstance(many, dict): - for entry in section.sections: - if entry not in configspec: - section[entry].configspec = many - - for entry in configspec.sections: - if entry == '__many__': - continue - if entry not in section: - section[entry] = {} - section[entry]._created = True - if copy: - # copy comments - section.comments[entry] = configspec.comments.get(entry, []) - section.inline_comments[entry] = configspec.inline_comments.get(entry, '') - - # Could be a scalar when we expect a section - if isinstance(section[entry], Section): - section[entry].configspec = configspec[entry] - - - def _write_line(self, indent_string, entry, this_entry, comment): - """Write an individual line, for the write method""" - # NOTE: the calls to self._quote here handles non-StringType values. - if not self.unrepr: - val = self._decode_element(self._quote(this_entry)) - else: - val = repr(this_entry) - return '%s%s%s%s%s' % (indent_string, - self._decode_element(self._quote(entry, multiline=False)), - self._a_to_u(' = '), - val, - self._decode_element(comment)) - - - def _write_marker(self, indent_string, depth, entry, comment): - """Write a section marker line""" - return '%s%s%s%s%s' % (indent_string, - self._a_to_u('[' * depth), - self._quote(self._decode_element(entry), multiline=False), - self._a_to_u(']' * depth), - self._decode_element(comment)) - - - def _handle_comment(self, comment): - """Deal with a comment.""" - if not comment: - return '' - start = self.indent_type - if not comment.startswith('#'): - start += self._a_to_u(' # ') - return (start + comment) - - - # Public methods - - def write(self, outfile=None, section=None): - """ - Write the current ConfigObj as a file - - tekNico: FIXME: use StringIO instead of real files - - >>> filename = a.filename - >>> a.filename = 'test.ini' - >>> a.write() - >>> a.filename = filename - >>> a == ConfigObj('test.ini', raise_errors=True) - 1 - >>> import os - >>> os.remove('test.ini') - """ - if self.indent_type is None: - # this can be true if initialised from a dictionary - self.indent_type = DEFAULT_INDENT_TYPE - - out = [] - cs = self._a_to_u('#') - csp = self._a_to_u('# ') - if section is None: - int_val = self.interpolation - self.interpolation = False - section = self - for line in self.initial_comment: - line = self._decode_element(line) - stripped_line = line.strip() - if stripped_line and not stripped_line.startswith(cs): - line = csp + line - out.append(line) - - indent_string = self.indent_type * section.depth - for entry in (section.scalars + section.sections): - if entry in section.defaults: - # don't write out default values - continue - for comment_line in section.comments[entry]: - comment_line = self._decode_element(comment_line.lstrip()) - if comment_line and not comment_line.startswith(cs): - comment_line = csp + comment_line - out.append(indent_string + comment_line) - this_entry = section[entry] - comment = self._handle_comment(section.inline_comments[entry]) - - if isinstance(this_entry, dict): - # a section - out.append(self._write_marker( - indent_string, - this_entry.depth, - entry, - comment)) - out.extend(self.write(section=this_entry)) - else: - out.append(self._write_line( - indent_string, - entry, - this_entry, - comment)) - - if section is self: - for line in self.final_comment: - line = self._decode_element(line) - stripped_line = line.strip() - if stripped_line and not stripped_line.startswith(cs): - line = csp + line - out.append(line) - self.interpolation = int_val - - if section is not self: - return out - - if (self.filename is None) and (outfile is None): - # output a list of lines - # might need to encode - # NOTE: This will *screw* UTF16, each line will start with the BOM - if self.encoding: - out = [l.encode(self.encoding) for l in out] - if (self.BOM and ((self.encoding is None) or - (BOM_LIST.get(self.encoding.lower()) == 'utf_8'))): - # Add the UTF8 BOM - if not out: - out.append('') - out[0] = BOM_UTF8 + out[0] - return out - - # Turn the list to a string, joined with correct newlines - newline = self.newlines or os.linesep - if (getattr(outfile, 'mode', None) is not None and outfile.mode == 'w' - and sys.platform == 'win32' and newline == '\r\n'): - # Windows specific hack to avoid writing '\r\r\n' - newline = '\n' - output = self._a_to_u(newline).join(out) - if self.encoding: - output = output.encode(self.encoding) - if self.BOM and ((self.encoding is None) or match_utf8(self.encoding)): - # Add the UTF8 BOM - output = BOM_UTF8 + output - - if not output.endswith(newline): - output += newline - if outfile is not None: - outfile.write(output) - else: - h = open(self.filename, 'wb') - h.write(output) - h.close() - - - def validate(self, validator, preserve_errors=False, copy=False, - section=None): - """ - Test the ConfigObj against a configspec. - - It uses the ``validator`` object from *validate.py*. - - To run ``validate`` on the current ConfigObj, call: :: - - test = config.validate(validator) - - (Normally having previously passed in the configspec when the ConfigObj - was created - you can dynamically assign a dictionary of checks to the - ``configspec`` attribute of a section though). - - It returns ``True`` if everything passes, or a dictionary of - pass/fails (True/False). If every member of a subsection passes, it - will just have the value ``True``. (It also returns ``False`` if all - members fail). - - In addition, it converts the values from strings to their native - types if their checks pass (and ``stringify`` is set). - - If ``preserve_errors`` is ``True`` (``False`` is default) then instead - of a marking a fail with a ``False``, it will preserve the actual - exception object. This can contain info about the reason for failure. - For example the ``VdtValueTooSmallError`` indicates that the value - supplied was too small. If a value (or section) is missing it will - still be marked as ``False``. - - You must have the validate module to use ``preserve_errors=True``. - - You can then use the ``flatten_errors`` function to turn your nested - results dictionary into a flattened list of failures - useful for - displaying meaningful error messages. - """ - if section is None: - if self.configspec is None: - raise ValueError('No configspec supplied.') - if preserve_errors: - # We do this once to remove a top level dependency on the validate module - # Which makes importing configobj faster - from validate import VdtMissingValue - self._vdtMissingValue = VdtMissingValue - - section = self - - if copy: - section.initial_comment = section.configspec.initial_comment - section.final_comment = section.configspec.final_comment - section.encoding = section.configspec.encoding - section.BOM = section.configspec.BOM - section.newlines = section.configspec.newlines - section.indent_type = section.configspec.indent_type - - # - # section.default_values.clear() #?? - configspec = section.configspec - self._set_configspec(section, copy) - - - def validate_entry(entry, spec, val, missing, ret_true, ret_false): - section.default_values.pop(entry, None) - - try: - section.default_values[entry] = validator.get_default_value(configspec[entry]) - except (KeyError, AttributeError, validator.baseErrorClass): - # No default, bad default or validator has no 'get_default_value' - # (e.g. SimpleVal) - pass - - try: - check = validator.check(spec, - val, - missing=missing - ) - except validator.baseErrorClass, e: - if not preserve_errors or isinstance(e, self._vdtMissingValue): - out[entry] = False - else: - # preserve the error - out[entry] = e - ret_false = False - ret_true = False - else: - ret_false = False - out[entry] = True - if self.stringify or missing: - # if we are doing type conversion - # or the value is a supplied default - if not self.stringify: - if isinstance(check, (list, tuple)): - # preserve lists - check = [self._str(item) for item in check] - elif missing and check is None: - # convert the None from a default to a '' - check = '' - else: - check = self._str(check) - if (check != val) or missing: - section[entry] = check - if not copy and missing and entry not in section.defaults: - section.defaults.append(entry) - return ret_true, ret_false - - # - out = {} - ret_true = True - ret_false = True - - unvalidated = [k for k in section.scalars if k not in configspec] - incorrect_sections = [k for k in configspec.sections if k in section.scalars] - incorrect_scalars = [k for k in configspec.scalars if k in section.sections] - - for entry in configspec.scalars: - if entry in ('__many__', '___many___'): - # reserved names - continue - if (not entry in section.scalars) or (entry in section.defaults): - # missing entries - # or entries from defaults - missing = True - val = None - if copy and entry not in section.scalars: - # copy comments - section.comments[entry] = ( - configspec.comments.get(entry, [])) - section.inline_comments[entry] = ( - configspec.inline_comments.get(entry, '')) - # - else: - missing = False - val = section[entry] - - ret_true, ret_false = validate_entry(entry, configspec[entry], val, - missing, ret_true, ret_false) - - many = None - if '__many__' in configspec.scalars: - many = configspec['__many__'] - elif '___many___' in configspec.scalars: - many = configspec['___many___'] - - if many is not None: - for entry in unvalidated: - val = section[entry] - ret_true, ret_false = validate_entry(entry, many, val, False, - ret_true, ret_false) - unvalidated = [] - - for entry in incorrect_scalars: - ret_true = False - if not preserve_errors: - out[entry] = False - else: - ret_false = False - msg = 'Value %r was provided as a section' % entry - out[entry] = validator.baseErrorClass(msg) - for entry in incorrect_sections: - ret_true = False - if not preserve_errors: - out[entry] = False - else: - ret_false = False - msg = 'Section %r was provided as a single value' % entry - out[entry] = validator.baseErrorClass(msg) - - # Missing sections will have been created as empty ones when the - # configspec was read. - for entry in section.sections: - # FIXME: this means DEFAULT is not copied in copy mode - if section is self and entry == 'DEFAULT': - continue - if section[entry].configspec is None: - unvalidated.append(entry) - continue - if copy: - section.comments[entry] = configspec.comments.get(entry, []) - section.inline_comments[entry] = configspec.inline_comments.get(entry, '') - check = self.validate(validator, preserve_errors=preserve_errors, copy=copy, section=section[entry]) - out[entry] = check - if check == False: - ret_true = False - elif check == True: - ret_false = False - else: - ret_true = False - - section.extra_values = unvalidated - if preserve_errors and not section._created: - # If the section wasn't created (i.e. it wasn't missing) - # then we can't return False, we need to preserve errors - ret_false = False - # - if ret_false and preserve_errors and out: - # If we are preserving errors, but all - # the failures are from missing sections / values - # then we can return False. Otherwise there is a - # real failure that we need to preserve. - ret_false = not any(out.values()) - if ret_true: - return True - elif ret_false: - return False - return out - - - def reset(self): - """Clear ConfigObj instance and restore to 'freshly created' state.""" - self.clear() - self._initialise() - # FIXME: Should be done by '_initialise', but ConfigObj constructor (and reload) - # requires an empty dictionary - self.configspec = None - # Just to be sure ;-) - self._original_configspec = None - - - def reload(self): - """ - Reload a ConfigObj from file. - - This method raises a ``ReloadError`` if the ConfigObj doesn't have - a filename attribute pointing to a file. - """ - if not isinstance(self.filename, basestring): - raise ReloadError() - - filename = self.filename - current_options = {} - for entry in OPTION_DEFAULTS: - if entry == 'configspec': - continue - current_options[entry] = getattr(self, entry) - - configspec = self._original_configspec - current_options['configspec'] = configspec - - self.clear() - self._initialise(current_options) - self._load(filename, configspec) - - - -class SimpleVal(object): - """ - A simple validator. - Can be used to check that all members expected are present. - - To use it, provide a configspec with all your members in (the value given - will be ignored). Pass an instance of ``SimpleVal`` to the ``validate`` - method of your ``ConfigObj``. ``validate`` will return ``True`` if all - members are present, or a dictionary with True/False meaning - present/missing. (Whole missing sections will be replaced with ``False``) - """ - - def __init__(self): - self.baseErrorClass = ConfigObjError - - def check(self, check, member, missing=False): - """A dummy check method, always returns the value unchanged.""" - if missing: - raise self.baseErrorClass() - return member - - -def flatten_errors(cfg, res, levels=None, results=None): - """ - An example function that will turn a nested dictionary of results - (as returned by ``ConfigObj.validate``) into a flat list. - - ``cfg`` is the ConfigObj instance being checked, ``res`` is the results - dictionary returned by ``validate``. - - (This is a recursive function, so you shouldn't use the ``levels`` or - ``results`` arguments - they are used by the function.) - - Returns a list of keys that failed. Each member of the list is a tuple:: - - ([list of sections...], key, result) - - If ``validate`` was called with ``preserve_errors=False`` (the default) - then ``result`` will always be ``False``. - - *list of sections* is a flattened list of sections that the key was found - in. - - If the section was missing (or a section was expected and a scalar provided - - or vice-versa) then key will be ``None``. - - If the value (or section) was missing then ``result`` will be ``False``. - - If ``validate`` was called with ``preserve_errors=True`` and a value - was present, but failed the check, then ``result`` will be the exception - object returned. You can use this as a string that describes the failure. - - For example *The value "3" is of the wrong type*. - """ - if levels is None: - # first time called - levels = [] - results = [] - if res == True: - return results - if res == False or isinstance(res, Exception): - results.append((levels[:], None, res)) - if levels: - levels.pop() - return results - for (key, val) in res.items(): - if val == True: - continue - if isinstance(cfg.get(key), dict): - # Go down one level - levels.append(key) - flatten_errors(cfg[key], val, levels, results) - continue - results.append((levels[:], key, val)) - # - # Go up one level - if levels: - levels.pop() - # - return results - - -def get_extra_values(conf, _prepend=()): - """ - Find all the values and sections not in the configspec from a validated - ConfigObj. - - ``get_extra_values`` returns a list of tuples where each tuple represents - either an extra section, or an extra value. - - The tuples contain two values, a tuple representing the section the value - is in and the name of the extra values. For extra values in the top level - section the first member will be an empty tuple. For values in the 'foo' - section the first member will be ``('foo',)``. For members in the 'bar' - subsection of the 'foo' section the first member will be ``('foo', 'bar')``. - - NOTE: If you call ``get_extra_values`` on a ConfigObj instance that hasn't - been validated it will return an empty list. - """ - out = [] - - out.extend([(_prepend, name) for name in conf.extra_values]) - for name in conf.sections: - if name not in conf.extra_values: - out.extend(get_extra_values(conf[name], _prepend + (name,))) - return out - - -"""*A programming language is a medium of expression.* - Paul Graham""" diff --git a/data/css/style.css b/data/css/style.css index 3a6ee99b..54c35afc 100644 --- a/data/css/style.css +++ b/data/css/style.css @@ -89,6 +89,9 @@ h1{ .bigtext{ font-size: 22px; } +.updatebar{ + text-align: center; + } a:link { color: #5E2612; text-decoration: none; @@ -121,4 +124,17 @@ a.green { a.externalred { color: red; font-size:12px; - } \ No newline at end of file + } +div.progress-container { + border: 1px solid #ccc; + width: 100px; + margin: 2px 5px 2px 0; + padding: 1px; + float: left; + background: white; +} + +div.progress-container > div { + background-color: #ACE97C; + height: 12px +} \ No newline at end of file diff --git a/feedparser.py b/feedparser.py deleted file mode 100644 index b9144a9e..00000000 --- a/feedparser.py +++ /dev/null @@ -1,3909 +0,0 @@ -#!/usr/bin/env python -"""Universal feed parser - -Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds - -Visit http://feedparser.org/ for the latest version -Visit http://feedparser.org/docs/ for the latest documentation - -Required: Python 2.4 or later -Recommended: CJKCodecs and iconv_codec -""" - -__version__ = "5.0.1" -__license__ = """Copyright (c) 2002-2008, Mark Pilgrim, All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS' -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE.""" -__author__ = "Mark Pilgrim " -__contributors__ = ["Jason Diamond ", - "John Beimler ", - "Fazal Majid ", - "Aaron Swartz ", - "Kevin Marks ", - "Sam Ruby ", - "Ade Oshineye ", - "Martin Pool ", - "Kurt McKee "] -_debug = 0 - -# HTTP "User-Agent" header to send to servers when downloading feeds. -# If you are embedding feedparser in a larger application, you should -# change this to your application name and URL. -USER_AGENT = "UniversalFeedParser/%s +http://feedparser.org/" % __version__ - -# HTTP "Accept" header to send to servers when downloading feeds. If you don't -# want to send an Accept header, set this to None. -ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1" - -# List of preferred XML parsers, by SAX driver name. These will be tried first, -# but if they're not installed, Python will keep searching through its own list -# of pre-installed parsers until it finds one that supports everything we need. -PREFERRED_XML_PARSERS = ["drv_libxml2"] - -# If you want feedparser to automatically run HTML markup through HTML Tidy, set -# this to 1. Requires mxTidy -# or utidylib . -TIDY_MARKUP = 0 - -# List of Python interfaces for HTML Tidy, in order of preference. Only useful -# if TIDY_MARKUP = 1 -PREFERRED_TIDY_INTERFACES = ["uTidy", "mxTidy"] - -# If you want feedparser to automatically resolve all relative URIs, set this -# to 1. -RESOLVE_RELATIVE_URIS = 1 - -# If you want feedparser to automatically sanitize all potentially unsafe -# HTML content, set this to 1. -SANITIZE_HTML = 1 - -# ---------- Python 3 modules (make it work if possible) ---------- -try: - import rfc822 -except ImportError: - from email import _parseaddr as rfc822 - -try: - # Python 3.1 introduces bytes.maketrans and simultaneously - # deprecates string.maketrans; use bytes.maketrans if possible - _maketrans = bytes.maketrans -except (NameError, AttributeError): - import string - _maketrans = string.maketrans - -# base64 support for Atom feeds that contain embedded binary data -try: - import base64, binascii - # Python 3.1 deprecates decodestring in favor of decodebytes - _base64decode = getattr(base64, 'decodebytes', base64.decodestring) -except: - base64 = binascii = None - -def _s2bytes(s): - # Convert a UTF-8 str to bytes if the interpreter is Python 3 - try: - return bytes(s, 'utf8') - except (NameError, TypeError): - # In Python 2.5 and below, bytes doesn't exist (NameError) - # In Python 2.6 and above, bytes and str are the same (TypeError) - return s - -def _l2bytes(l): - # Convert a list of ints to bytes if the interpreter is Python 3 - try: - if bytes is not str: - # In Python 2.6 and above, this call won't raise an exception - # but it will return bytes([65]) as '[65]' instead of 'A' - return bytes(l) - raise NameError - except NameError: - return ''.join(map(chr, l)) - -# If you want feedparser to allow all URL schemes, set this to () -# List culled from Python's urlparse documentation at: -# http://docs.python.org/library/urlparse.html -# as well as from "URI scheme" at Wikipedia: -# https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme -# Many more will likely need to be added! -ACCEPTABLE_URI_SCHEMES = ( - 'file', 'ftp', 'gopher', 'h323', 'hdl', 'http', 'https', 'imap', 'mailto', - 'mms', 'news', 'nntp', 'prospero', 'rsync', 'rtsp', 'rtspu', 'sftp', - 'shttp', 'sip', 'sips', 'snews', 'svn', 'svn+ssh', 'telnet', 'wais', - # Additional common-but-unofficial schemes - 'aim', 'callto', 'cvs', 'facetime', 'feed', 'git', 'gtalk', 'irc', 'ircs', - 'irc6', 'itms', 'mms', 'msnim', 'skype', 'ssh', 'smb', 'svn', 'ymsg', -) -#ACCEPTABLE_URI_SCHEMES = () - -# ---------- required modules (should come with any Python distribution) ---------- -import sgmllib, re, sys, copy, urlparse, time, types, cgi, urllib, urllib2, datetime -try: - from io import BytesIO as _StringIO -except ImportError: - try: - from cStringIO import StringIO as _StringIO - except: - from StringIO import StringIO as _StringIO - -# ---------- optional modules (feedparser will work without these, but with reduced functionality) ---------- - -# gzip is included with most Python distributions, but may not be available if you compiled your own -try: - import gzip -except: - gzip = None -try: - import zlib -except: - zlib = None - -# If a real XML parser is available, feedparser will attempt to use it. feedparser has -# been tested with the built-in SAX parser, PyXML, and libxml2. On platforms where the -# Python distribution does not come with an XML parser (such as Mac OS X 10.2 and some -# versions of FreeBSD), feedparser will quietly fall back on regex-based parsing. -try: - import xml.sax - xml.sax.make_parser(PREFERRED_XML_PARSERS) # test for valid parsers - from xml.sax.saxutils import escape as _xmlescape - _XML_AVAILABLE = 1 -except: - _XML_AVAILABLE = 0 - def _xmlescape(data,entities={}): - data = data.replace('&', '&') - data = data.replace('>', '>') - data = data.replace('<', '<') - for char, entity in entities: - data = data.replace(char, entity) - return data - -# cjkcodecs and iconv_codec provide support for more character encodings. -# Both are available from http://cjkpython.i18n.org/ -try: - import cjkcodecs.aliases -except: - pass -try: - import iconv_codec -except: - pass - -# chardet library auto-detects character encodings -# Download from http://chardet.feedparser.org/ -try: - import chardet - if _debug: - import chardet.constants - chardet.constants._debug = 1 -except: - chardet = None - -# reversable htmlentitydefs mappings for Python 2.2 -try: - from htmlentitydefs import name2codepoint, codepoint2name -except: - import htmlentitydefs - name2codepoint={} - codepoint2name={} - for (name,codepoint) in htmlentitydefs.entitydefs.iteritems(): - if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1])) - name2codepoint[name]=ord(codepoint) - codepoint2name[ord(codepoint)]=name - -# BeautifulSoup parser used for parsing microformats from embedded HTML content -# http://www.crummy.com/software/BeautifulSoup/ -# feedparser is tested with BeautifulSoup 3.0.x, but it might work with the -# older 2.x series. If it doesn't, and you can figure out why, I'll accept a -# patch and modify the compatibility statement accordingly. -try: - import BeautifulSoup -except: - BeautifulSoup = None - -# ---------- don't touch these ---------- -class ThingsNobodyCaresAboutButMe(Exception): pass -class CharacterEncodingOverride(ThingsNobodyCaresAboutButMe): pass -class CharacterEncodingUnknown(ThingsNobodyCaresAboutButMe): pass -class NonXMLContentType(ThingsNobodyCaresAboutButMe): pass -class UndeclaredNamespace(Exception): pass - -sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') -sgmllib.special = re.compile(']|"[^"]*"(?=>|/|\s|\w+=)|'[^']*'(?=>|/|\s|\w+=))*(?=[<>])|.*?(?=[<>])''') - def search(self,string,index=0): - match = self.endbracket.match(string,index) - if match is not None: - # Returning a new object in the calling thread's context - # resolves a thread-safety. - return EndBracketMatch(match) - return None - class EndBracketMatch: - def __init__(self, match): - self.match = match - def start(self, n): - return self.match.end(n) - sgmllib.endbracket = EndBracketRegEx() - -SUPPORTED_VERSIONS = {'': 'unknown', - 'rss090': 'RSS 0.90', - 'rss091n': 'RSS 0.91 (Netscape)', - 'rss091u': 'RSS 0.91 (Userland)', - 'rss092': 'RSS 0.92', - 'rss093': 'RSS 0.93', - 'rss094': 'RSS 0.94', - 'rss20': 'RSS 2.0', - 'rss10': 'RSS 1.0', - 'rss': 'RSS (unknown version)', - 'atom01': 'Atom 0.1', - 'atom02': 'Atom 0.2', - 'atom03': 'Atom 0.3', - 'atom10': 'Atom 1.0', - 'atom': 'Atom (unknown version)', - 'cdf': 'CDF', - 'hotrss': 'Hot RSS' - } - -try: - UserDict = dict -except NameError: - # Python 2.1 does not have dict - from UserDict import UserDict - def dict(aList): - rc = {} - for k, v in aList: - rc[k] = v - return rc - -class FeedParserDict(UserDict): - keymap = {'channel': 'feed', - 'items': 'entries', - 'guid': 'id', - 'date': 'updated', - 'date_parsed': 'updated_parsed', - 'description': ['summary', 'subtitle'], - 'url': ['href'], - 'modified': 'updated', - 'modified_parsed': 'updated_parsed', - 'issued': 'published', - 'issued_parsed': 'published_parsed', - 'copyright': 'rights', - 'copyright_detail': 'rights_detail', - 'tagline': 'subtitle', - 'tagline_detail': 'subtitle_detail'} - def __getitem__(self, key): - if key == 'category': - return UserDict.__getitem__(self, 'tags')[0]['term'] - if key == 'enclosures': - norel = lambda link: FeedParserDict([(name,value) for (name,value) in link.items() if name!='rel']) - return [norel(link) for link in UserDict.__getitem__(self, 'links') if link['rel']=='enclosure'] - if key == 'license': - for link in UserDict.__getitem__(self, 'links'): - if link['rel']=='license' and link.has_key('href'): - return link['href'] - if key == 'categories': - return [(tag['scheme'], tag['term']) for tag in UserDict.__getitem__(self, 'tags')] - realkey = self.keymap.get(key, key) - if type(realkey) == types.ListType: - for k in realkey: - if UserDict.__contains__(self, k): - return UserDict.__getitem__(self, k) - if UserDict.__contains__(self, key): - return UserDict.__getitem__(self, key) - return UserDict.__getitem__(self, realkey) - - def __setitem__(self, key, value): - for k in self.keymap.keys(): - if key == k: - key = self.keymap[k] - if type(key) == types.ListType: - key = key[0] - return UserDict.__setitem__(self, key, value) - - def get(self, key, default=None): - if self.has_key(key): - return self[key] - else: - return default - - def setdefault(self, key, value): - if not self.has_key(key): - self[key] = value - return self[key] - - def has_key(self, key): - try: - return hasattr(self, key) or UserDict.__contains__(self, key) - except AttributeError: - return False - # This alias prevents the 2to3 tool from changing the semantics of the - # __contains__ function below and exhausting the maximum recursion depth - __has_key = has_key - - def __getattr__(self, key): - try: - return self.__dict__[key] - except KeyError: - pass - try: - assert not key.startswith('_') - return self.__getitem__(key) - except: - raise AttributeError, "object has no attribute '%s'" % key - - def __setattr__(self, key, value): - if key.startswith('_') or key == 'data': - self.__dict__[key] = value - else: - return self.__setitem__(key, value) - - def __contains__(self, key): - return self.__has_key(key) - -def zopeCompatibilityHack(): - global FeedParserDict - del FeedParserDict - def FeedParserDict(aDict=None): - rc = {} - if aDict: - rc.update(aDict) - return rc - -_ebcdic_to_ascii_map = None -def _ebcdic_to_ascii(s): - global _ebcdic_to_ascii_map - if not _ebcdic_to_ascii_map: - emap = ( - 0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, - 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, - 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, - 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, - 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, - 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, - 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, - 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, - 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,201, - 202,106,107,108,109,110,111,112,113,114,203,204,205,206,207,208, - 209,126,115,116,117,118,119,120,121,122,210,211,212,213,214,215, - 216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231, - 123,65,66,67,68,69,70,71,72,73,232,233,234,235,236,237, - 125,74,75,76,77,78,79,80,81,82,238,239,240,241,242,243, - 92,159,83,84,85,86,87,88,89,90,244,245,246,247,248,249, - 48,49,50,51,52,53,54,55,56,57,250,251,252,253,254,255 - ) - _ebcdic_to_ascii_map = _maketrans( \ - _l2bytes(range(256)), _l2bytes(emap)) - return s.translate(_ebcdic_to_ascii_map) - -_cp1252 = { - unichr(128): unichr(8364), # euro sign - unichr(130): unichr(8218), # single low-9 quotation mark - unichr(131): unichr( 402), # latin small letter f with hook - unichr(132): unichr(8222), # double low-9 quotation mark - unichr(133): unichr(8230), # horizontal ellipsis - unichr(134): unichr(8224), # dagger - unichr(135): unichr(8225), # double dagger - unichr(136): unichr( 710), # modifier letter circumflex accent - unichr(137): unichr(8240), # per mille sign - unichr(138): unichr( 352), # latin capital letter s with caron - unichr(139): unichr(8249), # single left-pointing angle quotation mark - unichr(140): unichr( 338), # latin capital ligature oe - unichr(142): unichr( 381), # latin capital letter z with caron - unichr(145): unichr(8216), # left single quotation mark - unichr(146): unichr(8217), # right single quotation mark - unichr(147): unichr(8220), # left double quotation mark - unichr(148): unichr(8221), # right double quotation mark - unichr(149): unichr(8226), # bullet - unichr(150): unichr(8211), # en dash - unichr(151): unichr(8212), # em dash - unichr(152): unichr( 732), # small tilde - unichr(153): unichr(8482), # trade mark sign - unichr(154): unichr( 353), # latin small letter s with caron - unichr(155): unichr(8250), # single right-pointing angle quotation mark - unichr(156): unichr( 339), # latin small ligature oe - unichr(158): unichr( 382), # latin small letter z with caron - unichr(159): unichr( 376)} # latin capital letter y with diaeresis - -_urifixer = re.compile('^([A-Za-z][A-Za-z0-9+-.]*://)(/*)(.*?)') -def _urljoin(base, uri): - uri = _urifixer.sub(r'\1\3', uri) - try: - return urlparse.urljoin(base, uri) - except: - uri = urlparse.urlunparse([urllib.quote(part) for part in urlparse.urlparse(uri)]) - return urlparse.urljoin(base, uri) - -class _FeedParserMixin: - namespaces = {'': '', - 'http://backend.userland.com/rss': '', - 'http://blogs.law.harvard.edu/tech/rss': '', - 'http://purl.org/rss/1.0/': '', - 'http://my.netscape.com/rdf/simple/0.9/': '', - 'http://example.com/newformat#': '', - 'http://example.com/necho': '', - 'http://purl.org/echo/': '', - 'uri/of/echo/namespace#': '', - 'http://purl.org/pie/': '', - 'http://purl.org/atom/ns#': '', - 'http://www.w3.org/2005/Atom': '', - 'http://purl.org/rss/1.0/modules/rss091#': '', - - 'http://webns.net/mvcb/': 'admin', - 'http://purl.org/rss/1.0/modules/aggregation/': 'ag', - 'http://purl.org/rss/1.0/modules/annotate/': 'annotate', - 'http://media.tangent.org/rss/1.0/': 'audio', - 'http://backend.userland.com/blogChannelModule': 'blogChannel', - 'http://web.resource.org/cc/': 'cc', - 'http://backend.userland.com/creativeCommonsRssModule': 'creativeCommons', - 'http://purl.org/rss/1.0/modules/company': 'co', - 'http://purl.org/rss/1.0/modules/content/': 'content', - 'http://my.theinfo.org/changed/1.0/rss/': 'cp', - 'http://purl.org/dc/elements/1.1/': 'dc', - 'http://purl.org/dc/terms/': 'dcterms', - 'http://purl.org/rss/1.0/modules/email/': 'email', - 'http://purl.org/rss/1.0/modules/event/': 'ev', - 'http://rssnamespace.org/feedburner/ext/1.0': 'feedburner', - 'http://freshmeat.net/rss/fm/': 'fm', - 'http://xmlns.com/foaf/0.1/': 'foaf', - 'http://www.w3.org/2003/01/geo/wgs84_pos#': 'geo', - 'http://postneo.com/icbm/': 'icbm', - 'http://purl.org/rss/1.0/modules/image/': 'image', - 'http://www.itunes.com/DTDs/PodCast-1.0.dtd': 'itunes', - 'http://example.com/DTDs/PodCast-1.0.dtd': 'itunes', - 'http://purl.org/rss/1.0/modules/link/': 'l', - 'http://search.yahoo.com/mrss': 'media', - #Version 1.1.2 of the Media RSS spec added the trailing slash on the namespace - 'http://search.yahoo.com/mrss/': 'media', - 'http://madskills.com/public/xml/rss/module/pingback/': 'pingback', - 'http://prismstandard.org/namespaces/1.2/basic/': 'prism', - 'http://www.w3.org/1999/02/22-rdf-syntax-ns#': 'rdf', - 'http://www.w3.org/2000/01/rdf-schema#': 'rdfs', - 'http://purl.org/rss/1.0/modules/reference/': 'ref', - 'http://purl.org/rss/1.0/modules/richequiv/': 'reqv', - 'http://purl.org/rss/1.0/modules/search/': 'search', - 'http://purl.org/rss/1.0/modules/slash/': 'slash', - 'http://schemas.xmlsoap.org/soap/envelope/': 'soap', - 'http://purl.org/rss/1.0/modules/servicestatus/': 'ss', - 'http://hacks.benhammersley.com/rss/streaming/': 'str', - 'http://purl.org/rss/1.0/modules/subscription/': 'sub', - 'http://purl.org/rss/1.0/modules/syndication/': 'sy', - 'http://schemas.pocketsoap.com/rss/myDescModule/': 'szf', - 'http://purl.org/rss/1.0/modules/taxonomy/': 'taxo', - 'http://purl.org/rss/1.0/modules/threading/': 'thr', - 'http://purl.org/rss/1.0/modules/textinput/': 'ti', - 'http://madskills.com/public/xml/rss/module/trackback/':'trackback', - 'http://wellformedweb.org/commentAPI/': 'wfw', - 'http://purl.org/rss/1.0/modules/wiki/': 'wiki', - 'http://www.w3.org/1999/xhtml': 'xhtml', - 'http://www.w3.org/1999/xlink': 'xlink', - 'http://www.w3.org/XML/1998/namespace': 'xml' -} - _matchnamespaces = {} - - can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'icon', 'logo'] - can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description'] - can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description'] - html_types = ['text/html', 'application/xhtml+xml'] - - def __init__(self, baseuri=None, baselang=None, encoding='utf-8'): - if _debug: sys.stderr.write('initializing FeedParser\n') - if not self._matchnamespaces: - for k, v in self.namespaces.items(): - self._matchnamespaces[k.lower()] = v - self.feeddata = FeedParserDict() # feed-level data - self.encoding = encoding # character encoding - self.entries = [] # list of entry-level data - self.version = '' # feed type/version, see SUPPORTED_VERSIONS - self.namespacesInUse = {} # dictionary of namespaces defined by the feed - - # the following are used internally to track state; - # this is really out of control and should be refactored - self.infeed = 0 - self.inentry = 0 - self.incontent = 0 - self.intextinput = 0 - self.inimage = 0 - self.inauthor = 0 - self.incontributor = 0 - self.inpublisher = 0 - self.insource = 0 - self.sourcedata = FeedParserDict() - self.contentparams = FeedParserDict() - self._summaryKey = None - self.namespacemap = {} - self.elementstack = [] - self.basestack = [] - self.langstack = [] - self.baseuri = baseuri or '' - self.lang = baselang or None - self.svgOK = 0 - self.hasTitle = 0 - if baselang: - self.feeddata['language'] = baselang.replace('_','-') - - def unknown_starttag(self, tag, attrs): - if _debug: sys.stderr.write('start %s with %s\n' % (tag, attrs)) - # normalize attrs - attrs = [(k.lower(), v) for k, v in attrs] - attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs] - # the sgml parser doesn't handle entities in attributes, but - # strict xml parsers do -- account for this difference - if isinstance(self, _LooseFeedParser): - attrs = [(k, v.replace('&', '&')) for k, v in attrs] - - # track xml:base and xml:lang - attrsD = dict(attrs) - baseuri = attrsD.get('xml:base', attrsD.get('base')) or self.baseuri - if type(baseuri) != type(u''): - try: - baseuri = unicode(baseuri, self.encoding) - except: - baseuri = unicode(baseuri, 'iso-8859-1') - # ensure that self.baseuri is always an absolute URI that - # uses a whitelisted URI scheme (e.g. not `javscript:`) - if self.baseuri: - self.baseuri = _makeSafeAbsoluteURI(self.baseuri, baseuri) or self.baseuri - else: - self.baseuri = _urljoin(self.baseuri, baseuri) - lang = attrsD.get('xml:lang', attrsD.get('lang')) - if lang == '': - # xml:lang could be explicitly set to '', we need to capture that - lang = None - elif lang is None: - # if no xml:lang is specified, use parent lang - lang = self.lang - if lang: - if tag in ('feed', 'rss', 'rdf:RDF'): - self.feeddata['language'] = lang.replace('_','-') - self.lang = lang - self.basestack.append(self.baseuri) - self.langstack.append(lang) - - # track namespaces - for prefix, uri in attrs: - if prefix.startswith('xmlns:'): - self.trackNamespace(prefix[6:], uri) - elif prefix == 'xmlns': - self.trackNamespace(None, uri) - - # track inline content - if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): - if tag in ['xhtml:div', 'div']: return # typepad does this 10/2007 - # element declared itself as escaped markup, but it isn't really - self.contentparams['type'] = 'application/xhtml+xml' - if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml': - if tag.find(':') <> -1: - prefix, tag = tag.split(':', 1) - namespace = self.namespacesInUse.get(prefix, '') - if tag=='math' and namespace=='http://www.w3.org/1998/Math/MathML': - attrs.append(('xmlns',namespace)) - if tag=='svg' and namespace=='http://www.w3.org/2000/svg': - attrs.append(('xmlns',namespace)) - if tag == 'svg': self.svgOK += 1 - return self.handle_data('<%s%s>' % (tag, self.strattrs(attrs)), escape=0) - - # match namespaces - if tag.find(':') <> -1: - prefix, suffix = tag.split(':', 1) - else: - prefix, suffix = '', tag - prefix = self.namespacemap.get(prefix, prefix) - if prefix: - prefix = prefix + '_' - - # special hack for better tracking of empty textinput/image elements in illformed feeds - if (not prefix) and tag not in ('title', 'link', 'description', 'name'): - self.intextinput = 0 - if (not prefix) and tag not in ('title', 'link', 'description', 'url', 'href', 'width', 'height'): - self.inimage = 0 - - # call special handler (if defined) or default handler - methodname = '_start_' + prefix + suffix - try: - method = getattr(self, methodname) - return method(attrsD) - except AttributeError: - # Since there's no handler or something has gone wrong we explicitly add the element and its attributes - unknown_tag = prefix + suffix - if len(attrsD) == 0: - # No attributes so merge it into the encosing dictionary - return self.push(unknown_tag, 1) - else: - # Has attributes so create it in its own dictionary - context = self._getContext() - context[unknown_tag] = attrsD - - def unknown_endtag(self, tag): - if _debug: sys.stderr.write('end %s\n' % tag) - # match namespaces - if tag.find(':') <> -1: - prefix, suffix = tag.split(':', 1) - else: - prefix, suffix = '', tag - prefix = self.namespacemap.get(prefix, prefix) - if prefix: - prefix = prefix + '_' - if suffix == 'svg' and self.svgOK: self.svgOK -= 1 - - # call special handler (if defined) or default handler - methodname = '_end_' + prefix + suffix - try: - if self.svgOK: raise AttributeError() - method = getattr(self, methodname) - method() - except AttributeError: - self.pop(prefix + suffix) - - # track inline content - if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): - # element declared itself as escaped markup, but it isn't really - if tag in ['xhtml:div', 'div']: return # typepad does this 10/2007 - self.contentparams['type'] = 'application/xhtml+xml' - if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml': - tag = tag.split(':')[-1] - self.handle_data('' % tag, escape=0) - - # track xml:base and xml:lang going out of scope - if self.basestack: - self.basestack.pop() - if self.basestack and self.basestack[-1]: - self.baseuri = self.basestack[-1] - if self.langstack: - self.langstack.pop() - if self.langstack: # and (self.langstack[-1] is not None): - self.lang = self.langstack[-1] - - def handle_charref(self, ref): - # called for each character reference, e.g. for ' ', ref will be '160' - if not self.elementstack: return - ref = ref.lower() - if ref in ('34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e'): - text = '&#%s;' % ref - else: - if ref[0] == 'x': - c = int(ref[1:], 16) - else: - c = int(ref) - text = unichr(c).encode('utf-8') - self.elementstack[-1][2].append(text) - - def handle_entityref(self, ref): - # called for each entity reference, e.g. for '©', ref will be 'copy' - if not self.elementstack: return - if _debug: sys.stderr.write('entering handle_entityref with %s\n' % ref) - if ref in ('lt', 'gt', 'quot', 'amp', 'apos'): - text = '&%s;' % ref - elif ref in self.entities.keys(): - text = self.entities[ref] - if text.startswith('&#') and text.endswith(';'): - return self.handle_entityref(text) - else: - try: name2codepoint[ref] - except KeyError: text = '&%s;' % ref - else: text = unichr(name2codepoint[ref]).encode('utf-8') - self.elementstack[-1][2].append(text) - - def handle_data(self, text, escape=1): - # called for each block of plain text, i.e. outside of any tag and - # not containing any character or entity references - if not self.elementstack: return - if escape and self.contentparams.get('type') == 'application/xhtml+xml': - text = _xmlescape(text) - self.elementstack[-1][2].append(text) - - def handle_comment(self, text): - # called for each comment, e.g. - pass - - def handle_pi(self, text): - # called for each processing instruction, e.g. - pass - - def handle_decl(self, text): - pass - - def parse_declaration(self, i): - # override internal declaration handler to handle CDATA blocks - if _debug: sys.stderr.write('entering parse_declaration\n') - if self.rawdata[i:i+9] == '', i) - if k == -1: - # CDATA block began but didn't finish - k = len(self.rawdata) - return k - self.handle_data(_xmlescape(self.rawdata[i+9:k]), 0) - return k+3 - else: - k = self.rawdata.find('>', i) - if k >= 0: - return k+1 - else: - # We have an incomplete CDATA block. - return k - - def mapContentType(self, contentType): - contentType = contentType.lower() - if contentType == 'text' or contentType == 'plain': - contentType = 'text/plain' - elif contentType == 'html': - contentType = 'text/html' - elif contentType == 'xhtml': - contentType = 'application/xhtml+xml' - return contentType - - def trackNamespace(self, prefix, uri): - loweruri = uri.lower() - if (prefix, loweruri) == (None, 'http://my.netscape.com/rdf/simple/0.9/') and not self.version: - self.version = 'rss090' - if loweruri == 'http://purl.org/rss/1.0/' and not self.version: - self.version = 'rss10' - if loweruri == 'http://www.w3.org/2005/atom' and not self.version: - self.version = 'atom10' - if loweruri.find('backend.userland.com/rss') <> -1: - # match any backend.userland.com namespace - uri = 'http://backend.userland.com/rss' - loweruri = uri - if self._matchnamespaces.has_key(loweruri): - self.namespacemap[prefix] = self._matchnamespaces[loweruri] - self.namespacesInUse[self._matchnamespaces[loweruri]] = uri - else: - self.namespacesInUse[prefix or ''] = uri - - def resolveURI(self, uri): - return _urljoin(self.baseuri or '', uri) - - def decodeEntities(self, element, data): - return data - - def strattrs(self, attrs): - return ''.join([' %s="%s"' % (t[0],_xmlescape(t[1],{'"':'"'})) for t in attrs]) - - def push(self, element, expectingText): - self.elementstack.append([element, expectingText, []]) - - def pop(self, element, stripWhitespace=1): - if not self.elementstack: return - if self.elementstack[-1][0] != element: return - - element, expectingText, pieces = self.elementstack.pop() - - if self.version == 'atom10' and self.contentparams.get('type','text') == 'application/xhtml+xml': - # remove enclosing child element, but only if it is a
and - # only if all the remaining content is nested underneath it. - # This means that the divs would be retained in the following: - #
foo
bar
- while pieces and len(pieces)>1 and not pieces[-1].strip(): - del pieces[-1] - while pieces and len(pieces)>1 and not pieces[0].strip(): - del pieces[0] - if pieces and (pieces[0] == '
' or pieces[0].startswith('
': - depth = 0 - for piece in pieces[:-1]: - if piece.startswith(''): - depth += 1 - else: - pieces = pieces[1:-1] - - # Ensure each piece is a str for Python 3 - for (i, v) in enumerate(pieces): - if not isinstance(v, basestring): - pieces[i] = v.decode('utf-8') - - output = ''.join(pieces) - if stripWhitespace: - output = output.strip() - if not expectingText: return output - - # decode base64 content - if base64 and self.contentparams.get('base64', 0): - try: - output = _base64decode(output) - except binascii.Error: - pass - except binascii.Incomplete: - pass - except TypeError: - # In Python 3, base64 takes and outputs bytes, not str - # This may not be the most correct way to accomplish this - output = _base64decode(output.encode('utf-8')).decode('utf-8') - - # resolve relative URIs - if (element in self.can_be_relative_uri) and output: - output = self.resolveURI(output) - - # decode entities within embedded markup - if not self.contentparams.get('base64', 0): - output = self.decodeEntities(element, output) - - if self.lookslikehtml(output): - self.contentparams['type']='text/html' - - # remove temporary cruft from contentparams - try: - del self.contentparams['mode'] - except KeyError: - pass - try: - del self.contentparams['base64'] - except KeyError: - pass - - is_htmlish = self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types - # resolve relative URIs within embedded markup - if is_htmlish and RESOLVE_RELATIVE_URIS: - if element in self.can_contain_relative_uris: - output = _resolveRelativeURIs(output, self.baseuri, self.encoding, self.contentparams.get('type', 'text/html')) - - # parse microformats - # (must do this before sanitizing because some microformats - # rely on elements that we sanitize) - if is_htmlish and element in ['content', 'description', 'summary']: - mfresults = _parseMicroformats(output, self.baseuri, self.encoding) - if mfresults: - for tag in mfresults.get('tags', []): - self._addTag(tag['term'], tag['scheme'], tag['label']) - for enclosure in mfresults.get('enclosures', []): - self._start_enclosure(enclosure) - for xfn in mfresults.get('xfn', []): - self._addXFN(xfn['relationships'], xfn['href'], xfn['name']) - vcard = mfresults.get('vcard') - if vcard: - self._getContext()['vcard'] = vcard - - # sanitize embedded markup - if is_htmlish and SANITIZE_HTML: - if element in self.can_contain_dangerous_markup: - output = _sanitizeHTML(output, self.encoding, self.contentparams.get('type', 'text/html')) - - if self.encoding and type(output) != type(u''): - try: - output = unicode(output, self.encoding) - except: - pass - - # address common error where people take data that is already - # utf-8, presume that it is iso-8859-1, and re-encode it. - if self.encoding in ('utf-8', 'utf-8_INVALID_PYTHON_3') and type(output) == type(u''): - try: - output = unicode(output.encode('iso-8859-1'), 'utf-8') - except: - pass - - # map win-1252 extensions to the proper code points - if type(output) == type(u''): - output = u''.join([c in _cp1252.keys() and _cp1252[c] or c for c in output]) - - # categories/tags/keywords/whatever are handled in _end_category - if element == 'category': - return output - - if element == 'title' and self.hasTitle: - return output - - # store output in appropriate place(s) - if self.inentry and not self.insource: - if element == 'content': - self.entries[-1].setdefault(element, []) - contentparams = copy.deepcopy(self.contentparams) - contentparams['value'] = output - self.entries[-1][element].append(contentparams) - elif element == 'link': - if not self.inimage: - # query variables in urls in link elements are improperly - # converted from `?a=1&b=2` to `?a=1&b;=2` as if they're - # unhandled character references. fix this special case. - output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output) - self.entries[-1][element] = output - if output: - self.entries[-1]['links'][-1]['href'] = output - else: - if element == 'description': - element = 'summary' - self.entries[-1][element] = output - if self.incontent: - contentparams = copy.deepcopy(self.contentparams) - contentparams['value'] = output - self.entries[-1][element + '_detail'] = contentparams - elif (self.infeed or self.insource):# and (not self.intextinput) and (not self.inimage): - context = self._getContext() - if element == 'description': - element = 'subtitle' - context[element] = output - if element == 'link': - # fix query variables; see above for the explanation - output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output) - context[element] = output - context['links'][-1]['href'] = output - elif self.incontent: - contentparams = copy.deepcopy(self.contentparams) - contentparams['value'] = output - context[element + '_detail'] = contentparams - return output - - def pushContent(self, tag, attrsD, defaultContentType, expectingText): - self.incontent += 1 - if self.lang: self.lang=self.lang.replace('_','-') - self.contentparams = FeedParserDict({ - 'type': self.mapContentType(attrsD.get('type', defaultContentType)), - 'language': self.lang, - 'base': self.baseuri}) - self.contentparams['base64'] = self._isBase64(attrsD, self.contentparams) - self.push(tag, expectingText) - - def popContent(self, tag): - value = self.pop(tag) - self.incontent -= 1 - self.contentparams.clear() - return value - - # a number of elements in a number of RSS variants are nominally plain - # text, but this is routinely ignored. This is an attempt to detect - # the most common cases. As false positives often result in silent - # data loss, this function errs on the conservative side. - def lookslikehtml(self, s): - if self.version.startswith('atom'): return - if self.contentparams.get('type','text/html') != 'text/plain': return - - # must have a close tag or a entity reference to qualify - if not (re.search(r'',s) or re.search("&#?\w+;",s)): return - - # all tags must be in a restricted subset of valid HTML tags - if filter(lambda t: t.lower() not in _HTMLSanitizer.acceptable_elements, - re.findall(r' -1: - prefix = name[:colonpos] - suffix = name[colonpos+1:] - prefix = self.namespacemap.get(prefix, prefix) - name = prefix + ':' + suffix - return name - - def _getAttribute(self, attrsD, name): - return attrsD.get(self._mapToStandardPrefix(name)) - - def _isBase64(self, attrsD, contentparams): - if attrsD.get('mode', '') == 'base64': - return 1 - if self.contentparams['type'].startswith('text/'): - return 0 - if self.contentparams['type'].endswith('+xml'): - return 0 - if self.contentparams['type'].endswith('/xml'): - return 0 - return 1 - - def _itsAnHrefDamnIt(self, attrsD): - href = attrsD.get('url', attrsD.get('uri', attrsD.get('href', None))) - if href: - try: - del attrsD['url'] - except KeyError: - pass - try: - del attrsD['uri'] - except KeyError: - pass - attrsD['href'] = href - return attrsD - - def _save(self, key, value, overwrite=False): - context = self._getContext() - if overwrite: - context[key] = value - else: - context.setdefault(key, value) - - def _start_rss(self, attrsD): - versionmap = {'0.91': 'rss091u', - '0.92': 'rss092', - '0.93': 'rss093', - '0.94': 'rss094'} - #If we're here then this is an RSS feed. - #If we don't have a version or have a version that starts with something - #other than RSS then there's been a mistake. Correct it. - if not self.version or not self.version.startswith('rss'): - attr_version = attrsD.get('version', '') - version = versionmap.get(attr_version) - if version: - self.version = version - elif attr_version.startswith('2.'): - self.version = 'rss20' - else: - self.version = 'rss' - - def _start_dlhottitles(self, attrsD): - self.version = 'hotrss' - - def _start_channel(self, attrsD): - self.infeed = 1 - self._cdf_common(attrsD) - _start_feedinfo = _start_channel - - def _cdf_common(self, attrsD): - if attrsD.has_key('lastmod'): - self._start_modified({}) - self.elementstack[-1][-1] = attrsD['lastmod'] - self._end_modified() - if attrsD.has_key('href'): - self._start_link({}) - self.elementstack[-1][-1] = attrsD['href'] - self._end_link() - - def _start_feed(self, attrsD): - self.infeed = 1 - versionmap = {'0.1': 'atom01', - '0.2': 'atom02', - '0.3': 'atom03'} - if not self.version: - attr_version = attrsD.get('version') - version = versionmap.get(attr_version) - if version: - self.version = version - else: - self.version = 'atom' - - def _end_channel(self): - self.infeed = 0 - _end_feed = _end_channel - - def _start_image(self, attrsD): - context = self._getContext() - if not self.inentry: - context.setdefault('image', FeedParserDict()) - self.inimage = 1 - self.hasTitle = 0 - self.push('image', 0) - - def _end_image(self): - self.pop('image') - self.inimage = 0 - - def _start_textinput(self, attrsD): - context = self._getContext() - context.setdefault('textinput', FeedParserDict()) - self.intextinput = 1 - self.hasTitle = 0 - self.push('textinput', 0) - _start_textInput = _start_textinput - - def _end_textinput(self): - self.pop('textinput') - self.intextinput = 0 - _end_textInput = _end_textinput - - def _start_author(self, attrsD): - self.inauthor = 1 - self.push('author', 1) - # Append a new FeedParserDict when expecting an author - context = self._getContext() - context.setdefault('authors', []) - context['authors'].append(FeedParserDict()) - _start_managingeditor = _start_author - _start_dc_author = _start_author - _start_dc_creator = _start_author - _start_itunes_author = _start_author - - def _end_author(self): - self.pop('author') - self.inauthor = 0 - self._sync_author_detail() - _end_managingeditor = _end_author - _end_dc_author = _end_author - _end_dc_creator = _end_author - _end_itunes_author = _end_author - - def _start_itunes_owner(self, attrsD): - self.inpublisher = 1 - self.push('publisher', 0) - - def _end_itunes_owner(self): - self.pop('publisher') - self.inpublisher = 0 - self._sync_author_detail('publisher') - - def _start_contributor(self, attrsD): - self.incontributor = 1 - context = self._getContext() - context.setdefault('contributors', []) - context['contributors'].append(FeedParserDict()) - self.push('contributor', 0) - - def _end_contributor(self): - self.pop('contributor') - self.incontributor = 0 - - def _start_dc_contributor(self, attrsD): - self.incontributor = 1 - context = self._getContext() - context.setdefault('contributors', []) - context['contributors'].append(FeedParserDict()) - self.push('name', 0) - - def _end_dc_contributor(self): - self._end_name() - self.incontributor = 0 - - def _start_name(self, attrsD): - self.push('name', 0) - _start_itunes_name = _start_name - - def _end_name(self): - value = self.pop('name') - if self.inpublisher: - self._save_author('name', value, 'publisher') - elif self.inauthor: - self._save_author('name', value) - elif self.incontributor: - self._save_contributor('name', value) - elif self.intextinput: - context = self._getContext() - context['name'] = value - _end_itunes_name = _end_name - - def _start_width(self, attrsD): - self.push('width', 0) - - def _end_width(self): - value = self.pop('width') - try: - value = int(value) - except: - value = 0 - if self.inimage: - context = self._getContext() - context['width'] = value - - def _start_height(self, attrsD): - self.push('height', 0) - - def _end_height(self): - value = self.pop('height') - try: - value = int(value) - except: - value = 0 - if self.inimage: - context = self._getContext() - context['height'] = value - - def _start_url(self, attrsD): - self.push('href', 1) - _start_homepage = _start_url - _start_uri = _start_url - - def _end_url(self): - value = self.pop('href') - if self.inauthor: - self._save_author('href', value) - elif self.incontributor: - self._save_contributor('href', value) - _end_homepage = _end_url - _end_uri = _end_url - - def _start_email(self, attrsD): - self.push('email', 0) - _start_itunes_email = _start_email - - def _end_email(self): - value = self.pop('email') - if self.inpublisher: - self._save_author('email', value, 'publisher') - elif self.inauthor: - self._save_author('email', value) - elif self.incontributor: - self._save_contributor('email', value) - _end_itunes_email = _end_email - - def _getContext(self): - if self.insource: - context = self.sourcedata - elif self.inimage and self.feeddata.has_key('image'): - context = self.feeddata['image'] - elif self.intextinput: - context = self.feeddata['textinput'] - elif self.inentry: - context = self.entries[-1] - else: - context = self.feeddata - return context - - def _save_author(self, key, value, prefix='author'): - context = self._getContext() - context.setdefault(prefix + '_detail', FeedParserDict()) - context[prefix + '_detail'][key] = value - self._sync_author_detail() - context.setdefault('authors', [FeedParserDict()]) - context['authors'][-1][key] = value - - def _save_contributor(self, key, value): - context = self._getContext() - context.setdefault('contributors', [FeedParserDict()]) - context['contributors'][-1][key] = value - - def _sync_author_detail(self, key='author'): - context = self._getContext() - detail = context.get('%s_detail' % key) - if detail: - name = detail.get('name') - email = detail.get('email') - if name and email: - context[key] = '%s (%s)' % (name, email) - elif name: - context[key] = name - elif email: - context[key] = email - else: - author, email = context.get(key), None - if not author: return - emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))(\?subject=\S+)?''', author) - if emailmatch: - email = emailmatch.group(0) - # probably a better way to do the following, but it passes all the tests - author = author.replace(email, '') - author = author.replace('()', '') - author = author.replace('<>', '') - author = author.replace('<>', '') - author = author.strip() - if author and (author[0] == '('): - author = author[1:] - if author and (author[-1] == ')'): - author = author[:-1] - author = author.strip() - if author or email: - context.setdefault('%s_detail' % key, FeedParserDict()) - if author: - context['%s_detail' % key]['name'] = author - if email: - context['%s_detail' % key]['email'] = email - - def _start_subtitle(self, attrsD): - self.pushContent('subtitle', attrsD, 'text/plain', 1) - _start_tagline = _start_subtitle - _start_itunes_subtitle = _start_subtitle - - def _end_subtitle(self): - self.popContent('subtitle') - _end_tagline = _end_subtitle - _end_itunes_subtitle = _end_subtitle - - def _start_rights(self, attrsD): - self.pushContent('rights', attrsD, 'text/plain', 1) - _start_dc_rights = _start_rights - _start_copyright = _start_rights - - def _end_rights(self): - self.popContent('rights') - _end_dc_rights = _end_rights - _end_copyright = _end_rights - - def _start_item(self, attrsD): - self.entries.append(FeedParserDict()) - self.push('item', 0) - self.inentry = 1 - self.guidislink = 0 - self.hasTitle = 0 - id = self._getAttribute(attrsD, 'rdf:about') - if id: - context = self._getContext() - context['id'] = id - self._cdf_common(attrsD) - _start_entry = _start_item - _start_product = _start_item - - def _end_item(self): - self.pop('item') - self.inentry = 0 - _end_entry = _end_item - - def _start_dc_language(self, attrsD): - self.push('language', 1) - _start_language = _start_dc_language - - def _end_dc_language(self): - self.lang = self.pop('language') - _end_language = _end_dc_language - - def _start_dc_publisher(self, attrsD): - self.push('publisher', 1) - _start_webmaster = _start_dc_publisher - - def _end_dc_publisher(self): - self.pop('publisher') - self._sync_author_detail('publisher') - _end_webmaster = _end_dc_publisher - - def _start_published(self, attrsD): - self.push('published', 1) - _start_dcterms_issued = _start_published - _start_issued = _start_published - - def _end_published(self): - value = self.pop('published') - self._save('published_parsed', _parse_date(value), overwrite=True) - _end_dcterms_issued = _end_published - _end_issued = _end_published - - def _start_updated(self, attrsD): - self.push('updated', 1) - _start_modified = _start_updated - _start_dcterms_modified = _start_updated - _start_pubdate = _start_updated - _start_dc_date = _start_updated - _start_lastbuilddate = _start_updated - - def _end_updated(self): - value = self.pop('updated') - parsed_value = _parse_date(value) - self._save('updated_parsed', parsed_value, overwrite=True) - _end_modified = _end_updated - _end_dcterms_modified = _end_updated - _end_pubdate = _end_updated - _end_dc_date = _end_updated - _end_lastbuilddate = _end_updated - - def _start_created(self, attrsD): - self.push('created', 1) - _start_dcterms_created = _start_created - - def _end_created(self): - value = self.pop('created') - self._save('created_parsed', _parse_date(value), overwrite=True) - _end_dcterms_created = _end_created - - def _start_expirationdate(self, attrsD): - self.push('expired', 1) - - def _end_expirationdate(self): - self._save('expired_parsed', _parse_date(self.pop('expired')), overwrite=True) - - def _start_cc_license(self, attrsD): - context = self._getContext() - value = self._getAttribute(attrsD, 'rdf:resource') - attrsD = FeedParserDict() - attrsD['rel']='license' - if value: attrsD['href']=value - context.setdefault('links', []).append(attrsD) - - def _start_creativecommons_license(self, attrsD): - self.push('license', 1) - _start_creativeCommons_license = _start_creativecommons_license - - def _end_creativecommons_license(self): - value = self.pop('license') - context = self._getContext() - attrsD = FeedParserDict() - attrsD['rel']='license' - if value: attrsD['href']=value - context.setdefault('links', []).append(attrsD) - del context['license'] - _end_creativeCommons_license = _end_creativecommons_license - - def _addXFN(self, relationships, href, name): - context = self._getContext() - xfn = context.setdefault('xfn', []) - value = FeedParserDict({'relationships': relationships, 'href': href, 'name': name}) - if value not in xfn: - xfn.append(value) - - def _addTag(self, term, scheme, label): - context = self._getContext() - tags = context.setdefault('tags', []) - if (not term) and (not scheme) and (not label): return - value = FeedParserDict({'term': term, 'scheme': scheme, 'label': label}) - if value not in tags: - tags.append(value) - - def _start_category(self, attrsD): - if _debug: sys.stderr.write('entering _start_category with %s\n' % repr(attrsD)) - term = attrsD.get('term') - scheme = attrsD.get('scheme', attrsD.get('domain')) - label = attrsD.get('label') - self._addTag(term, scheme, label) - self.push('category', 1) - _start_dc_subject = _start_category - _start_keywords = _start_category - - def _start_media_category(self, attrsD): - attrsD.setdefault('scheme', 'http://search.yahoo.com/mrss/category_schema') - self._start_category(attrsD) - - def _end_itunes_keywords(self): - for term in self.pop('itunes_keywords').split(): - self._addTag(term, 'http://www.itunes.com/', None) - - def _start_itunes_category(self, attrsD): - self._addTag(attrsD.get('text'), 'http://www.itunes.com/', None) - self.push('category', 1) - - def _end_category(self): - value = self.pop('category') - if not value: return - context = self._getContext() - tags = context['tags'] - if value and len(tags) and not tags[-1]['term']: - tags[-1]['term'] = value - else: - self._addTag(value, None, None) - _end_dc_subject = _end_category - _end_keywords = _end_category - _end_itunes_category = _end_category - _end_media_category = _end_category - - def _start_cloud(self, attrsD): - self._getContext()['cloud'] = FeedParserDict(attrsD) - - def _start_link(self, attrsD): - attrsD.setdefault('rel', 'alternate') - if attrsD['rel'] == 'self': - attrsD.setdefault('type', 'application/atom+xml') - else: - attrsD.setdefault('type', 'text/html') - context = self._getContext() - attrsD = self._itsAnHrefDamnIt(attrsD) - if attrsD.has_key('href'): - attrsD['href'] = self.resolveURI(attrsD['href']) - expectingText = self.infeed or self.inentry or self.insource - context.setdefault('links', []) - if not (self.inentry and self.inimage): - context['links'].append(FeedParserDict(attrsD)) - if attrsD.has_key('href'): - expectingText = 0 - if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types): - context['link'] = attrsD['href'] - else: - self.push('link', expectingText) - _start_producturl = _start_link - - def _end_link(self): - value = self.pop('link') - context = self._getContext() - _end_producturl = _end_link - - def _start_guid(self, attrsD): - self.guidislink = (attrsD.get('ispermalink', 'true') == 'true') - self.push('id', 1) - - def _end_guid(self): - value = self.pop('id') - self._save('guidislink', self.guidislink and not self._getContext().has_key('link')) - if self.guidislink: - # guid acts as link, but only if 'ispermalink' is not present or is 'true', - # and only if the item doesn't already have a link element - self._save('link', value) - - def _start_title(self, attrsD): - if self.svgOK: return self.unknown_starttag('title', attrsD.items()) - self.pushContent('title', attrsD, 'text/plain', self.infeed or self.inentry or self.insource) - _start_dc_title = _start_title - _start_media_title = _start_title - - def _end_title(self): - if self.svgOK: return - value = self.popContent('title') - if not value: return - context = self._getContext() - self.hasTitle = 1 - _end_dc_title = _end_title - - def _end_media_title(self): - hasTitle = self.hasTitle - self._end_title() - self.hasTitle = hasTitle - - def _start_description(self, attrsD): - context = self._getContext() - if context.has_key('summary'): - self._summaryKey = 'content' - self._start_content(attrsD) - else: - self.pushContent('description', attrsD, 'text/html', self.infeed or self.inentry or self.insource) - _start_dc_description = _start_description - - def _start_abstract(self, attrsD): - self.pushContent('description', attrsD, 'text/plain', self.infeed or self.inentry or self.insource) - - def _end_description(self): - if self._summaryKey == 'content': - self._end_content() - else: - value = self.popContent('description') - self._summaryKey = None - _end_abstract = _end_description - _end_dc_description = _end_description - - def _start_info(self, attrsD): - self.pushContent('info', attrsD, 'text/plain', 1) - _start_feedburner_browserfriendly = _start_info - - def _end_info(self): - self.popContent('info') - _end_feedburner_browserfriendly = _end_info - - def _start_generator(self, attrsD): - if attrsD: - attrsD = self._itsAnHrefDamnIt(attrsD) - if attrsD.has_key('href'): - attrsD['href'] = self.resolveURI(attrsD['href']) - self._getContext()['generator_detail'] = FeedParserDict(attrsD) - self.push('generator', 1) - - def _end_generator(self): - value = self.pop('generator') - context = self._getContext() - if context.has_key('generator_detail'): - context['generator_detail']['name'] = value - - def _start_admin_generatoragent(self, attrsD): - self.push('generator', 1) - value = self._getAttribute(attrsD, 'rdf:resource') - if value: - self.elementstack[-1][2].append(value) - self.pop('generator') - self._getContext()['generator_detail'] = FeedParserDict({'href': value}) - - def _start_admin_errorreportsto(self, attrsD): - self.push('errorreportsto', 1) - value = self._getAttribute(attrsD, 'rdf:resource') - if value: - self.elementstack[-1][2].append(value) - self.pop('errorreportsto') - - def _start_summary(self, attrsD): - context = self._getContext() - if context.has_key('summary'): - self._summaryKey = 'content' - self._start_content(attrsD) - else: - self._summaryKey = 'summary' - self.pushContent(self._summaryKey, attrsD, 'text/plain', 1) - _start_itunes_summary = _start_summary - - def _end_summary(self): - if self._summaryKey == 'content': - self._end_content() - else: - self.popContent(self._summaryKey or 'summary') - self._summaryKey = None - _end_itunes_summary = _end_summary - - def _start_enclosure(self, attrsD): - attrsD = self._itsAnHrefDamnIt(attrsD) - context = self._getContext() - attrsD['rel']='enclosure' - context.setdefault('links', []).append(FeedParserDict(attrsD)) - - def _start_source(self, attrsD): - if 'url' in attrsD: - # This means that we're processing a source element from an RSS 2.0 feed - self.sourcedata['href'] = attrsD[u'url'] - self.push('source', 1) - self.insource = 1 - self.hasTitle = 0 - - def _end_source(self): - self.insource = 0 - value = self.pop('source') - if value: - self.sourcedata['title'] = value - self._getContext()['source'] = copy.deepcopy(self.sourcedata) - self.sourcedata.clear() - - def _start_content(self, attrsD): - self.pushContent('content', attrsD, 'text/plain', 1) - src = attrsD.get('src') - if src: - self.contentparams['src'] = src - self.push('content', 1) - - def _start_prodlink(self, attrsD): - self.pushContent('content', attrsD, 'text/html', 1) - - def _start_body(self, attrsD): - self.pushContent('content', attrsD, 'application/xhtml+xml', 1) - _start_xhtml_body = _start_body - - def _start_content_encoded(self, attrsD): - self.pushContent('content', attrsD, 'text/html', 1) - _start_fullitem = _start_content_encoded - - def _end_content(self): - copyToSummary = self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types) - value = self.popContent('content') - if copyToSummary: - self._save('summary', value) - - _end_body = _end_content - _end_xhtml_body = _end_content - _end_content_encoded = _end_content - _end_fullitem = _end_content - _end_prodlink = _end_content - - def _start_itunes_image(self, attrsD): - self.push('itunes_image', 0) - if attrsD.get('href'): - self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')}) - _start_itunes_link = _start_itunes_image - - def _end_itunes_block(self): - value = self.pop('itunes_block', 0) - self._getContext()['itunes_block'] = (value == 'yes') and 1 or 0 - - def _end_itunes_explicit(self): - value = self.pop('itunes_explicit', 0) - # Convert 'yes' -> True, 'clean' to False, and any other value to None - # False and None both evaluate as False, so the difference can be ignored - # by applications that only need to know if the content is explicit. - self._getContext()['itunes_explicit'] = (None, False, True)[(value == 'yes' and 2) or value == 'clean' or 0] - - def _start_media_content(self, attrsD): - context = self._getContext() - context.setdefault('media_content', []) - context['media_content'].append(attrsD) - - def _start_media_thumbnail(self, attrsD): - context = self._getContext() - context.setdefault('media_thumbnail', []) - self.push('url', 1) # new - context['media_thumbnail'].append(attrsD) - - def _end_media_thumbnail(self): - url = self.pop('url') - context = self._getContext() - if url != None and len(url.strip()) != 0: - if not context['media_thumbnail'][-1].has_key('url'): - context['media_thumbnail'][-1]['url'] = url - - def _start_media_player(self, attrsD): - self.push('media_player', 0) - self._getContext()['media_player'] = FeedParserDict(attrsD) - - def _end_media_player(self): - value = self.pop('media_player') - context = self._getContext() - context['media_player']['content'] = value - - def _start_newlocation(self, attrsD): - self.push('newlocation', 1) - - def _end_newlocation(self): - url = self.pop('newlocation') - context = self._getContext() - # don't set newlocation if the context isn't right - if context is not self.feeddata: - return - context['newlocation'] = _makeSafeAbsoluteURI(self.baseuri, url.strip()) - -if _XML_AVAILABLE: - class _StrictFeedParser(_FeedParserMixin, xml.sax.handler.ContentHandler): - def __init__(self, baseuri, baselang, encoding): - if _debug: sys.stderr.write('trying StrictFeedParser\n') - xml.sax.handler.ContentHandler.__init__(self) - _FeedParserMixin.__init__(self, baseuri, baselang, encoding) - self.bozo = 0 - self.exc = None - self.decls = {} - - def startPrefixMapping(self, prefix, uri): - self.trackNamespace(prefix, uri) - if uri == 'http://www.w3.org/1999/xlink': - self.decls['xmlns:'+prefix] = uri - - def startElementNS(self, name, qname, attrs): - namespace, localname = name - lowernamespace = str(namespace or '').lower() - if lowernamespace.find('backend.userland.com/rss') <> -1: - # match any backend.userland.com namespace - namespace = 'http://backend.userland.com/rss' - lowernamespace = namespace - if qname and qname.find(':') > 0: - givenprefix = qname.split(':')[0] - else: - givenprefix = None - prefix = self._matchnamespaces.get(lowernamespace, givenprefix) - if givenprefix and (prefix == None or (prefix == '' and lowernamespace == '')) and not self.namespacesInUse.has_key(givenprefix): - raise UndeclaredNamespace, "'%s' is not associated with a namespace" % givenprefix - localname = str(localname).lower() - - # qname implementation is horribly broken in Python 2.1 (it - # doesn't report any), and slightly broken in Python 2.2 (it - # doesn't report the xml: namespace). So we match up namespaces - # with a known list first, and then possibly override them with - # the qnames the SAX parser gives us (if indeed it gives us any - # at all). Thanks to MatejC for helping me test this and - # tirelessly telling me that it didn't work yet. - attrsD, self.decls = self.decls, {} - if localname=='math' and namespace=='http://www.w3.org/1998/Math/MathML': - attrsD['xmlns']=namespace - if localname=='svg' and namespace=='http://www.w3.org/2000/svg': - attrsD['xmlns']=namespace - - if prefix: - localname = prefix.lower() + ':' + localname - elif namespace and not qname: #Expat - for name,value in self.namespacesInUse.items(): - if name and value == namespace: - localname = name + ':' + localname - break - if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname)) - - for (namespace, attrlocalname), attrvalue in attrs._attrs.items(): - lowernamespace = (namespace or '').lower() - prefix = self._matchnamespaces.get(lowernamespace, '') - if prefix: - attrlocalname = prefix + ':' + attrlocalname - attrsD[str(attrlocalname).lower()] = attrvalue - for qname in attrs.getQNames(): - attrsD[str(qname).lower()] = attrs.getValueByQName(qname) - self.unknown_starttag(localname, attrsD.items()) - - def characters(self, text): - self.handle_data(text) - - def endElementNS(self, name, qname): - namespace, localname = name - lowernamespace = str(namespace or '').lower() - if qname and qname.find(':') > 0: - givenprefix = qname.split(':')[0] - else: - givenprefix = '' - prefix = self._matchnamespaces.get(lowernamespace, givenprefix) - if prefix: - localname = prefix + ':' + localname - elif namespace and not qname: #Expat - for name,value in self.namespacesInUse.items(): - if name and value == namespace: - localname = name + ':' + localname - break - localname = str(localname).lower() - self.unknown_endtag(localname) - - def error(self, exc): - self.bozo = 1 - self.exc = exc - - def fatalError(self, exc): - self.error(exc) - raise exc - -class _BaseHTMLProcessor(sgmllib.SGMLParser): - special = re.compile('''[<>'"]''') - bare_ampersand = re.compile("&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)") - elements_no_end_tag = [ - 'area', 'base', 'basefont', 'br', 'col', 'command', 'embed', 'frame', - 'hr', 'img', 'input', 'isindex', 'keygen', 'link', 'meta', 'param', - 'source', 'track', 'wbr' - ] - - def __init__(self, encoding, _type): - self.encoding = encoding - self._type = _type - if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding) - sgmllib.SGMLParser.__init__(self) - - def reset(self): - self.pieces = [] - sgmllib.SGMLParser.reset(self) - - def _shorttag_replace(self, match): - tag = match.group(1) - if tag in self.elements_no_end_tag: - return '<' + tag + ' />' - else: - return '<' + tag + '>' - - def parse_starttag(self,i): - j=sgmllib.SGMLParser.parse_starttag(self, i) - if self._type == 'application/xhtml+xml': - if j>2 and self.rawdata[j-2:j]=='/>': - self.unknown_endtag(self.lasttag) - return j - - def feed(self, data): - data = re.compile(r'', self._shorttag_replace, data) # bug [ 1399464 ] Bad regexp for _shorttag_replace - data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data) - data = data.replace(''', "'") - data = data.replace('"', '"') - try: - bytes - if bytes is str: - raise NameError - self.encoding = self.encoding + '_INVALID_PYTHON_3' - except NameError: - if self.encoding and type(data) == type(u''): - data = data.encode(self.encoding) - sgmllib.SGMLParser.feed(self, data) - sgmllib.SGMLParser.close(self) - - def normalize_attrs(self, attrs): - if not attrs: return attrs - # utility method to be called by descendants - attrs = dict([(k.lower(), v) for k, v in attrs]).items() - attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs] - attrs.sort() - return attrs - - def unknown_starttag(self, tag, attrs): - # called for each start tag - # attrs is a list of (attr, value) tuples - # e.g. for
, tag='pre', attrs=[('class', 'screen')]
-        if _debug: sys.stderr.write('_BaseHTMLProcessor, unknown_starttag, tag=%s\n' % tag)
-        uattrs = []
-        strattrs=''
-        if attrs:
-            for key, value in attrs:
-                value=value.replace('>','>').replace('<','<').replace('"','"')
-                value = self.bare_ampersand.sub("&", value)
-                # thanks to Kevin Marks for this breathtaking hack to deal with (valid) high-bit attribute values in UTF-8 feeds
-                if type(value) != type(u''):
-                    try:
-                        value = unicode(value, self.encoding)
-                    except:
-                        value = unicode(value, 'iso-8859-1')
-                try:
-                    # Currently, in Python 3 the key is already a str, and cannot be decoded again
-                    uattrs.append((unicode(key, self.encoding), value))
-                except TypeError:
-                    uattrs.append((key, value))
-            strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs])
-            if self.encoding:
-                try:
-                    strattrs=strattrs.encode(self.encoding)
-                except:
-                    pass
-        if tag in self.elements_no_end_tag:
-            self.pieces.append('<%(tag)s%(strattrs)s />' % locals())
-        else:
-            self.pieces.append('<%(tag)s%(strattrs)s>' % locals())
-
-    def unknown_endtag(self, tag):
-        # called for each end tag, e.g. for 
, tag will be 'pre' - # Reconstruct the original end tag. - if tag not in self.elements_no_end_tag: - self.pieces.append("" % locals()) - - def handle_charref(self, ref): - # called for each character reference, e.g. for ' ', ref will be '160' - # Reconstruct the original character reference. - if ref.startswith('x'): - value = unichr(int(ref[1:],16)) - else: - value = unichr(int(ref)) - - if value in _cp1252.keys(): - self.pieces.append('&#%s;' % hex(ord(_cp1252[value]))[1:]) - else: - self.pieces.append('&#%(ref)s;' % locals()) - - def handle_entityref(self, ref): - # called for each entity reference, e.g. for '©', ref will be 'copy' - # Reconstruct the original entity reference. - if name2codepoint.has_key(ref): - self.pieces.append('&%(ref)s;' % locals()) - else: - self.pieces.append('&%(ref)s' % locals()) - - def handle_data(self, text): - # called for each block of plain text, i.e. outside of any tag and - # not containing any character or entity references - # Store the original text verbatim. - if _debug: sys.stderr.write('_BaseHTMLProcessor, handle_data, text=%s\n' % text) - self.pieces.append(text) - - def handle_comment(self, text): - # called for each HTML comment, e.g. - # Reconstruct the original comment. - self.pieces.append('' % locals()) - - def handle_pi(self, text): - # called for each processing instruction, e.g. - # Reconstruct original processing instruction. - self.pieces.append('' % locals()) - - def handle_decl(self, text): - # called for the DOCTYPE, if present, e.g. - # - # Reconstruct original DOCTYPE - self.pieces.append('' % locals()) - - _new_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9:]*\s*').match - def _scan_name(self, i, declstartpos): - rawdata = self.rawdata - n = len(rawdata) - if i == n: - return None, -1 - m = self._new_declname_match(rawdata, i) - if m: - s = m.group() - name = s.strip() - if (i + len(s)) == n: - return None, -1 # end of buffer - return name.lower(), m.end() - else: - self.handle_data(rawdata) -# self.updatepos(declstartpos, i) - return None, -1 - - def convert_charref(self, name): - return '&#%s;' % name - - def convert_entityref(self, name): - return '&%s;' % name - - def output(self): - '''Return processed HTML as a single string''' - return ''.join([str(p) for p in self.pieces]) - - def parse_declaration(self, i): - try: - return sgmllib.SGMLParser.parse_declaration(self, i) - except sgmllib.SGMLParseError: - # escape the doctype declaration and continue parsing - self.handle_data('<') - return i+1 - -class _LooseFeedParser(_FeedParserMixin, _BaseHTMLProcessor): - def __init__(self, baseuri, baselang, encoding, entities): - sgmllib.SGMLParser.__init__(self) - _FeedParserMixin.__init__(self, baseuri, baselang, encoding) - _BaseHTMLProcessor.__init__(self, encoding, 'application/xhtml+xml') - self.entities=entities - - def decodeEntities(self, element, data): - data = data.replace('<', '<') - data = data.replace('<', '<') - data = data.replace('<', '<') - data = data.replace('>', '>') - data = data.replace('>', '>') - data = data.replace('>', '>') - data = data.replace('&', '&') - data = data.replace('&', '&') - data = data.replace('"', '"') - data = data.replace('"', '"') - data = data.replace(''', ''') - data = data.replace(''', ''') - if self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): - data = data.replace('<', '<') - data = data.replace('>', '>') - data = data.replace('&', '&') - data = data.replace('"', '"') - data = data.replace(''', "'") - return data - - def strattrs(self, attrs): - return ''.join([' %s="%s"' % (n,v.replace('"','"')) for n,v in attrs]) - -class _MicroformatsParser: - STRING = 1 - DATE = 2 - URI = 3 - NODE = 4 - EMAIL = 5 - - known_xfn_relationships = ['contact', 'acquaintance', 'friend', 'met', 'co-worker', 'coworker', 'colleague', 'co-resident', 'coresident', 'neighbor', 'child', 'parent', 'sibling', 'brother', 'sister', 'spouse', 'wife', 'husband', 'kin', 'relative', 'muse', 'crush', 'date', 'sweetheart', 'me'] - known_binary_extensions = ['zip','rar','exe','gz','tar','tgz','tbz2','bz2','z','7z','dmg','img','sit','sitx','hqx','deb','rpm','bz2','jar','rar','iso','bin','msi','mp2','mp3','ogg','ogm','mp4','m4v','m4a','avi','wma','wmv'] - - def __init__(self, data, baseuri, encoding): - self.document = BeautifulSoup.BeautifulSoup(data) - self.baseuri = baseuri - self.encoding = encoding - if type(data) == type(u''): - data = data.encode(encoding) - self.tags = [] - self.enclosures = [] - self.xfn = [] - self.vcard = None - - def vcardEscape(self, s): - if type(s) in (type(''), type(u'')): - s = s.replace(',', '\\,').replace(';', '\\;').replace('\n', '\\n') - return s - - def vcardFold(self, s): - s = re.sub(';+$', '', s) - sFolded = '' - iMax = 75 - sPrefix = '' - while len(s) > iMax: - sFolded += sPrefix + s[:iMax] + '\n' - s = s[iMax:] - sPrefix = ' ' - iMax = 74 - sFolded += sPrefix + s - return sFolded - - def normalize(self, s): - return re.sub(r'\s+', ' ', s).strip() - - def unique(self, aList): - results = [] - for element in aList: - if element not in results: - results.append(element) - return results - - def toISO8601(self, dt): - return time.strftime('%Y-%m-%dT%H:%M:%SZ', dt) - - def getPropertyValue(self, elmRoot, sProperty, iPropertyType=4, bAllowMultiple=0, bAutoEscape=0): - all = lambda x: 1 - sProperty = sProperty.lower() - bFound = 0 - bNormalize = 1 - propertyMatch = {'class': re.compile(r'\b%s\b' % sProperty)} - if bAllowMultiple and (iPropertyType != self.NODE): - snapResults = [] - containers = elmRoot(['ul', 'ol'], propertyMatch) - for container in containers: - snapResults.extend(container('li')) - bFound = (len(snapResults) != 0) - if not bFound: - snapResults = elmRoot(all, propertyMatch) - bFound = (len(snapResults) != 0) - if (not bFound) and (sProperty == 'value'): - snapResults = elmRoot('pre') - bFound = (len(snapResults) != 0) - bNormalize = not bFound - if not bFound: - snapResults = [elmRoot] - bFound = (len(snapResults) != 0) - arFilter = [] - if sProperty == 'vcard': - snapFilter = elmRoot(all, propertyMatch) - for node in snapFilter: - if node.findParent(all, propertyMatch): - arFilter.append(node) - arResults = [] - for node in snapResults: - if node not in arFilter: - arResults.append(node) - bFound = (len(arResults) != 0) - if not bFound: - if bAllowMultiple: return [] - elif iPropertyType == self.STRING: return '' - elif iPropertyType == self.DATE: return None - elif iPropertyType == self.URI: return '' - elif iPropertyType == self.NODE: return None - else: return None - arValues = [] - for elmResult in arResults: - sValue = None - if iPropertyType == self.NODE: - if bAllowMultiple: - arValues.append(elmResult) - continue - else: - return elmResult - sNodeName = elmResult.name.lower() - if (iPropertyType == self.EMAIL) and (sNodeName == 'a'): - sValue = (elmResult.get('href') or '').split('mailto:').pop().split('?')[0] - if sValue: - sValue = bNormalize and self.normalize(sValue) or sValue.strip() - if (not sValue) and (sNodeName == 'abbr'): - sValue = elmResult.get('title') - if sValue: - sValue = bNormalize and self.normalize(sValue) or sValue.strip() - if (not sValue) and (iPropertyType == self.URI): - if sNodeName == 'a': sValue = elmResult.get('href') - elif sNodeName == 'img': sValue = elmResult.get('src') - elif sNodeName == 'object': sValue = elmResult.get('data') - if sValue: - sValue = bNormalize and self.normalize(sValue) or sValue.strip() - if (not sValue) and (sNodeName == 'img'): - sValue = elmResult.get('alt') - if sValue: - sValue = bNormalize and self.normalize(sValue) or sValue.strip() - if not sValue: - sValue = elmResult.renderContents() - sValue = re.sub(r'<\S[^>]*>', '', sValue) - sValue = sValue.replace('\r\n', '\n') - sValue = sValue.replace('\r', '\n') - if sValue: - sValue = bNormalize and self.normalize(sValue) or sValue.strip() - if not sValue: continue - if iPropertyType == self.DATE: - sValue = _parse_date_iso8601(sValue) - if bAllowMultiple: - arValues.append(bAutoEscape and self.vcardEscape(sValue) or sValue) - else: - return bAutoEscape and self.vcardEscape(sValue) or sValue - return arValues - - def findVCards(self, elmRoot, bAgentParsing=0): - sVCards = '' - - if not bAgentParsing: - arCards = self.getPropertyValue(elmRoot, 'vcard', bAllowMultiple=1) - else: - arCards = [elmRoot] - - for elmCard in arCards: - arLines = [] - - def processSingleString(sProperty): - sValue = self.getPropertyValue(elmCard, sProperty, self.STRING, bAutoEscape=1).decode(self.encoding) - if sValue: - arLines.append(self.vcardFold(sProperty.upper() + ':' + sValue)) - return sValue or u'' - - def processSingleURI(sProperty): - sValue = self.getPropertyValue(elmCard, sProperty, self.URI) - if sValue: - sContentType = '' - sEncoding = '' - sValueKey = '' - if sValue.startswith('data:'): - sEncoding = ';ENCODING=b' - sContentType = sValue.split(';')[0].split('/').pop() - sValue = sValue.split(',', 1).pop() - else: - elmValue = self.getPropertyValue(elmCard, sProperty) - if elmValue: - if sProperty != 'url': - sValueKey = ';VALUE=uri' - sContentType = elmValue.get('type', '').strip().split('/').pop().strip() - sContentType = sContentType.upper() - if sContentType == 'OCTET-STREAM': - sContentType = '' - if sContentType: - sContentType = ';TYPE=' + sContentType.upper() - arLines.append(self.vcardFold(sProperty.upper() + sEncoding + sContentType + sValueKey + ':' + sValue)) - - def processTypeValue(sProperty, arDefaultType, arForceType=None): - arResults = self.getPropertyValue(elmCard, sProperty, bAllowMultiple=1) - for elmResult in arResults: - arType = self.getPropertyValue(elmResult, 'type', self.STRING, 1, 1) - if arForceType: - arType = self.unique(arForceType + arType) - if not arType: - arType = arDefaultType - sValue = self.getPropertyValue(elmResult, 'value', self.EMAIL, 0) - if sValue: - arLines.append(self.vcardFold(sProperty.upper() + ';TYPE=' + ','.join(arType) + ':' + sValue)) - - # AGENT - # must do this before all other properties because it is destructive - # (removes nested class="vcard" nodes so they don't interfere with - # this vcard's other properties) - arAgent = self.getPropertyValue(elmCard, 'agent', bAllowMultiple=1) - for elmAgent in arAgent: - if re.compile(r'\bvcard\b').search(elmAgent.get('class')): - sAgentValue = self.findVCards(elmAgent, 1) + '\n' - sAgentValue = sAgentValue.replace('\n', '\\n') - sAgentValue = sAgentValue.replace(';', '\\;') - if sAgentValue: - arLines.append(self.vcardFold('AGENT:' + sAgentValue)) - # Completely remove the agent element from the parse tree - elmAgent.extract() - else: - sAgentValue = self.getPropertyValue(elmAgent, 'value', self.URI, bAutoEscape=1); - if sAgentValue: - arLines.append(self.vcardFold('AGENT;VALUE=uri:' + sAgentValue)) - - # FN (full name) - sFN = processSingleString('fn') - - # N (name) - elmName = self.getPropertyValue(elmCard, 'n') - if elmName: - sFamilyName = self.getPropertyValue(elmName, 'family-name', self.STRING, bAutoEscape=1) - sGivenName = self.getPropertyValue(elmName, 'given-name', self.STRING, bAutoEscape=1) - arAdditionalNames = self.getPropertyValue(elmName, 'additional-name', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'additional-names', self.STRING, 1, 1) - arHonorificPrefixes = self.getPropertyValue(elmName, 'honorific-prefix', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'honorific-prefixes', self.STRING, 1, 1) - arHonorificSuffixes = self.getPropertyValue(elmName, 'honorific-suffix', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'honorific-suffixes', self.STRING, 1, 1) - arLines.append(self.vcardFold('N:' + sFamilyName + ';' + - sGivenName + ';' + - ','.join(arAdditionalNames) + ';' + - ','.join(arHonorificPrefixes) + ';' + - ','.join(arHonorificSuffixes))) - elif sFN: - # implied "N" optimization - # http://microformats.org/wiki/hcard#Implied_.22N.22_Optimization - arNames = self.normalize(sFN).split() - if len(arNames) == 2: - bFamilyNameFirst = (arNames[0].endswith(',') or - len(arNames[1]) == 1 or - ((len(arNames[1]) == 2) and (arNames[1].endswith('.')))) - if bFamilyNameFirst: - arLines.append(self.vcardFold('N:' + arNames[0] + ';' + arNames[1])) - else: - arLines.append(self.vcardFold('N:' + arNames[1] + ';' + arNames[0])) - - # SORT-STRING - sSortString = self.getPropertyValue(elmCard, 'sort-string', self.STRING, bAutoEscape=1) - if sSortString: - arLines.append(self.vcardFold('SORT-STRING:' + sSortString)) - - # NICKNAME - arNickname = self.getPropertyValue(elmCard, 'nickname', self.STRING, 1, 1) - if arNickname: - arLines.append(self.vcardFold('NICKNAME:' + ','.join(arNickname))) - - # PHOTO - processSingleURI('photo') - - # BDAY - dtBday = self.getPropertyValue(elmCard, 'bday', self.DATE) - if dtBday: - arLines.append(self.vcardFold('BDAY:' + self.toISO8601(dtBday))) - - # ADR (address) - arAdr = self.getPropertyValue(elmCard, 'adr', bAllowMultiple=1) - for elmAdr in arAdr: - arType = self.getPropertyValue(elmAdr, 'type', self.STRING, 1, 1) - if not arType: - arType = ['intl','postal','parcel','work'] # default adr types, see RFC 2426 section 3.2.1 - sPostOfficeBox = self.getPropertyValue(elmAdr, 'post-office-box', self.STRING, 0, 1) - sExtendedAddress = self.getPropertyValue(elmAdr, 'extended-address', self.STRING, 0, 1) - sStreetAddress = self.getPropertyValue(elmAdr, 'street-address', self.STRING, 0, 1) - sLocality = self.getPropertyValue(elmAdr, 'locality', self.STRING, 0, 1) - sRegion = self.getPropertyValue(elmAdr, 'region', self.STRING, 0, 1) - sPostalCode = self.getPropertyValue(elmAdr, 'postal-code', self.STRING, 0, 1) - sCountryName = self.getPropertyValue(elmAdr, 'country-name', self.STRING, 0, 1) - arLines.append(self.vcardFold('ADR;TYPE=' + ','.join(arType) + ':' + - sPostOfficeBox + ';' + - sExtendedAddress + ';' + - sStreetAddress + ';' + - sLocality + ';' + - sRegion + ';' + - sPostalCode + ';' + - sCountryName)) - - # LABEL - processTypeValue('label', ['intl','postal','parcel','work']) - - # TEL (phone number) - processTypeValue('tel', ['voice']) - - # EMAIL - processTypeValue('email', ['internet'], ['internet']) - - # MAILER - processSingleString('mailer') - - # TZ (timezone) - processSingleString('tz') - - # GEO (geographical information) - elmGeo = self.getPropertyValue(elmCard, 'geo') - if elmGeo: - sLatitude = self.getPropertyValue(elmGeo, 'latitude', self.STRING, 0, 1) - sLongitude = self.getPropertyValue(elmGeo, 'longitude', self.STRING, 0, 1) - arLines.append(self.vcardFold('GEO:' + sLatitude + ';' + sLongitude)) - - # TITLE - processSingleString('title') - - # ROLE - processSingleString('role') - - # LOGO - processSingleURI('logo') - - # ORG (organization) - elmOrg = self.getPropertyValue(elmCard, 'org') - if elmOrg: - sOrganizationName = self.getPropertyValue(elmOrg, 'organization-name', self.STRING, 0, 1) - if not sOrganizationName: - # implied "organization-name" optimization - # http://microformats.org/wiki/hcard#Implied_.22organization-name.22_Optimization - sOrganizationName = self.getPropertyValue(elmCard, 'org', self.STRING, 0, 1) - if sOrganizationName: - arLines.append(self.vcardFold('ORG:' + sOrganizationName)) - else: - arOrganizationUnit = self.getPropertyValue(elmOrg, 'organization-unit', self.STRING, 1, 1) - arLines.append(self.vcardFold('ORG:' + sOrganizationName + ';' + ';'.join(arOrganizationUnit))) - - # CATEGORY - arCategory = self.getPropertyValue(elmCard, 'category', self.STRING, 1, 1) + self.getPropertyValue(elmCard, 'categories', self.STRING, 1, 1) - if arCategory: - arLines.append(self.vcardFold('CATEGORIES:' + ','.join(arCategory))) - - # NOTE - processSingleString('note') - - # REV - processSingleString('rev') - - # SOUND - processSingleURI('sound') - - # UID - processSingleString('uid') - - # URL - processSingleURI('url') - - # CLASS - processSingleString('class') - - # KEY - processSingleURI('key') - - if arLines: - arLines = [u'BEGIN:vCard',u'VERSION:3.0'] + arLines + [u'END:vCard'] - sVCards += u'\n'.join(arLines) + u'\n' - - return sVCards.strip() - - def isProbablyDownloadable(self, elm): - attrsD = elm.attrMap - if not attrsD.has_key('href'): return 0 - linktype = attrsD.get('type', '').strip() - if linktype.startswith('audio/') or \ - linktype.startswith('video/') or \ - (linktype.startswith('application/') and not linktype.endswith('xml')): - return 1 - path = urlparse.urlparse(attrsD['href'])[2] - if path.find('.') == -1: return 0 - fileext = path.split('.').pop().lower() - return fileext in self.known_binary_extensions - - def findTags(self): - all = lambda x: 1 - for elm in self.document(all, {'rel': re.compile(r'\btag\b')}): - href = elm.get('href') - if not href: continue - urlscheme, domain, path, params, query, fragment = \ - urlparse.urlparse(_urljoin(self.baseuri, href)) - segments = path.split('/') - tag = segments.pop() - if not tag: - tag = segments.pop() - tagscheme = urlparse.urlunparse((urlscheme, domain, '/'.join(segments), '', '', '')) - if not tagscheme.endswith('/'): - tagscheme += '/' - self.tags.append(FeedParserDict({"term": tag, "scheme": tagscheme, "label": elm.string or ''})) - - def findEnclosures(self): - all = lambda x: 1 - enclosure_match = re.compile(r'\benclosure\b') - for elm in self.document(all, {'href': re.compile(r'.+')}): - if not enclosure_match.search(elm.get('rel', '')) and not self.isProbablyDownloadable(elm): continue - if elm.attrMap not in self.enclosures: - self.enclosures.append(elm.attrMap) - if elm.string and not elm.get('title'): - self.enclosures[-1]['title'] = elm.string - - def findXFN(self): - all = lambda x: 1 - for elm in self.document(all, {'rel': re.compile('.+'), 'href': re.compile('.+')}): - rels = elm.get('rel', '').split() - xfn_rels = [] - for rel in rels: - if rel in self.known_xfn_relationships: - xfn_rels.append(rel) - if xfn_rels: - self.xfn.append({"relationships": xfn_rels, "href": elm.get('href', ''), "name": elm.string}) - -def _parseMicroformats(htmlSource, baseURI, encoding): - if not BeautifulSoup: return - if _debug: sys.stderr.write('entering _parseMicroformats\n') - try: - p = _MicroformatsParser(htmlSource, baseURI, encoding) - except UnicodeEncodeError: - # sgmllib throws this exception when performing lookups of tags - # with non-ASCII characters in them. - return - p.vcard = p.findVCards(p.document) - p.findTags() - p.findEnclosures() - p.findXFN() - return {"tags": p.tags, "enclosures": p.enclosures, "xfn": p.xfn, "vcard": p.vcard} - -class _RelativeURIResolver(_BaseHTMLProcessor): - relative_uris = [('a', 'href'), - ('applet', 'codebase'), - ('area', 'href'), - ('blockquote', 'cite'), - ('body', 'background'), - ('del', 'cite'), - ('form', 'action'), - ('frame', 'longdesc'), - ('frame', 'src'), - ('iframe', 'longdesc'), - ('iframe', 'src'), - ('head', 'profile'), - ('img', 'longdesc'), - ('img', 'src'), - ('img', 'usemap'), - ('input', 'src'), - ('input', 'usemap'), - ('ins', 'cite'), - ('link', 'href'), - ('object', 'classid'), - ('object', 'codebase'), - ('object', 'data'), - ('object', 'usemap'), - ('q', 'cite'), - ('script', 'src')] - - def __init__(self, baseuri, encoding, _type): - _BaseHTMLProcessor.__init__(self, encoding, _type) - self.baseuri = baseuri - - def resolveURI(self, uri): - return _makeSafeAbsoluteURI(_urljoin(self.baseuri, uri.strip())) - - def unknown_starttag(self, tag, attrs): - if _debug: - sys.stderr.write('tag: [%s] with attributes: [%s]\n' % (tag, str(attrs))) - attrs = self.normalize_attrs(attrs) - attrs = [(key, ((tag, key) in self.relative_uris) and self.resolveURI(value) or value) for key, value in attrs] - _BaseHTMLProcessor.unknown_starttag(self, tag, attrs) - -def _resolveRelativeURIs(htmlSource, baseURI, encoding, _type): - if _debug: - sys.stderr.write('entering _resolveRelativeURIs\n') - - p = _RelativeURIResolver(baseURI, encoding, _type) - p.feed(htmlSource) - return p.output() - -def _makeSafeAbsoluteURI(base, rel=None): - # bail if ACCEPTABLE_URI_SCHEMES is empty - if not ACCEPTABLE_URI_SCHEMES: - return _urljoin(base, rel or u'') - if not base: - return rel or u'' - if not rel: - scheme = urlparse.urlparse(base)[0] - if not scheme or scheme in ACCEPTABLE_URI_SCHEMES: - return base - return u'' - uri = _urljoin(base, rel) - if uri.strip().split(':', 1)[0] not in ACCEPTABLE_URI_SCHEMES: - return u'' - return uri - -class _HTMLSanitizer(_BaseHTMLProcessor): - acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', - 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button', - 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', - 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', - 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset', - 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1', - 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', - 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter', - 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option', - 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select', - 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong', - 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', - 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video', 'noscript'] - - acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey', - 'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis', - 'background', 'balance', 'bgcolor', 'bgproperties', 'border', - 'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding', - 'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff', - 'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color', 'cols', - 'colspan', 'compact', 'contenteditable', 'controls', 'coords', 'data', - 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default', 'delay', - 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end', 'face', 'for', - 'form', 'frame', 'galleryimg', 'gutter', 'headers', 'height', 'hidefocus', - 'hidden', 'high', 'href', 'hreflang', 'hspace', 'icon', 'id', 'inputmode', - 'ismap', 'keytype', 'label', 'leftspacing', 'lang', 'list', 'longdesc', - 'loop', 'loopcount', 'loopend', 'loopstart', 'low', 'lowsrc', 'max', - 'maxlength', 'media', 'method', 'min', 'multiple', 'name', 'nohref', - 'noshade', 'nowrap', 'open', 'optimum', 'pattern', 'ping', 'point-size', - 'prompt', 'pqg', 'radiogroup', 'readonly', 'rel', 'repeat-max', - 'repeat-min', 'replace', 'required', 'rev', 'rightspacing', 'rows', - 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', - 'start', 'step', 'summary', 'suppress', 'tabindex', 'target', 'template', - 'title', 'toppadding', 'type', 'unselectable', 'usemap', 'urn', 'valign', - 'value', 'variable', 'volume', 'vspace', 'vrml', 'width', 'wrap', - 'xml:lang'] - - unacceptable_elements_with_end_tag = ['script', 'applet', 'style'] - - acceptable_css_properties = ['azimuth', 'background-color', - 'border-bottom-color', 'border-collapse', 'border-color', - 'border-left-color', 'border-right-color', 'border-top-color', 'clear', - 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font', - 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight', - 'height', 'letter-spacing', 'line-height', 'overflow', 'pause', - 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness', - 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation', - 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent', - 'unicode-bidi', 'vertical-align', 'voice-family', 'volume', - 'white-space', 'width'] - - # survey of common keywords found in feeds - acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue', - 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed', - 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left', - 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive', - 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top', - 'transparent', 'underline', 'white', 'yellow'] - - valid_css_values = re.compile('^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|' + - '\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$') - - mathml_elements = ['annotation', 'annotation-xml', 'maction', 'math', - 'merror', 'mfenced', 'mfrac', 'mi', 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', - 'mphantom', 'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', - 'msub', 'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', - 'munderover', 'none', 'semantics'] - - mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign', - 'columnalign', 'close', 'columnlines', 'columnspacing', 'columnspan', 'depth', - 'display', 'displaystyle', 'encoding', 'equalcolumns', 'equalrows', - 'fence', 'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', - 'lspace', 'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', - 'maxsize', 'minsize', 'open', 'other', 'rowalign', 'rowalign', 'rowalign', - 'rowlines', 'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection', - 'separator', 'separators', 'stretchy', 'width', 'width', 'xlink:href', - 'xlink:show', 'xlink:type', 'xmlns', 'xmlns:xlink'] - - # svgtiny - foreignObject + linearGradient + radialGradient + stop - svg_elements = ['a', 'animate', 'animateColor', 'animateMotion', - 'animateTransform', 'circle', 'defs', 'desc', 'ellipse', 'foreignObject', - 'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', - 'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph', 'mpath', - 'path', 'polygon', 'polyline', 'radialGradient', 'rect', 'set', 'stop', - 'svg', 'switch', 'text', 'title', 'tspan', 'use'] - - # svgtiny + class + opacity + offset + xmlns + xmlns:xlink - svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic', - 'arabic-form', 'ascent', 'attributeName', 'attributeType', - 'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height', - 'class', 'color', 'color-rendering', 'content', 'cx', 'cy', 'd', 'dx', - 'dy', 'descent', 'display', 'dur', 'end', 'fill', 'fill-opacity', - 'fill-rule', 'font-family', 'font-size', 'font-stretch', 'font-style', - 'font-variant', 'font-weight', 'from', 'fx', 'fy', 'g1', 'g2', - 'glyph-name', 'gradientUnits', 'hanging', 'height', 'horiz-adv-x', - 'horiz-origin-x', 'id', 'ideographic', 'k', 'keyPoints', 'keySplines', - 'keyTimes', 'lang', 'mathematical', 'marker-end', 'marker-mid', - 'marker-start', 'markerHeight', 'markerUnits', 'markerWidth', 'max', - 'min', 'name', 'offset', 'opacity', 'orient', 'origin', - 'overline-position', 'overline-thickness', 'panose-1', 'path', - 'pathLength', 'points', 'preserveAspectRatio', 'r', 'refX', 'refY', - 'repeatCount', 'repeatDur', 'requiredExtensions', 'requiredFeatures', - 'restart', 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', - 'stop-color', 'stop-opacity', 'strikethrough-position', - 'strikethrough-thickness', 'stroke', 'stroke-dasharray', - 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin', - 'stroke-miterlimit', 'stroke-opacity', 'stroke-width', 'systemLanguage', - 'target', 'text-anchor', 'to', 'transform', 'type', 'u1', 'u2', - 'underline-position', 'underline-thickness', 'unicode', 'unicode-range', - 'units-per-em', 'values', 'version', 'viewBox', 'visibility', 'width', - 'widths', 'x', 'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole', - 'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type', - 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y', 'y1', - 'y2', 'zoomAndPan'] - - svg_attr_map = None - svg_elem_map = None - - acceptable_svg_properties = [ 'fill', 'fill-opacity', 'fill-rule', - 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin', - 'stroke-opacity'] - - def reset(self): - _BaseHTMLProcessor.reset(self) - self.unacceptablestack = 0 - self.mathmlOK = 0 - self.svgOK = 0 - - def unknown_starttag(self, tag, attrs): - acceptable_attributes = self.acceptable_attributes - keymap = {} - if not tag in self.acceptable_elements or self.svgOK: - if tag in self.unacceptable_elements_with_end_tag: - self.unacceptablestack += 1 - - # add implicit namespaces to html5 inline svg/mathml - if self._type.endswith('html'): - if not dict(attrs).get('xmlns'): - if tag=='svg': - attrs.append( ('xmlns','http://www.w3.org/2000/svg') ) - if tag=='math': - attrs.append( ('xmlns','http://www.w3.org/1998/Math/MathML') ) - - # not otherwise acceptable, perhaps it is MathML or SVG? - if tag=='math' and ('xmlns','http://www.w3.org/1998/Math/MathML') in attrs: - self.mathmlOK += 1 - if tag=='svg' and ('xmlns','http://www.w3.org/2000/svg') in attrs: - self.svgOK += 1 - - # chose acceptable attributes based on tag class, else bail - if self.mathmlOK and tag in self.mathml_elements: - acceptable_attributes = self.mathml_attributes - elif self.svgOK and tag in self.svg_elements: - # for most vocabularies, lowercasing is a good idea. Many - # svg elements, however, are camel case - if not self.svg_attr_map: - lower=[attr.lower() for attr in self.svg_attributes] - mix=[a for a in self.svg_attributes if a not in lower] - self.svg_attributes = lower - self.svg_attr_map = dict([(a.lower(),a) for a in mix]) - - lower=[attr.lower() for attr in self.svg_elements] - mix=[a for a in self.svg_elements if a not in lower] - self.svg_elements = lower - self.svg_elem_map = dict([(a.lower(),a) for a in mix]) - acceptable_attributes = self.svg_attributes - tag = self.svg_elem_map.get(tag,tag) - keymap = self.svg_attr_map - elif not tag in self.acceptable_elements: - return - - # declare xlink namespace, if needed - if self.mathmlOK or self.svgOK: - if filter(lambda (n,v): n.startswith('xlink:'),attrs): - if not ('xmlns:xlink','http://www.w3.org/1999/xlink') in attrs: - attrs.append(('xmlns:xlink','http://www.w3.org/1999/xlink')) - - clean_attrs = [] - for key, value in self.normalize_attrs(attrs): - if key in acceptable_attributes: - key=keymap.get(key,key) - # make sure the uri uses an acceptable uri scheme - if key == u'href': - value = _makeSafeAbsoluteURI(value) - clean_attrs.append((key,value)) - elif key=='style': - clean_value = self.sanitize_style(value) - if clean_value: clean_attrs.append((key,clean_value)) - _BaseHTMLProcessor.unknown_starttag(self, tag, clean_attrs) - - def unknown_endtag(self, tag): - if not tag in self.acceptable_elements: - if tag in self.unacceptable_elements_with_end_tag: - self.unacceptablestack -= 1 - if self.mathmlOK and tag in self.mathml_elements: - if tag == 'math' and self.mathmlOK: self.mathmlOK -= 1 - elif self.svgOK and tag in self.svg_elements: - tag = self.svg_elem_map.get(tag,tag) - if tag == 'svg' and self.svgOK: self.svgOK -= 1 - else: - return - _BaseHTMLProcessor.unknown_endtag(self, tag) - - def handle_pi(self, text): - pass - - def handle_decl(self, text): - pass - - def handle_data(self, text): - if not self.unacceptablestack: - _BaseHTMLProcessor.handle_data(self, text) - - def sanitize_style(self, style): - # disallow urls - style=re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ',style) - - # gauntlet - if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return '' - # This replaced a regexp that used re.match and was prone to pathological back-tracking. - if re.sub("\s*[-\w]+\s*:\s*[^:;]*;?", '', style).strip(): return '' - - clean = [] - for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style): - if not value: continue - if prop.lower() in self.acceptable_css_properties: - clean.append(prop + ': ' + value + ';') - elif prop.split('-')[0].lower() in ['background','border','margin','padding']: - for keyword in value.split(): - if not keyword in self.acceptable_css_keywords and \ - not self.valid_css_values.match(keyword): - break - else: - clean.append(prop + ': ' + value + ';') - elif self.svgOK and prop.lower() in self.acceptable_svg_properties: - clean.append(prop + ': ' + value + ';') - - return ' '.join(clean) - - def parse_comment(self, i, report=1): - ret = _BaseHTMLProcessor.parse_comment(self, i, report) - if ret >= 0: - return ret - # if ret == -1, this may be a malicious attempt to circumvent - # sanitization, or a page-destroying unclosed comment - match = re.compile(r'--[^>]*>').search(self.rawdata, i+4) - if match: - return match.end() - # unclosed comment; deliberately fail to handle_data() - return len(self.rawdata) - - -def _sanitizeHTML(htmlSource, encoding, _type): - p = _HTMLSanitizer(encoding, _type) - htmlSource = htmlSource.replace(''): - data = data.split('>', 1)[1] - if data.count('= '2.3.3' - assert base64 != None - user, passw = _base64decode(req.headers['Authorization'].split(' ')[1]).split(':') - realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0] - self.add_password(realm, host, user, passw) - retry = self.http_error_auth_reqed('www-authenticate', host, req, headers) - self.reset_retry_count() - return retry - except: - return self.http_error_default(req, fp, code, msg, headers) - -def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers): - """URL, filename, or string --> stream - - This function lets you define parsers that take any input source - (URL, pathname to local or network file, or actual data as a string) - and deal with it in a uniform manner. Returned object is guaranteed - to have all the basic stdio read methods (read, readline, readlines). - Just .close() the object when you're done with it. - - If the etag argument is supplied, it will be used as the value of an - If-None-Match request header. - - If the modified argument is supplied, it can be a tuple of 9 integers - (as returned by gmtime() in the standard Python time module) or a date - string in any format supported by feedparser. Regardless, it MUST - be in GMT (Greenwich Mean Time). It will be reformatted into an - RFC 1123-compliant date and used as the value of an If-Modified-Since - request header. - - If the agent argument is supplied, it will be used as the value of a - User-Agent request header. - - If the referrer argument is supplied, it will be used as the value of a - Referer[sic] request header. - - If handlers is supplied, it is a list of handlers used to build a - urllib2 opener. - - if request_headers is supplied it is a dictionary of HTTP request headers - that will override the values generated by FeedParser. - """ - - if hasattr(url_file_stream_or_string, 'read'): - return url_file_stream_or_string - - if url_file_stream_or_string == '-': - return sys.stdin - - if urlparse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp', 'file', 'feed'): - # Deal with the feed URI scheme - if url_file_stream_or_string.startswith('feed:http'): - url_file_stream_or_string = url_file_stream_or_string[5:] - elif url_file_stream_or_string.startswith('feed:'): - url_file_stream_or_string = 'http:' + url_file_stream_or_string[5:] - if not agent: - agent = USER_AGENT - # test for inline user:password for basic auth - auth = None - if base64: - urltype, rest = urllib.splittype(url_file_stream_or_string) - realhost, rest = urllib.splithost(rest) - if realhost: - user_passwd, realhost = urllib.splituser(realhost) - if user_passwd: - url_file_stream_or_string = '%s://%s%s' % (urltype, realhost, rest) - auth = base64.standard_b64encode(user_passwd).strip() - - # iri support - try: - if isinstance(url_file_stream_or_string,unicode): - url_file_stream_or_string = url_file_stream_or_string.encode('idna').decode('utf-8') - else: - url_file_stream_or_string = url_file_stream_or_string.decode('utf-8').encode('idna').decode('utf-8') - except: - pass - - # try to open with urllib2 (to use optional headers) - request = _build_urllib2_request(url_file_stream_or_string, agent, etag, modified, referrer, auth, request_headers) - opener = apply(urllib2.build_opener, tuple(handlers + [_FeedURLHandler()])) - opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent - try: - return opener.open(request) - finally: - opener.close() # JohnD - - # try to open with native open function (if url_file_stream_or_string is a filename) - try: - return open(url_file_stream_or_string, 'rb') - except: - pass - - # treat url_file_stream_or_string as string - return _StringIO(str(url_file_stream_or_string)) - -def _build_urllib2_request(url, agent, etag, modified, referrer, auth, request_headers): - request = urllib2.Request(url) - request.add_header('User-Agent', agent) - if etag: - request.add_header('If-None-Match', etag) - if type(modified) == type(''): - modified = _parse_date(modified) - elif isinstance(modified, datetime.datetime): - modified = modified.utctimetuple() - if modified: - # format into an RFC 1123-compliant timestamp. We can't use - # time.strftime() since the %a and %b directives can be affected - # by the current locale, but RFC 2616 states that dates must be - # in English. - short_weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] - months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - request.add_header('If-Modified-Since', '%s, %02d %s %04d %02d:%02d:%02d GMT' % (short_weekdays[modified[6]], modified[2], months[modified[1] - 1], modified[0], modified[3], modified[4], modified[5])) - if referrer: - request.add_header('Referer', referrer) - if gzip and zlib: - request.add_header('Accept-encoding', 'gzip, deflate') - elif gzip: - request.add_header('Accept-encoding', 'gzip') - elif zlib: - request.add_header('Accept-encoding', 'deflate') - else: - request.add_header('Accept-encoding', '') - if auth: - request.add_header('Authorization', 'Basic %s' % auth) - if ACCEPT_HEADER: - request.add_header('Accept', ACCEPT_HEADER) - # use this for whatever -- cookies, special headers, etc - # [('Cookie','Something'),('x-special-header','Another Value')] - for header_name, header_value in request_headers.items(): - request.add_header(header_name, header_value) - request.add_header('A-IM', 'feed') # RFC 3229 support - return request - -_date_handlers = [] -def registerDateHandler(func): - '''Register a date handler function (takes string, returns 9-tuple date in GMT)''' - _date_handlers.insert(0, func) - -# ISO-8601 date parsing routines written by Fazal Majid. -# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601 -# parser is beyond the scope of feedparser and would be a worthwhile addition -# to the Python library. -# A single regular expression cannot parse ISO 8601 date formats into groups -# as the standard is highly irregular (for instance is 030104 2003-01-04 or -# 0301-04-01), so we use templates instead. -# Please note the order in templates is significant because we need a -# greedy match. -_iso8601_tmpl = ['YYYY-?MM-?DD', 'YYYY-0MM?-?DD', 'YYYY-MM', 'YYYY-?OOO', - 'YY-?MM-?DD', 'YY-?OOO', 'YYYY', - '-YY-?MM', '-OOO', '-YY', - '--MM-?DD', '--MM', - '---DD', - 'CC', ''] -_iso8601_re = [ - tmpl.replace( - 'YYYY', r'(?P\d{4})').replace( - 'YY', r'(?P\d\d)').replace( - 'MM', r'(?P[01]\d)').replace( - 'DD', r'(?P[0123]\d)').replace( - 'OOO', r'(?P[0123]\d\d)').replace( - 'CC', r'(?P\d\d$)') - + r'(T?(?P\d{2}):(?P\d{2})' - + r'(:(?P\d{2}))?' - + r'(\.(?P\d+))?' - + r'(?P[+-](?P\d{2})(:(?P\d{2}))?|Z)?)?' - for tmpl in _iso8601_tmpl] -try: - del tmpl -except NameError: - pass -_iso8601_matches = [re.compile(regex).match for regex in _iso8601_re] -try: - del regex -except NameError: - pass -def _parse_date_iso8601(dateString): - '''Parse a variety of ISO-8601-compatible formats like 20040105''' - m = None - for _iso8601_match in _iso8601_matches: - m = _iso8601_match(dateString) - if m: break - if not m: return - if m.span() == (0, 0): return - params = m.groupdict() - ordinal = params.get('ordinal', 0) - if ordinal: - ordinal = int(ordinal) - else: - ordinal = 0 - year = params.get('year', '--') - if not year or year == '--': - year = time.gmtime()[0] - elif len(year) == 2: - # ISO 8601 assumes current century, i.e. 93 -> 2093, NOT 1993 - year = 100 * int(time.gmtime()[0] / 100) + int(year) - else: - year = int(year) - month = params.get('month', '-') - if not month or month == '-': - # ordinals are NOT normalized by mktime, we simulate them - # by setting month=1, day=ordinal - if ordinal: - month = 1 - else: - month = time.gmtime()[1] - month = int(month) - day = params.get('day', 0) - if not day: - # see above - if ordinal: - day = ordinal - elif params.get('century', 0) or \ - params.get('year', 0) or params.get('month', 0): - day = 1 - else: - day = time.gmtime()[2] - else: - day = int(day) - # special case of the century - is the first year of the 21st century - # 2000 or 2001 ? The debate goes on... - if 'century' in params.keys(): - year = (int(params['century']) - 1) * 100 + 1 - # in ISO 8601 most fields are optional - for field in ['hour', 'minute', 'second', 'tzhour', 'tzmin']: - if not params.get(field, None): - params[field] = 0 - hour = int(params.get('hour', 0)) - minute = int(params.get('minute', 0)) - second = int(float(params.get('second', 0))) - # weekday is normalized by mktime(), we can ignore it - weekday = 0 - daylight_savings_flag = -1 - tm = [year, month, day, hour, minute, second, weekday, - ordinal, daylight_savings_flag] - # ISO 8601 time zone adjustments - tz = params.get('tz') - if tz and tz != 'Z': - if tz[0] == '-': - tm[3] += int(params.get('tzhour', 0)) - tm[4] += int(params.get('tzmin', 0)) - elif tz[0] == '+': - tm[3] -= int(params.get('tzhour', 0)) - tm[4] -= int(params.get('tzmin', 0)) - else: - return None - # Python's time.mktime() is a wrapper around the ANSI C mktime(3c) - # which is guaranteed to normalize d/m/y/h/m/s. - # Many implementations have bugs, but we'll pretend they don't. - return time.localtime(time.mktime(tuple(tm))) -registerDateHandler(_parse_date_iso8601) - -# 8-bit date handling routines written by ytrewq1. -_korean_year = u'\ub144' # b3e2 in euc-kr -_korean_month = u'\uc6d4' # bff9 in euc-kr -_korean_day = u'\uc77c' # c0cf in euc-kr -_korean_am = u'\uc624\uc804' # bfc0 c0fc in euc-kr -_korean_pm = u'\uc624\ud6c4' # bfc0 c8c4 in euc-kr - -_korean_onblog_date_re = \ - re.compile('(\d{4})%s\s+(\d{2})%s\s+(\d{2})%s\s+(\d{2}):(\d{2}):(\d{2})' % \ - (_korean_year, _korean_month, _korean_day)) -_korean_nate_date_re = \ - re.compile(u'(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})' % \ - (_korean_am, _korean_pm)) -def _parse_date_onblog(dateString): - '''Parse a string according to the OnBlog 8-bit date format''' - m = _korean_onblog_date_re.match(dateString) - if not m: return - w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \ - {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\ - 'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\ - 'zonediff': '+09:00'} - if _debug: sys.stderr.write('OnBlog date parsed as: %s\n' % w3dtfdate) - return _parse_date_w3dtf(w3dtfdate) -registerDateHandler(_parse_date_onblog) - -def _parse_date_nate(dateString): - '''Parse a string according to the Nate 8-bit date format''' - m = _korean_nate_date_re.match(dateString) - if not m: return - hour = int(m.group(5)) - ampm = m.group(4) - if (ampm == _korean_pm): - hour += 12 - hour = str(hour) - if len(hour) == 1: - hour = '0' + hour - w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \ - {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\ - 'hour': hour, 'minute': m.group(6), 'second': m.group(7),\ - 'zonediff': '+09:00'} - if _debug: sys.stderr.write('Nate date parsed as: %s\n' % w3dtfdate) - return _parse_date_w3dtf(w3dtfdate) -registerDateHandler(_parse_date_nate) - -_mssql_date_re = \ - re.compile('(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})(\.\d+)?') -def _parse_date_mssql(dateString): - '''Parse a string according to the MS SQL date format''' - m = _mssql_date_re.match(dateString) - if not m: return - w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \ - {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\ - 'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\ - 'zonediff': '+09:00'} - if _debug: sys.stderr.write('MS SQL date parsed as: %s\n' % w3dtfdate) - return _parse_date_w3dtf(w3dtfdate) -registerDateHandler(_parse_date_mssql) - -# Unicode strings for Greek date strings -_greek_months = \ - { \ - u'\u0399\u03b1\u03bd': u'Jan', # c9e1ed in iso-8859-7 - u'\u03a6\u03b5\u03b2': u'Feb', # d6e5e2 in iso-8859-7 - u'\u039c\u03ac\u03ce': u'Mar', # ccdcfe in iso-8859-7 - u'\u039c\u03b1\u03ce': u'Mar', # cce1fe in iso-8859-7 - u'\u0391\u03c0\u03c1': u'Apr', # c1f0f1 in iso-8859-7 - u'\u039c\u03ac\u03b9': u'May', # ccdce9 in iso-8859-7 - u'\u039c\u03b1\u03ca': u'May', # cce1fa in iso-8859-7 - u'\u039c\u03b1\u03b9': u'May', # cce1e9 in iso-8859-7 - u'\u0399\u03bf\u03cd\u03bd': u'Jun', # c9effded in iso-8859-7 - u'\u0399\u03bf\u03bd': u'Jun', # c9efed in iso-8859-7 - u'\u0399\u03bf\u03cd\u03bb': u'Jul', # c9effdeb in iso-8859-7 - u'\u0399\u03bf\u03bb': u'Jul', # c9f9eb in iso-8859-7 - u'\u0391\u03cd\u03b3': u'Aug', # c1fde3 in iso-8859-7 - u'\u0391\u03c5\u03b3': u'Aug', # c1f5e3 in iso-8859-7 - u'\u03a3\u03b5\u03c0': u'Sep', # d3e5f0 in iso-8859-7 - u'\u039f\u03ba\u03c4': u'Oct', # cfeaf4 in iso-8859-7 - u'\u039d\u03bf\u03ad': u'Nov', # cdefdd in iso-8859-7 - u'\u039d\u03bf\u03b5': u'Nov', # cdefe5 in iso-8859-7 - u'\u0394\u03b5\u03ba': u'Dec', # c4e5ea in iso-8859-7 - } - -_greek_wdays = \ - { \ - u'\u039a\u03c5\u03c1': u'Sun', # caf5f1 in iso-8859-7 - u'\u0394\u03b5\u03c5': u'Mon', # c4e5f5 in iso-8859-7 - u'\u03a4\u03c1\u03b9': u'Tue', # d4f1e9 in iso-8859-7 - u'\u03a4\u03b5\u03c4': u'Wed', # d4e5f4 in iso-8859-7 - u'\u03a0\u03b5\u03bc': u'Thu', # d0e5ec in iso-8859-7 - u'\u03a0\u03b1\u03c1': u'Fri', # d0e1f1 in iso-8859-7 - u'\u03a3\u03b1\u03b2': u'Sat', # d3e1e2 in iso-8859-7 - } - -_greek_date_format_re = \ - re.compile(u'([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)') - -def _parse_date_greek(dateString): - '''Parse a string according to a Greek 8-bit date format.''' - m = _greek_date_format_re.match(dateString) - if not m: return - try: - wday = _greek_wdays[m.group(1)] - month = _greek_months[m.group(3)] - except: - return - rfc822date = '%(wday)s, %(day)s %(month)s %(year)s %(hour)s:%(minute)s:%(second)s %(zonediff)s' % \ - {'wday': wday, 'day': m.group(2), 'month': month, 'year': m.group(4),\ - 'hour': m.group(5), 'minute': m.group(6), 'second': m.group(7),\ - 'zonediff': m.group(8)} - if _debug: sys.stderr.write('Greek date parsed as: %s\n' % rfc822date) - return _parse_date_rfc822(rfc822date) -registerDateHandler(_parse_date_greek) - -# Unicode strings for Hungarian date strings -_hungarian_months = \ - { \ - u'janu\u00e1r': u'01', # e1 in iso-8859-2 - u'febru\u00e1ri': u'02', # e1 in iso-8859-2 - u'm\u00e1rcius': u'03', # e1 in iso-8859-2 - u'\u00e1prilis': u'04', # e1 in iso-8859-2 - u'm\u00e1ujus': u'05', # e1 in iso-8859-2 - u'j\u00fanius': u'06', # fa in iso-8859-2 - u'j\u00falius': u'07', # fa in iso-8859-2 - u'augusztus': u'08', - u'szeptember': u'09', - u'okt\u00f3ber': u'10', # f3 in iso-8859-2 - u'november': u'11', - u'december': u'12', - } - -_hungarian_date_format_re = \ - re.compile(u'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})((\+|-)(\d{,2}:\d{2}))') - -def _parse_date_hungarian(dateString): - '''Parse a string according to a Hungarian 8-bit date format.''' - m = _hungarian_date_format_re.match(dateString) - if not m: return - try: - month = _hungarian_months[m.group(2)] - day = m.group(3) - if len(day) == 1: - day = '0' + day - hour = m.group(4) - if len(hour) == 1: - hour = '0' + hour - except: - return - w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s%(zonediff)s' % \ - {'year': m.group(1), 'month': month, 'day': day,\ - 'hour': hour, 'minute': m.group(5),\ - 'zonediff': m.group(6)} - if _debug: sys.stderr.write('Hungarian date parsed as: %s\n' % w3dtfdate) - return _parse_date_w3dtf(w3dtfdate) -registerDateHandler(_parse_date_hungarian) - -# W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by -# Drake and licensed under the Python license. Removed all range checking -# for month, day, hour, minute, and second, since mktime will normalize -# these later -def _parse_date_w3dtf(dateString): - def __extract_date(m): - year = int(m.group('year')) - if year < 100: - year = 100 * int(time.gmtime()[0] / 100) + int(year) - if year < 1000: - return 0, 0, 0 - julian = m.group('julian') - if julian: - julian = int(julian) - month = julian / 30 + 1 - day = julian % 30 + 1 - jday = None - while jday != julian: - t = time.mktime((year, month, day, 0, 0, 0, 0, 0, 0)) - jday = time.gmtime(t)[-2] - diff = abs(jday - julian) - if jday > julian: - if diff < day: - day = day - diff - else: - month = month - 1 - day = 31 - elif jday < julian: - if day + diff < 28: - day = day + diff - else: - month = month + 1 - return year, month, day - month = m.group('month') - day = 1 - if month is None: - month = 1 - else: - month = int(month) - day = m.group('day') - if day: - day = int(day) - else: - day = 1 - return year, month, day - - def __extract_time(m): - if not m: - return 0, 0, 0 - hours = m.group('hours') - if not hours: - return 0, 0, 0 - hours = int(hours) - minutes = int(m.group('minutes')) - seconds = m.group('seconds') - if seconds: - seconds = int(seconds) - else: - seconds = 0 - return hours, minutes, seconds - - def __extract_tzd(m): - '''Return the Time Zone Designator as an offset in seconds from UTC.''' - if not m: - return 0 - tzd = m.group('tzd') - if not tzd: - return 0 - if tzd == 'Z': - return 0 - hours = int(m.group('tzdhours')) - minutes = m.group('tzdminutes') - if minutes: - minutes = int(minutes) - else: - minutes = 0 - offset = (hours*60 + minutes) * 60 - if tzd[0] == '+': - return -offset - return offset - - __date_re = ('(?P\d\d\d\d)' - '(?:(?P-|)' - '(?:(?P\d\d)(?:(?P=dsep)(?P\d\d))?' - '|(?P\d\d\d)))?') - __tzd_re = '(?P[-+](?P\d\d)(?::?(?P\d\d))|Z)' - __tzd_rx = re.compile(__tzd_re) - __time_re = ('(?P\d\d)(?P:|)(?P\d\d)' - '(?:(?P=tsep)(?P\d\d)(?:[.,]\d+)?)?' - + __tzd_re) - __datetime_re = '%s(?:T%s)?' % (__date_re, __time_re) - __datetime_rx = re.compile(__datetime_re) - m = __datetime_rx.match(dateString) - if (m is None) or (m.group() != dateString): return - gmt = __extract_date(m) + __extract_time(m) + (0, 0, 0) - if gmt[0] == 0: return - return time.gmtime(time.mktime(gmt) + __extract_tzd(m) - time.timezone) -registerDateHandler(_parse_date_w3dtf) - -def _parse_date_rfc822(dateString): - '''Parse an RFC822, RFC1123, RFC2822, or asctime-style date''' - data = dateString.split() - if data[0][-1] in (',', '.') or data[0].lower() in rfc822._daynames: - del data[0] - if len(data) == 4: - s = data[3] - i = s.find('+') - if i > 0: - data[3:] = [s[:i], s[i+1:]] - else: - data.append('') - dateString = " ".join(data) - # Account for the Etc/GMT timezone by stripping 'Etc/' - elif len(data) == 5 and data[4].lower().startswith('etc/'): - data[4] = data[4][4:] - dateString = " ".join(data) - if len(data) < 5: - dateString += ' 00:00:00 GMT' - tm = rfc822.parsedate_tz(dateString) - if tm: - return time.gmtime(rfc822.mktime_tz(tm)) -# rfc822.py defines several time zones, but we define some extra ones. -# 'ET' is equivalent to 'EST', etc. -_additional_timezones = {'AT': -400, 'ET': -500, 'CT': -600, 'MT': -700, 'PT': -800} -rfc822._timezones.update(_additional_timezones) -registerDateHandler(_parse_date_rfc822) - -def _parse_date_perforce(aDateString): - """parse a date in yyyy/mm/dd hh:mm:ss TTT format""" - # Fri, 2006/09/15 08:19:53 EDT - _my_date_pattern = re.compile( \ - r'(\w{,3}), (\d{,4})/(\d{,2})/(\d{2}) (\d{,2}):(\d{2}):(\d{2}) (\w{,3})') - - dow, year, month, day, hour, minute, second, tz = \ - _my_date_pattern.search(aDateString).groups() - months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - dateString = "%s, %s %s %s %s:%s:%s %s" % (dow, day, months[int(month) - 1], year, hour, minute, second, tz) - tm = rfc822.parsedate_tz(dateString) - if tm: - return time.gmtime(rfc822.mktime_tz(tm)) -registerDateHandler(_parse_date_perforce) - -def _parse_date(dateString): - '''Parses a variety of date formats into a 9-tuple in GMT''' - for handler in _date_handlers: - try: - date9tuple = handler(dateString) - if not date9tuple: continue - if len(date9tuple) != 9: - if _debug: sys.stderr.write('date handler function must return 9-tuple\n') - raise ValueError - map(int, date9tuple) - return date9tuple - except Exception, e: - if _debug: sys.stderr.write('%s raised %s\n' % (handler.__name__, repr(e))) - pass - return None - -def _getCharacterEncoding(http_headers, xml_data): - '''Get the character encoding of the XML document - - http_headers is a dictionary - xml_data is a raw string (not Unicode) - - This is so much trickier than it sounds, it's not even funny. - According to RFC 3023 ('XML Media Types'), if the HTTP Content-Type - is application/xml, application/*+xml, - application/xml-external-parsed-entity, or application/xml-dtd, - the encoding given in the charset parameter of the HTTP Content-Type - takes precedence over the encoding given in the XML prefix within the - document, and defaults to 'utf-8' if neither are specified. But, if - the HTTP Content-Type is text/xml, text/*+xml, or - text/xml-external-parsed-entity, the encoding given in the XML prefix - within the document is ALWAYS IGNORED and only the encoding given in - the charset parameter of the HTTP Content-Type header should be - respected, and it defaults to 'us-ascii' if not specified. - - Furthermore, discussion on the atom-syntax mailing list with the - author of RFC 3023 leads me to the conclusion that any document - served with a Content-Type of text/* and no charset parameter - must be treated as us-ascii. (We now do this.) And also that it - must always be flagged as non-well-formed. (We now do this too.) - - If Content-Type is unspecified (input was local file or non-HTTP source) - or unrecognized (server just got it totally wrong), then go by the - encoding given in the XML prefix of the document and default to - 'iso-8859-1' as per the HTTP specification (RFC 2616). - - Then, assuming we didn't find a character encoding in the HTTP headers - (and the HTTP Content-type allowed us to look in the body), we need - to sniff the first few bytes of the XML data and try to determine - whether the encoding is ASCII-compatible. Section F of the XML - specification shows the way here: - http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info - - If the sniffed encoding is not ASCII-compatible, we need to make it - ASCII compatible so that we can sniff further into the XML declaration - to find the encoding attribute, which will tell us the true encoding. - - Of course, none of this guarantees that we will be able to parse the - feed in the declared character encoding (assuming it was declared - correctly, which many are not). CJKCodecs and iconv_codec help a lot; - you should definitely install them if you can. - http://cjkpython.i18n.org/ - ''' - - def _parseHTTPContentType(content_type): - '''takes HTTP Content-Type header and returns (content type, charset) - - If no charset is specified, returns (content type, '') - If no content type is specified, returns ('', '') - Both return parameters are guaranteed to be lowercase strings - ''' - content_type = content_type or '' - content_type, params = cgi.parse_header(content_type) - return content_type, params.get('charset', '').replace("'", '') - - sniffed_xml_encoding = '' - xml_encoding = '' - true_encoding = '' - http_content_type, http_encoding = _parseHTTPContentType(http_headers.get('content-type', http_headers.get('Content-type'))) - # Must sniff for non-ASCII-compatible character encodings before - # searching for XML declaration. This heuristic is defined in - # section F of the XML specification: - # http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info - try: - if xml_data[:4] == _l2bytes([0x4c, 0x6f, 0xa7, 0x94]): - # EBCDIC - xml_data = _ebcdic_to_ascii(xml_data) - elif xml_data[:4] == _l2bytes([0x00, 0x3c, 0x00, 0x3f]): - # UTF-16BE - sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xfe, 0xff])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])): - # UTF-16BE with BOM - sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') - elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x3f, 0x00]): - # UTF-16LE - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xff, 0xfe])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])): - # UTF-16LE with BOM - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') - elif xml_data[:4] == _l2bytes([0x00, 0x00, 0x00, 0x3c]): - # UTF-32BE - sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') - elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x00, 0x00]): - # UTF-32LE - sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') - elif xml_data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]): - # UTF-32BE with BOM - sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') - elif xml_data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]): - # UTF-32LE with BOM - sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') - elif xml_data[:3] == _l2bytes([0xef, 0xbb, 0xbf]): - # UTF-8 with BOM - sniffed_xml_encoding = 'utf-8' - xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') - else: - # ASCII-compatible - pass - xml_encoding_match = re.compile(_s2bytes('^<\?.*encoding=[\'"](.*?)[\'"].*\?>')).match(xml_data) - except: - xml_encoding_match = None - if xml_encoding_match: - xml_encoding = xml_encoding_match.groups()[0].decode('utf-8').lower() - if sniffed_xml_encoding and (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', 'iso-10646-ucs-4', 'ucs-4', 'csucs4', 'utf-16', 'utf-32', 'utf_16', 'utf_32', 'utf16', 'u16')): - xml_encoding = sniffed_xml_encoding - acceptable_content_type = 0 - application_content_types = ('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity') - text_content_types = ('text/xml', 'text/xml-external-parsed-entity') - if (http_content_type in application_content_types) or \ - (http_content_type.startswith('application/') and http_content_type.endswith('+xml')): - acceptable_content_type = 1 - true_encoding = http_encoding or xml_encoding or 'utf-8' - elif (http_content_type in text_content_types) or \ - (http_content_type.startswith('text/')) and http_content_type.endswith('+xml'): - acceptable_content_type = 1 - true_encoding = http_encoding or 'us-ascii' - elif http_content_type.startswith('text/'): - true_encoding = http_encoding or 'us-ascii' - elif http_headers and (not (http_headers.has_key('content-type') or http_headers.has_key('Content-type'))): - true_encoding = xml_encoding or 'iso-8859-1' - else: - true_encoding = xml_encoding or 'utf-8' - # some feeds claim to be gb2312 but are actually gb18030. - # apparently MSIE and Firefox both do the following switch: - if true_encoding.lower() == 'gb2312': - true_encoding = 'gb18030' - return true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type - -def _toUTF8(data, encoding): - '''Changes an XML data stream on the fly to specify a new encoding - - data is a raw sequence of bytes (not Unicode) that is presumed to be in %encoding already - encoding is a string recognized by encodings.aliases - ''' - if _debug: sys.stderr.write('entering _toUTF8, trying encoding %s\n' % encoding) - # strip Byte Order Mark (if present) - if (len(data) >= 4) and (data[:2] == _l2bytes([0xfe, 0xff])) and (data[2:4] != _l2bytes([0x00, 0x00])): - if _debug: - sys.stderr.write('stripping BOM\n') - if encoding != 'utf-16be': - sys.stderr.write('trying utf-16be instead\n') - encoding = 'utf-16be' - data = data[2:] - elif (len(data) >= 4) and (data[:2] == _l2bytes([0xff, 0xfe])) and (data[2:4] != _l2bytes([0x00, 0x00])): - if _debug: - sys.stderr.write('stripping BOM\n') - if encoding != 'utf-16le': - sys.stderr.write('trying utf-16le instead\n') - encoding = 'utf-16le' - data = data[2:] - elif data[:3] == _l2bytes([0xef, 0xbb, 0xbf]): - if _debug: - sys.stderr.write('stripping BOM\n') - if encoding != 'utf-8': - sys.stderr.write('trying utf-8 instead\n') - encoding = 'utf-8' - data = data[3:] - elif data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]): - if _debug: - sys.stderr.write('stripping BOM\n') - if encoding != 'utf-32be': - sys.stderr.write('trying utf-32be instead\n') - encoding = 'utf-32be' - data = data[4:] - elif data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]): - if _debug: - sys.stderr.write('stripping BOM\n') - if encoding != 'utf-32le': - sys.stderr.write('trying utf-32le instead\n') - encoding = 'utf-32le' - data = data[4:] - newdata = unicode(data, encoding) - if _debug: sys.stderr.write('successfully converted %s data to unicode\n' % encoding) - declmatch = re.compile('^<\?xml[^>]*?>') - newdecl = '''''' - if declmatch.search(newdata): - newdata = declmatch.sub(newdecl, newdata) - else: - newdata = newdecl + u'\n' + newdata - return newdata.encode('utf-8') - -def _stripDoctype(data): - '''Strips DOCTYPE from XML document, returns (rss_version, stripped_data) - - rss_version may be 'rss091n' or None - stripped_data is the same XML document, minus the DOCTYPE - ''' - start = re.search(_s2bytes('<\w'), data) - start = start and start.start() or -1 - head,data = data[:start+1], data[start+1:] - - entity_pattern = re.compile(_s2bytes(r'^\s*]*?)>'), re.MULTILINE) - entity_results=entity_pattern.findall(head) - head = entity_pattern.sub(_s2bytes(''), head) - doctype_pattern = re.compile(_s2bytes(r'^\s*]*?)>'), re.MULTILINE) - doctype_results = doctype_pattern.findall(head) - doctype = doctype_results and doctype_results[0] or _s2bytes('') - if doctype.lower().count(_s2bytes('netscape')): - version = 'rss091n' - else: - version = None - - # only allow in 'safe' inline entity definitions - replacement=_s2bytes('') - if len(doctype_results)==1 and entity_results: - safe_pattern=re.compile(_s2bytes('\s+(\w+)\s+"(&#\w+;|[^&"]*)"')) - safe_entities=filter(lambda e: safe_pattern.match(e),entity_results) - if safe_entities: - replacement=_s2bytes('\n \n]>') - data = doctype_pattern.sub(replacement, head) + data - - return version, data, dict(replacement and [(k.decode('utf-8'), v.decode('utf-8')) for k, v in safe_pattern.findall(replacement)]) - -def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={}): - '''Parse a feed from a URL, file, stream, or string. - - request_headers, if given, is a dict from http header name to value to add - to the request; this overrides internally generated values. - ''' - result = FeedParserDict() - result['feed'] = FeedParserDict() - result['entries'] = [] - if _XML_AVAILABLE: - result['bozo'] = 0 - if not isinstance(handlers, list): - handlers = [handlers] - try: - f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers) - data = f.read() - except Exception, e: - result['bozo'] = 1 - result['bozo_exception'] = e - data = None - f = None - - if hasattr(f, 'headers'): - result['headers'] = dict(f.headers) - # overwrite existing headers using response_headers - if 'headers' in result: - result['headers'].update(response_headers) - elif response_headers: - result['headers'] = copy.deepcopy(response_headers) - - # if feed is gzip-compressed, decompress it - if f and data and 'headers' in result: - if gzip and result['headers'].get('content-encoding') == 'gzip': - try: - data = gzip.GzipFile(fileobj=_StringIO(data)).read() - except Exception, e: - # Some feeds claim to be gzipped but they're not, so - # we get garbage. Ideally, we should re-request the - # feed without the 'Accept-encoding: gzip' header, - # but we don't. - result['bozo'] = 1 - result['bozo_exception'] = e - data = '' - elif zlib and result['headers'].get('content-encoding') == 'deflate': - try: - data = zlib.decompress(data, -zlib.MAX_WBITS) - except Exception, e: - result['bozo'] = 1 - result['bozo_exception'] = e - data = '' - - # save HTTP headers - if 'headers' in result: - if 'etag' in result['headers'] or 'ETag' in result['headers']: - etag = result['headers'].get('etag', result['headers'].get('ETag')) - if etag: - result['etag'] = etag - if 'last-modified' in result['headers'] or 'Last-Modified' in result['headers']: - modified = result['headers'].get('last-modified', result['headers'].get('Last-Modified')) - if modified: - result['modified'] = _parse_date(modified) - if hasattr(f, 'url'): - result['href'] = f.url - result['status'] = 200 - if hasattr(f, 'status'): - result['status'] = f.status - if hasattr(f, 'close'): - f.close() - - # there are four encodings to keep track of: - # - http_encoding is the encoding declared in the Content-Type HTTP header - # - xml_encoding is the encoding declared in the latestrelease[0][1]: - logger.log(results.title + u" is an upcoming album. Setting its status to 'Wanted'...") - c.execute('UPDATE albums SET Status = "Wanted" WHERE AlbumID="%s"' % u.extractUuid(results.id)) - else: - pass - - for track in results.tracks: - c.execute('INSERT INTO tracks VALUES( ?, ?, ?, ?, ?, ?, ?, ?)', (artistid, results.artist.name, results.title, results.asin, u.extractUuid(results.id), track.title, track.duration, u.extractUuid(track.id))) - time.sleep(1) - time.sleep(1) - - conn.commit() - c.close() \ No newline at end of file diff --git a/logger.py b/logger.py deleted file mode 100644 index 251bfca6..00000000 --- a/logger.py +++ /dev/null @@ -1,177 +0,0 @@ -# Author: Nic Wolfe -# URL: http://code.google.com/p/sickbeard/ -# -# This file is part of Sick Beard. -# -# Sick Beard is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Sick Beard is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Sick Beard. If not, see . - -from __future__ import with_statement - -import os -import threading - -import headphones - -import logging - - -# number of log files to keep -NUM_LOGS = 3 - -# log size in bytes -LOG_SIZE = 10000000 # 10 megs - -ERROR = logging.ERROR -WARNING = logging.WARNING -MESSAGE = logging.INFO -DEBUG = logging.DEBUG - -reverseNames = {u'ERROR': ERROR, - u'WARNING': WARNING, - u'INFO': MESSAGE, - u'DEBUG': DEBUG} - -class SBRotatingLogHandler(object): - - def __init__(self, log_file, num_files, num_bytes): - self.num_files = num_files - self.num_bytes = num_bytes - - self.log_file = log_file - self.cur_handler = None - - self.writes_since_check = 0 - - self.log_lock = threading.Lock() - - def initLogging(self, consoleLogging=True): - - self.log_file = os.path.join(headphones.LOG_DIR, self.log_file) - - self.cur_handler = self._config_handler() - - logging.getLogger('headphones').addHandler(self.cur_handler) - - # define a Handler which writes INFO messages or higher to the sys.stderr - if consoleLogging: - console = logging.StreamHandler() - - console.setLevel(logging.INFO) - - # set a format which is simpler for console use - console.setFormatter(logging.Formatter('%(asctime)s %(levelname)s::%(message)s', '%H:%M:%S')) - - # add the handler to the root logger - logging.getLogger('headphones').addHandler(console) - - logging.getLogger('headphones').setLevel(logging.DEBUG) - - def _config_handler(self): - """ - Configure a file handler to log at file_name and return it. - """ - - file_handler = logging.FileHandler(self.log_file) - file_handler.setLevel(logging.DEBUG) - file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)-8s %(message)s', '%b-%d %H:%M:%S')) - return file_handler - - def _log_file_name(self, i): - """ - Returns a numbered log file name depending on i. If i==0 it just uses logName, if not it appends - it to the extension (blah.log.3 for i == 3) - - i: Log number to ues - """ - return self.log_file + ('.' + str(i) if i else '') - - def _num_logs(self): - """ - Scans the log folder and figures out how many log files there are already on disk - - Returns: The number of the last used file (eg. mylog.log.3 would return 3). If there are no logs it returns -1 - """ - cur_log = 0 - while os.path.isfile(self._log_file_name(cur_log)): - cur_log += 1 - return cur_log - 1 - - def _rotate_logs(self): - - sb_logger = logging.getLogger('headphones') - - # delete the old handler - if self.cur_handler: - self.cur_handler.flush() - self.cur_handler.close() - sb_logger.removeHandler(self.cur_handler) - - # rename or delete all the old log files - for i in range(self._num_logs(), -1, -1): - cur_file_name = self._log_file_name(i) - try: - if i >= NUM_LOGS: - os.remove(cur_file_name) - else: - os.rename(cur_file_name, self._log_file_name(i+1)) - except WindowsError: - pass - - # the new log handler will always be on the un-numbered .log file - new_file_handler = self._config_handler() - - self.cur_handler = new_file_handler - - sb_logger.addHandler(new_file_handler) - - def log(self, toLog, logLevel=MESSAGE): - - with self.log_lock: - - # check the size and see if we need to rotate - if self.writes_since_check >= 10: - if os.path.isfile(self.log_file) and os.path.getsize(self.log_file) >= LOG_SIZE: - self._rotate_logs() - self.writes_since_check = 0 - else: - self.writes_since_check += 1 - - meThread = threading.currentThread().getName() - message = meThread + u" :: " + toLog - - out_line = message.encode('utf-8') - - sb_logger = logging.getLogger('headphones') - - try: - if logLevel == DEBUG: - sb_logger.debug(out_line) - elif logLevel == MESSAGE: - sb_logger.info(out_line) - elif logLevel == WARNING: - sb_logger.warning(out_line) - elif logLevel == ERROR: - sb_logger.error(out_line) - - # add errors to the UI logger - #classes.ErrorViewer.add(classes.UIError(message)) - else: - sb_logger.log(logLevel, out_line) - except ValueError: - pass - -sb_log_instance = SBRotatingLogHandler('headphones.log', NUM_LOGS, LOG_SIZE) - -def log(toLog, logLevel=MESSAGE): - sb_log_instance.log(toLog, logLevel) \ No newline at end of file diff --git a/mb.py b/mb.py deleted file mode 100644 index 73d5edc2..00000000 --- a/mb.py +++ /dev/null @@ -1,84 +0,0 @@ -import time - -import musicbrainz2.webservice as ws -import musicbrainz2.model as m -import musicbrainz2.utils as u - -from musicbrainz2.webservice import WebServiceError - -from helpers import multikeysort - -q = ws.Query() - - -def findArtist(name, limit=1): - - artistlist = [] - - artistResults = q.getArtists(ws.ArtistFilter(name=name, limit=limit)) - - for result in artistResults: - - artistid = u.extractUuid(result.artist.id) - artistlist.append([result.artist.name, artistid]) - - return artistlist - -def getArtist(artistid): - - - rglist = [] - - #Get all official release groups - inc = ws.ArtistIncludes(releases=(m.Release.TYPE_OFFICIAL, m.Release.TYPE_ALBUM), ratings=False, releaseGroups=True) - artist = q.getArtistById(artistid, inc) - - for rg in artist.getReleaseGroups(): - - rgid = u.extractUuid(rg.id) - rglist.append([rg.title, rgid]) - - return rglist - -def getReleaseGroup(rgid): - - releaselist = [] - - inc = ws.ReleaseGroupIncludes(releases=True) - releaseGroup = q.getReleaseGroupById(rgid, inc) - - # I think for now we have to make separate queries for each release, in order - # to get more detailed release info (ASIN, track count, etc.) - for release in releaseGroup.releases: - - releaseid = u.extractUuid(release.id) - inc = ws.ReleaseIncludes(tracks=True) - - releaseResult = q.getReleaseById(releaseid, inc) - - release_dict = { - 'asin': bool(releaseResult.asin), - 'tracks': len(releaseResult.getTracks()), - 'releaseid': u.extractUuid(releaseResult.id) - } - - releaselist.append(release_dict) - time.sleep(1) - - a = multikeysort(releaselist, ['-asin', '-tracks']) - - releaseid = a[0]['releaseid'] - - return releaseid - -def getRelease(releaseid): - """ - Given a release id, gather all the info and return it as a list - """ - inc = ws.ReleaseIncludes(artist=True, tracks=True, releaseGroup=True) - release = q.getReleaseById(releaseid, inc) - - releasedetail = [] - - releasedetail.append(release.id) - diff --git a/mover.py b/mover.py deleted file mode 100644 index 4bc4858c..00000000 --- a/mover.py +++ /dev/null @@ -1,21 +0,0 @@ -import glob, os, shutil -from configobj import ConfigObj -from headphones import config_file - -config = ConfigObj(config_file) - -General = config['General'] -move_to_itunes = General['move_to_itunes'] -path_to_itunes = General['path_to_itunes'] -rename_mp3s = General['rename_mp3s'] -cleanup = General['cleanup'] -add_album_art = General['add_album_art'] -music_download_dir = General['music_download_dir'] - -def moveFiles(): - for root, dirs, files in os.walk(music_download_dir): - for file in files: - if file[-4:].lower() == '.mp3' and os.path.isfile(file): - print file - shutil.copy2(os.path.join(root, file), - os.path.join(path_to_itunes, file)) diff --git a/musicbrainz2/__init__.py b/musicbrainz2/__init__.py deleted file mode 100644 index f2edb508..00000000 --- a/musicbrainz2/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -"""A collection of classes for MusicBrainz. - -To get started quickly, have a look at L{webservice.Query} and the examples -there. The source distribution also contains example code you might find -interesting. - -This package contains the following modules: - - 1. L{model}: The MusicBrainz domain model, containing classes like - L{Artist }, L{Release }, or - L{Track } - - 2. L{webservice}: An interface to the MusicBrainz XML web service. - - 3. L{wsxml}: A parser for the web service XML format (MMD). - - 4. L{disc}: Functions for creating and submitting DiscIDs. - - 5. L{utils}: Utilities for working with URIs and other commonly needed tools. - -@author: Matthias Friedrich -""" -__revision__ = '$Id: __init__.py 12974 2011-05-01 08:43:54Z luks $' -__version__ = '0.7.3' - -# EOF diff --git a/musicbrainz2/data/__init__.py b/musicbrainz2/data/__init__.py deleted file mode 100644 index 3067fabc..00000000 --- a/musicbrainz2/data/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Support data for the musicbrainz2 package. - -This package is I{not} part of the public API, it has been added to work -around shortcomings in python and may thus be removed at any time. - -Please use the L{musicbrainz2.utils} module instead. -""" -__revision__ = '$Id: __init__.py 7386 2006-04-30 11:12:55Z matt $' - -# EOF diff --git a/musicbrainz2/data/countrynames.py b/musicbrainz2/data/countrynames.py deleted file mode 100644 index 7c4ab023..00000000 --- a/musicbrainz2/data/countrynames.py +++ /dev/null @@ -1,253 +0,0 @@ -# -*- coding: utf-8 -*- - -__revision__ = '$Id: countrynames.py 7386 2006-04-30 11:12:55Z matt $' - -countryNames = { - u'BD': u'Bangladesh', - u'BE': u'Belgium', - u'BF': u'Burkina Faso', - u'BG': u'Bulgaria', - u'BB': u'Barbados', - u'WF': u'Wallis and Futuna Islands', - u'BM': u'Bermuda', - u'BN': u'Brunei Darussalam', - u'BO': u'Bolivia', - u'BH': u'Bahrain', - u'BI': u'Burundi', - u'BJ': u'Benin', - u'BT': u'Bhutan', - u'JM': u'Jamaica', - u'BV': u'Bouvet Island', - u'BW': u'Botswana', - u'WS': u'Samoa', - u'BR': u'Brazil', - u'BS': u'Bahamas', - u'BY': u'Belarus', - u'BZ': u'Belize', - u'RU': u'Russian Federation', - u'RW': u'Rwanda', - u'RE': u'Reunion', - u'TM': u'Turkmenistan', - u'TJ': u'Tajikistan', - u'RO': u'Romania', - u'TK': u'Tokelau', - u'GW': u'Guinea-Bissau', - u'GU': u'Guam', - u'GT': u'Guatemala', - u'GR': u'Greece', - u'GQ': u'Equatorial Guinea', - u'GP': u'Guadeloupe', - u'JP': u'Japan', - u'GY': u'Guyana', - u'GF': u'French Guiana', - u'GE': u'Georgia', - u'GD': u'Grenada', - u'GB': u'United Kingdom', - u'GA': u'Gabon', - u'SV': u'El Salvador', - u'GN': u'Guinea', - u'GM': u'Gambia', - u'GL': u'Greenland', - u'GI': u'Gibraltar', - u'GH': u'Ghana', - u'OM': u'Oman', - u'TN': u'Tunisia', - u'JO': u'Jordan', - u'HT': u'Haiti', - u'HU': u'Hungary', - u'HK': u'Hong Kong', - u'HN': u'Honduras', - u'HM': u'Heard and Mc Donald Islands', - u'VE': u'Venezuela', - u'PR': u'Puerto Rico', - u'PW': u'Palau', - u'PT': u'Portugal', - u'SJ': u'Svalbard and Jan Mayen Islands', - u'PY': u'Paraguay', - u'IQ': u'Iraq', - u'PA': u'Panama', - u'PF': u'French Polynesia', - u'PG': u'Papua New Guinea', - u'PE': u'Peru', - u'PK': u'Pakistan', - u'PH': u'Philippines', - u'PN': u'Pitcairn', - u'PL': u'Poland', - u'PM': u'St. Pierre and Miquelon', - u'ZM': u'Zambia', - u'EH': u'Western Sahara', - u'EE': u'Estonia', - u'EG': u'Egypt', - u'ZA': u'South Africa', - u'EC': u'Ecuador', - u'IT': u'Italy', - u'VN': u'Viet Nam', - u'SB': u'Solomon Islands', - u'ET': u'Ethiopia', - u'SO': u'Somalia', - u'ZW': u'Zimbabwe', - u'SA': u'Saudi Arabia', - u'ES': u'Spain', - u'ER': u'Eritrea', - u'MD': u'Moldova, Republic of', - u'MG': u'Madagascar', - u'MA': u'Morocco', - u'MC': u'Monaco', - u'UZ': u'Uzbekistan', - u'MM': u'Myanmar', - u'ML': u'Mali', - u'MO': u'Macau', - u'MN': u'Mongolia', - u'MH': u'Marshall Islands', - u'MK': u'Macedonia, The Former Yugoslav Republic of', - u'MU': u'Mauritius', - u'MT': u'Malta', - u'MW': u'Malawi', - u'MV': u'Maldives', - u'MQ': u'Martinique', - u'MP': u'Northern Mariana Islands', - u'MS': u'Montserrat', - u'MR': u'Mauritania', - u'UG': u'Uganda', - u'MY': u'Malaysia', - u'MX': u'Mexico', - u'IL': u'Israel', - u'FR': u'France', - u'IO': u'British Indian Ocean Territory', - u'SH': u'St. Helena', - u'FI': u'Finland', - u'FJ': u'Fiji', - u'FK': u'Falkland Islands (Malvinas)', - u'FM': u'Micronesia, Federated States of', - u'FO': u'Faroe Islands', - u'NI': u'Nicaragua', - u'NL': u'Netherlands', - u'NO': u'Norway', - u'NA': u'Namibia', - u'VU': u'Vanuatu', - u'NC': u'New Caledonia', - u'NE': u'Niger', - u'NF': u'Norfolk Island', - u'NG': u'Nigeria', - u'NZ': u'New Zealand', - u'ZR': u'Zaire', - u'NP': u'Nepal', - u'NR': u'Nauru', - u'NU': u'Niue', - u'CK': u'Cook Islands', - u'CI': u'Cote d\'Ivoire', - u'CH': u'Switzerland', - u'CO': u'Colombia', - u'CN': u'China', - u'CM': u'Cameroon', - u'CL': u'Chile', - u'CC': u'Cocos (Keeling) Islands', - u'CA': u'Canada', - u'CG': u'Congo', - u'CF': u'Central African Republic', - u'CZ': u'Czech Republic', - u'CY': u'Cyprus', - u'CX': u'Christmas Island', - u'CR': u'Costa Rica', - u'CV': u'Cape Verde', - u'CU': u'Cuba', - u'SZ': u'Swaziland', - u'SY': u'Syrian Arab Republic', - u'KG': u'Kyrgyzstan', - u'KE': u'Kenya', - u'SR': u'Suriname', - u'KI': u'Kiribati', - u'KH': u'Cambodia', - u'KN': u'Saint Kitts and Nevis', - u'KM': u'Comoros', - u'ST': u'Sao Tome and Principe', - u'SI': u'Slovenia', - u'KW': u'Kuwait', - u'SN': u'Senegal', - u'SM': u'San Marino', - u'SL': u'Sierra Leone', - u'SC': u'Seychelles', - u'KZ': u'Kazakhstan', - u'KY': u'Cayman Islands', - u'SG': u'Singapore', - u'SE': u'Sweden', - u'SD': u'Sudan', - u'DO': u'Dominican Republic', - u'DM': u'Dominica', - u'DJ': u'Djibouti', - u'DK': u'Denmark', - u'VG': u'Virgin Islands (British)', - u'DE': u'Germany', - u'YE': u'Yemen', - u'DZ': u'Algeria', - u'US': u'United States', - u'UY': u'Uruguay', - u'YT': u'Mayotte', - u'UM': u'United States Minor Outlying Islands', - u'LB': u'Lebanon', - u'LC': u'Saint Lucia', - u'LA': u'Lao People\'s Democratic Republic', - u'TV': u'Tuvalu', - u'TW': u'Taiwan', - u'TT': u'Trinidad and Tobago', - u'TR': u'Turkey', - u'LK': u'Sri Lanka', - u'LI': u'Liechtenstein', - u'LV': u'Latvia', - u'TO': u'Tonga', - u'LT': u'Lithuania', - u'LU': u'Luxembourg', - u'LR': u'Liberia', - u'LS': u'Lesotho', - u'TH': u'Thailand', - u'TF': u'French Southern Territories', - u'TG': u'Togo', - u'TD': u'Chad', - u'TC': u'Turks and Caicos Islands', - u'LY': u'Libyan Arab Jamahiriya', - u'VA': u'Vatican City State (Holy See)', - u'VC': u'Saint Vincent and The Grenadines', - u'AE': u'United Arab Emirates', - u'AD': u'Andorra', - u'AG': u'Antigua and Barbuda', - u'AF': u'Afghanistan', - u'AI': u'Anguilla', - u'VI': u'Virgin Islands (U.S.)', - u'IS': u'Iceland', - u'IR': u'Iran (Islamic Republic of)', - u'AM': u'Armenia', - u'AL': u'Albania', - u'AO': u'Angola', - u'AN': u'Netherlands Antilles', - u'AQ': u'Antarctica', - u'AS': u'American Samoa', - u'AR': u'Argentina', - u'AU': u'Australia', - u'AT': u'Austria', - u'AW': u'Aruba', - u'IN': u'India', - u'TZ': u'Tanzania, United Republic of', - u'AZ': u'Azerbaijan', - u'IE': u'Ireland', - u'ID': u'Indonesia', - u'UA': u'Ukraine', - u'QA': u'Qatar', - u'MZ': u'Mozambique', - u'BA': u'Bosnia and Herzegovina', - u'CD': u'Congo, The Democratic Republic of the', - u'CS': u'Serbia and Montenegro', - u'HR': u'Croatia', - u'KP': u'Korea (North), Democratic People\'s Republic of', - u'KR': u'Korea (South), Republic of', - u'SK': u'Slovakia', - u'SU': u'Soviet Union (historical, 1922-1991)', - u'TL': u'East Timor', - u'XC': u'Czechoslovakia (historical, 1918-1992)', - u'XE': u'Europe', - u'XG': u'East Germany (historical, 1949-1990)', - u'XU': u'[Unknown Country]', - u'XW': u'[Worldwide]', - u'YU': u'Yugoslavia (historical, 1918-1992)', -} - -# EOF diff --git a/musicbrainz2/data/languagenames.py b/musicbrainz2/data/languagenames.py deleted file mode 100644 index 7f4252dc..00000000 --- a/musicbrainz2/data/languagenames.py +++ /dev/null @@ -1,400 +0,0 @@ -# -*- coding: utf-8 -*- - -__revision__ = '$Id: languagenames.py 8725 2006-12-17 22:39:07Z luks $' - -languageNames = { - u'ART': u'Artificial (Other)', - u'ROH': u'Raeto-Romance', - u'SCO': u'Scots', - u'SCN': u'Sicilian', - u'ROM': u'Romany', - u'RON': u'Romanian', - u'OSS': u'Ossetian; Ossetic', - u'ALE': u'Aleut', - u'MNI': u'Manipuri', - u'NWC': u'Classical Newari; Old Newari; Classical Nepal Bhasa', - u'OSA': u'Osage', - u'MNC': u'Manchu', - u'MWR': u'Marwari', - u'VEN': u'Venda', - u'MWL': u'Mirandese', - u'FAS': u'Persian', - u'FAT': u'Fanti', - u'FAN': u'Fang', - u'FAO': u'Faroese', - u'DIN': u'Dinka', - u'HYE': u'Armenian', - u'DSB': u'Lower Sorbian', - u'CAR': u'Carib', - u'DIV': u'Divehi', - u'TEL': u'Telugu', - u'TEM': u'Timne', - u'NBL': u'Ndebele, South; South Ndebele', - u'TER': u'Tereno', - u'TET': u'Tetum', - u'SUN': u'Sundanese', - u'KUT': u'Kutenai', - u'SUK': u'Sukuma', - u'KUR': u'Kurdish', - u'KUM': u'Kumyk', - u'SUS': u'Susu', - u'NEW': u'Newari; Nepal Bhasa', - u'KUA': u'Kuanyama; Kwanyama', - u'MEN': u'Mende', - u'LEZ': u'Lezghian', - u'GLA': u'Gaelic; Scottish Gaelic', - u'BOS': u'Bosnian', - u'GLE': u'Irish', - u'EKA': u'Ekajuk', - u'GLG': u'Gallegan', - u'AKA': u'Akan', - u'BOD': u'Tibetan', - u'GLV': u'Manx', - u'JRB': u'Judeo-Arabic', - u'VIE': u'Vietnamese', - u'IPK': u'Inupiaq', - u'UZB': u'Uzbek', - u'BRE': u'Breton', - u'BRA': u'Braj', - u'AYM': u'Aymara', - u'CHA': u'Chamorro', - u'CHB': u'Chibcha', - u'CHE': u'Chechen', - u'CHG': u'Chagatai', - u'CHK': u'Chuukese', - u'CHM': u'Mari', - u'CHN': u'Chinook jargon', - u'CHO': u'Choctaw', - u'CHP': u'Chipewyan', - u'CHR': u'Cherokee', - u'CHU': u'Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic', - u'CHV': u'Chuvash', - u'CHY': u'Cheyenne', - u'MSA': u'Malay', - u'III': u'Sichuan Yi', - u'ACE': u'Achinese', - u'IBO': u'Igbo', - u'IBA': u'Iban', - u'XHO': u'Xhosa', - u'DEU': u'German', - u'CAT': u'Catalan; Valencian', - u'DEL': u'Delaware', - u'DEN': u'Slave (Athapascan)', - u'CAD': u'Caddo', - u'TAT': u'Tatar', - u'RAJ': u'Rajasthani', - u'SPA': u'Spanish; Castilian', - u'TAM': u'Tamil', - u'TAH': u'Tahitian', - u'AFH': u'Afrihili', - u'ENG': u'English', - u'CSB': u'Kashubian', - u'NYN': u'Nyankole', - u'NYO': u'Nyoro', - u'SID': u'Sidamo', - u'NYA': u'Chichewa; Chewa; Nyanja', - u'SIN': u'Sinhala; Sinhalese', - u'AFR': u'Afrikaans', - u'LAM': u'Lamba', - u'SND': u'Sindhi', - u'MAR': u'Marathi', - u'LAH': u'Lahnda', - u'NYM': u'Nyamwezi', - u'SNA': u'Shona', - u'LAD': u'Ladino', - u'SNK': u'Soninke', - u'MAD': u'Madurese', - u'MAG': u'Magahi', - u'MAI': u'Maithili', - u'MAH': u'Marshallese', - u'LAV': u'Latvian', - u'MAL': u'Malayalam', - u'MAN': u'Mandingo', - u'ZND': u'Zande', - u'ZEN': u'Zenaga', - u'KBD': u'Kabardian', - u'ITA': u'Italian', - u'VAI': u'Vai', - u'TSN': u'Tswana', - u'TSO': u'Tsonga', - u'TSI': u'Tsimshian', - u'BYN': u'Blin; Bilin', - u'FIJ': u'Fijian', - u'FIN': u'Finnish', - u'EUS': u'Basque', - u'CEB': u'Cebuano', - u'DAN': u'Danish', - u'NOG': u'Nogai', - u'NOB': u'Norwegian BokmÃ¥l; BokmÃ¥l, Norwegian', - u'DAK': u'Dakota', - u'CES': u'Czech', - u'DAR': u'Dargwa', - u'DAY': u'Dayak', - u'NOR': u'Norwegian', - u'KPE': u'Kpelle', - u'GUJ': u'Gujarati', - u'MDF': u'Moksha', - u'MAS': u'Masai', - u'LAO': u'Lao', - u'MDR': u'Mandar', - u'GON': u'Gondi', - u'SMS': u'Skolt Sami', - u'SMO': u'Samoan', - u'SMN': u'Inari Sami', - u'SMJ': u'Lule Sami', - u'GOT': u'Gothic', - u'SME': u'Northern Sami', - u'BLA': u'Siksika', - u'SMA': u'Southern Sami', - u'GOR': u'Gorontalo', - u'AST': u'Asturian; Bable', - u'ORM': u'Oromo', - u'QUE': u'Quechua', - u'ORI': u'Oriya', - u'CRH': u'Crimean Tatar; Crimean Turkish', - u'ASM': u'Assamese', - u'PUS': u'Pushto', - u'DGR': u'Dogrib', - u'LTZ': u'Luxembourgish; Letzeburgesch', - u'NDO': u'Ndonga', - u'GEZ': u'Geez', - u'ISL': u'Icelandic', - u'LAT': u'Latin', - u'MAK': u'Makasar', - u'ZAP': u'Zapotec', - u'YID': u'Yiddish', - u'KOK': u'Konkani', - u'KOM': u'Komi', - u'KON': u'Kongo', - u'UKR': u'Ukrainian', - u'TON': u'Tonga (Tonga Islands)', - u'KOS': u'Kosraean', - u'KOR': u'Korean', - u'TOG': u'Tonga (Nyasa)', - u'HUN': u'Hungarian', - u'HUP': u'Hupa', - u'CYM': u'Welsh', - u'UDM': u'Udmurt', - u'BEJ': u'Beja', - u'BEN': u'Bengali', - u'BEL': u'Belarusian', - u'BEM': u'Bemba', - u'AAR': u'Afar', - u'NZI': u'Nzima', - u'SAH': u'Yakut', - u'SAN': u'Sanskrit', - u'SAM': u'Samaritan Aramaic', - u'SAG': u'Sango', - u'SAD': u'Sandawe', - u'RAR': u'Rarotongan', - u'RAP': u'Rapanui', - u'SAS': u'Sasak', - u'SAT': u'Santali', - u'MIN': u'Minangkabau', - u'LIM': u'Limburgan; Limburger; Limburgish', - u'LIN': u'Lingala', - u'LIT': u'Lithuanian', - u'EFI': u'Efik', - u'BTK': u'Batak (Indonesia)', - u'KAC': u'Kachin', - u'KAB': u'Kabyle', - u'KAA': u'Kara-Kalpak', - u'KAN': u'Kannada', - u'KAM': u'Kamba', - u'KAL': u'Kalaallisut; Greenlandic', - u'KAS': u'Kashmiri', - u'KAR': u'Karen', - u'KAU': u'Kanuri', - u'KAT': u'Georgian', - u'KAZ': u'Kazakh', - u'TYV': u'Tuvinian', - u'AWA': u'Awadhi', - u'URD': u'Urdu', - u'DOI': u'Dogri', - u'TPI': u'Tok Pisin', - u'MRI': u'Maori', - u'ABK': u'Abkhazian', - u'TKL': u'Tokelau', - u'NLD': u'Dutch; Flemish', - u'OJI': u'Ojibwa', - u'OCI': u'Occitan (post 1500); Provençal', - u'WOL': u'Wolof', - u'JAV': u'Javanese', - u'HRV': u'Croatian', - u'DYU': u'Dyula', - u'SSW': u'Swati', - u'MUL': u'Multiple languages', - u'HIL': u'Hiligaynon', - u'HIM': u'Himachali', - u'HIN': u'Hindi', - u'BAS': u'Basa', - u'GBA': u'Gbaya', - u'WLN': u'Walloon', - u'BAD': u'Banda', - u'NEP': u'Nepali', - u'CRE': u'Cree', - u'BAN': u'Balinese', - u'BAL': u'Baluchi', - u'BAM': u'Bambara', - u'BAK': u'Bashkir', - u'SHN': u'Shan', - u'ARP': u'Arapaho', - u'ARW': u'Arawak', - u'ARA': u'Arabic', - u'ARC': u'Aramaic', - u'ARG': u'Aragonese', - u'SEL': u'Selkup', - u'ARN': u'Araucanian', - u'LUS': u'Lushai', - u'MUS': u'Creek', - u'LUA': u'Luba-Lulua', - u'LUB': u'Luba-Katanga', - u'LUG': u'Ganda', - u'LUI': u'Luiseno', - u'LUN': u'Lunda', - u'LUO': u'Luo (Kenya and Tanzania)', - u'IKU': u'Inuktitut', - u'TUR': u'Turkish', - u'TUK': u'Turkmen', - u'TUM': u'Tumbuka', - u'COP': u'Coptic', - u'COS': u'Corsican', - u'COR': u'Cornish', - u'ILO': u'Iloko', - u'GWI': u'Gwich´in', - u'TLI': u'Tlingit', - u'TLH': u'Klingon; tlhIngan-Hol', - u'POR': u'Portuguese', - u'PON': u'Pohnpeian', - u'POL': u'Polish', - u'TGK': u'Tajik', - u'TGL': u'Tagalog', - u'FRA': u'French', - u'BHO': u'Bhojpuri', - u'SWA': u'Swahili', - u'DUA': u'Duala', - u'SWE': u'Swedish', - u'YAP': u'Yapese', - u'TIV': u'Tiv', - u'YAO': u'Yao', - u'XAL': u'Kalmyk', - u'FRY': u'Frisian', - u'GAY': u'Gayo', - u'OTA': u'Turkish, Ottoman (1500-1928)', - u'HMN': u'Hmong', - u'HMO': u'Hiri Motu', - u'GAA': u'Ga', - u'FUR': u'Friulian', - u'MLG': u'Malagasy', - u'SLV': u'Slovenian', - u'FIL': u'Filipino; Pilipino', - u'MLT': u'Maltese', - u'SLK': u'Slovak', - u'FUL': u'Fulah', - u'JPN': u'Japanese', - u'VOL': u'Volapük', - u'VOT': u'Votic', - u'IND': u'Indonesian', - u'AVE': u'Avestan', - u'JPR': u'Judeo-Persian', - u'AVA': u'Avaric', - u'PAP': u'Papiamento', - u'EWO': u'Ewondo', - u'PAU': u'Palauan', - u'EWE': u'Ewe', - u'PAG': u'Pangasinan', - u'PAM': u'Pampanga', - u'PAN': u'Panjabi; Punjabi', - u'KIR': u'Kirghiz', - u'NIA': u'Nias', - u'KIK': u'Kikuyu; Gikuyu', - u'SYR': u'Syriac', - u'KIN': u'Kinyarwanda', - u'NIU': u'Niuean', - u'EPO': u'Esperanto', - u'JBO': u'Lojban', - u'MIC': u'Mi\'kmaq; Micmac', - u'THA': u'Thai', - u'HAI': u'Haida', - u'ELL': u'Greek, Modern (1453-)', - u'ADY': u'Adyghe; Adygei', - u'ELX': u'Elamite', - u'ADA': u'Adangme', - u'GRB': u'Grebo', - u'HAT': u'Haitian; Haitian Creole', - u'HAU': u'Hausa', - u'HAW': u'Hawaiian', - u'BIN': u'Bini', - u'AMH': u'Amharic', - u'BIK': u'Bikol', - u'BIH': u'Bihari', - u'MOS': u'Mossi', - u'MOH': u'Mohawk', - u'MON': u'Mongolian', - u'MOL': u'Moldavian', - u'BIS': u'Bislama', - u'TVL': u'Tuvalu', - u'IJO': u'Ijo', - u'EST': u'Estonian', - u'KMB': u'Kimbundu', - u'UMB': u'Umbundu', - u'TMH': u'Tamashek', - u'FON': u'Fon', - u'HSB': u'Upper Sorbian', - u'RUN': u'Rundi', - u'RUS': u'Russian', - u'PLI': u'Pali', - u'SRD': u'Sardinian', - u'ACH': u'Acoli', - u'NDE': u'Ndebele, North; North Ndebele', - u'DZO': u'Dzongkha', - u'KRU': u'Kurukh', - u'SRR': u'Serer', - u'IDO': u'Ido', - u'SRP': u'Serbian', - u'KRO': u'Kru', - u'KRC': u'Karachay-Balkar', - u'NDS': u'Low German; Low Saxon; German, Low; Saxon, Low', - u'ZUN': u'Zuni', - u'ZUL': u'Zulu', - u'TWI': u'Twi', - u'NSO': u'Northern Sotho, Pedi; Sepedi', - u'SOM': u'Somali', - u'SON': u'Songhai', - u'SOT': u'Sotho, Southern', - u'MKD': u'Macedonian', - u'HER': u'Herero', - u'LOL': u'Mongo', - u'HEB': u'Hebrew', - u'LOZ': u'Lozi', - u'GIL': u'Gilbertese', - u'WAS': u'Washo', - u'WAR': u'Waray', - u'BUL': u'Bulgarian', - u'WAL': u'Walamo', - u'BUA': u'Buriat', - u'BUG': u'Buginese', - u'AZE': u'Azerbaijani', - u'ZHA': u'Zhuang; Chuang', - u'ZHO': u'Chinese', - u'NNO': u'Norwegian Nynorsk; Nynorsk, Norwegian', - u'UIG': u'Uighur; Uyghur', - u'MYV': u'Erzya', - u'INH': u'Ingush', - u'KHM': u'Khmer', - u'MYA': u'Burmese', - u'KHA': u'Khasi', - u'INA': u'Interlingua (International Auxiliary Language Association)', - u'NAH': u'Nahuatl', - u'TIR': u'Tigrinya', - u'NAP': u'Neapolitan', - u'NAV': u'Navajo; Navaho', - u'NAU': u'Nauru', - u'GRN': u'Guarani', - u'TIG': u'Tigre', - u'YOR': u'Yoruba', - u'ILE': u'Interlingue', - u'SQI': u'Albanian', -} - -# EOF diff --git a/musicbrainz2/data/releasetypenames.py b/musicbrainz2/data/releasetypenames.py deleted file mode 100644 index f16ed19e..00000000 --- a/musicbrainz2/data/releasetypenames.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- - -__revision__ = '$Id: releasetypenames.py 8728 2006-12-17 23:42:30Z luks $' - -releaseTypeNames = { - u'http://musicbrainz.org/ns/mmd-1.0#None': u'None', - u'http://musicbrainz.org/ns/mmd-1.0#Album': u'Album', - u'http://musicbrainz.org/ns/mmd-1.0#Single': u'Single', - u'http://musicbrainz.org/ns/mmd-1.0#EP': u'EP', - u'http://musicbrainz.org/ns/mmd-1.0#Compilation': u'Compilation', - u'http://musicbrainz.org/ns/mmd-1.0#Soundtrack': u'Soundtrack', - u'http://musicbrainz.org/ns/mmd-1.0#Spokenword': u'Spokenword', - u'http://musicbrainz.org/ns/mmd-1.0#Interview': u'Interview', - u'http://musicbrainz.org/ns/mmd-1.0#Audiobook': u'Audiobook', - u'http://musicbrainz.org/ns/mmd-1.0#Live': u'Live', - u'http://musicbrainz.org/ns/mmd-1.0#Remix': u'Remix', - u'http://musicbrainz.org/ns/mmd-1.0#Other': u'Other', - u'http://musicbrainz.org/ns/mmd-1.0#Official': u'Official', - u'http://musicbrainz.org/ns/mmd-1.0#Promotion': u'Promotion', - u'http://musicbrainz.org/ns/mmd-1.0#Bootleg': u'Bootleg', - u'http://musicbrainz.org/ns/mmd-1.0#Pseudo-Release': u'Pseudo-Release', -} - -# EOF diff --git a/musicbrainz2/data/scriptnames.py b/musicbrainz2/data/scriptnames.py deleted file mode 100644 index 30a55bd7..00000000 --- a/musicbrainz2/data/scriptnames.py +++ /dev/null @@ -1,59 +0,0 @@ -# -*- coding: utf-8 -*- - -__revision__ = '$Id: scriptnames.py 7386 2006-04-30 11:12:55Z matt $' - -scriptNames = { - u'Yiii': u'Yi', - u'Telu': u'Telugu', - u'Taml': u'Tamil', - u'Guru': u'Gurmukhi', - u'Hebr': u'Hebrew', - u'Geor': u'Georgian (Mkhedruli)', - u'Ugar': u'Ugaritic', - u'Cyrl': u'Cyrillic', - u'Hrkt': u'Kanji & Kana', - u'Armn': u'Armenian', - u'Runr': u'Runic', - u'Khmr': u'Khmer', - u'Latn': u'Latin', - u'Hani': u'Han (Hanzi, Kanji, Hanja)', - u'Ital': u'Old Italic (Etruscan, Oscan, etc.)', - u'Hano': u'Hanunoo (Hanunóo)', - u'Ethi': u'Ethiopic (Ge\'ez)', - u'Gujr': u'Gujarati', - u'Hang': u'Hangul', - u'Arab': u'Arabic', - u'Thaa': u'Thaana', - u'Buhd': u'Buhid', - u'Sinh': u'Sinhala', - u'Orya': u'Oriya', - u'Hans': u'Han (Simplified variant)', - u'Thai': u'Thai', - u'Cprt': u'Cypriot', - u'Linb': u'Linear B', - u'Hant': u'Han (Traditional variant)', - u'Osma': u'Osmanya', - u'Mong': u'Mongolian', - u'Deva': u'Devanagari (Nagari)', - u'Laoo': u'Lao', - u'Tagb': u'Tagbanwa', - u'Hira': u'Hiragana', - u'Bopo': u'Bopomofo', - u'Goth': u'Gothic', - u'Tale': u'Tai Le', - u'Mymr': u'Myanmar (Burmese)', - u'Tglg': u'Tagalog', - u'Grek': u'Greek', - u'Mlym': u'Malayalam', - u'Cher': u'Cherokee', - u'Tibt': u'Tibetan', - u'Kana': u'Katakana', - u'Syrc': u'Syriac', - u'Cans': u'Unified Canadian Aboriginal Syllabics', - u'Beng': u'Bengali', - u'Limb': u'Limbu', - u'Ogam': u'Ogham', - u'Knda': u'Kannada', -} - -# EOF diff --git a/musicbrainz2/disc.py b/musicbrainz2/disc.py deleted file mode 100644 index 8d283115..00000000 --- a/musicbrainz2/disc.py +++ /dev/null @@ -1,221 +0,0 @@ -"""Utilities for working with Audio CDs. - -This module contains utilities for working with Audio CDs. - -The functions in this module need both a working ctypes package (already -included in python-2.5) and an installed libdiscid. If you don't have -libdiscid, it can't be loaded, or your platform isn't supported by either -ctypes or this module, a C{NotImplementedError} is raised when using the -L{readDisc()} function. - -@author: Matthias Friedrich -""" -__revision__ = '$Id: disc.py 11987 2009-08-22 11:57:51Z matt $' - -import sys -import urllib -import urlparse -import ctypes -import ctypes.util -from musicbrainz2.model import Disc - -__all__ = [ 'DiscError', 'readDisc', 'getSubmissionUrl' ] - - -class DiscError(IOError): - """The Audio CD could not be read. - - This may be simply because no disc was in the drive, the device name - was wrong or the disc can't be read. Reading errors can occur in case - of a damaged disc or a copy protection mechanism, for example. - """ - pass - - -def _openLibrary(): - """Tries to open libdiscid. - - @return: a C{ctypes.CDLL} object, representing the opened library - - @raise NotImplementedError: if the library can't be opened - """ - # This only works for ctypes >= 0.9.9.3. Any libdiscid is found, - # no matter how it's called on this platform. - try: - if hasattr(ctypes.cdll, 'find'): - libDiscId = ctypes.cdll.find('discid') - _setPrototypes(libDiscId) - return libDiscId - except OSError, e: - raise NotImplementedError('Error opening library: ' + str(e)) - - # Try to find the library using ctypes.util - libName = ctypes.util.find_library('discid') - if libName != None: - try: - libDiscId = ctypes.cdll.LoadLibrary(libName) - _setPrototypes(libDiscId) - return libDiscId - except OSError, e: - raise NotImplementedError('Error opening library: ' + - str(e)) - - # For compatibility with ctypes < 0.9.9.3 try to figure out the library - # name without the help of ctypes. We use cdll.LoadLibrary() below, - # which isn't available for ctypes == 0.9.9.3. - # - if sys.platform == 'linux2': - libName = 'libdiscid.so.0' - elif sys.platform == 'darwin': - libName = 'libdiscid.0.dylib' - elif sys.platform == 'win32': - libName = 'discid.dll' - else: - # This should at least work for Un*x-style operating systems - libName = 'libdiscid.so.0' - - try: - libDiscId = ctypes.cdll.LoadLibrary(libName) - _setPrototypes(libDiscId) - return libDiscId - except OSError, e: - raise NotImplementedError('Error opening library: ' + str(e)) - - assert False # not reached - - -def _setPrototypes(libDiscId): - ct = ctypes - libDiscId.discid_new.argtypes = ( ) - libDiscId.discid_new.restype = ct.c_void_p - - libDiscId.discid_free.argtypes = (ct.c_void_p, ) - - libDiscId.discid_read.argtypes = (ct.c_void_p, ct.c_char_p) - - libDiscId.discid_get_error_msg.argtypes = (ct.c_void_p, ) - libDiscId.discid_get_error_msg.restype = ct.c_char_p - - libDiscId.discid_get_id.argtypes = (ct.c_void_p, ) - libDiscId.discid_get_id.restype = ct.c_char_p - - libDiscId.discid_get_first_track_num.argtypes = (ct.c_void_p, ) - libDiscId.discid_get_first_track_num.restype = ct.c_int - - libDiscId.discid_get_last_track_num.argtypes = (ct.c_void_p, ) - libDiscId.discid_get_last_track_num.restype = ct.c_int - - libDiscId.discid_get_sectors.argtypes = (ct.c_void_p, ) - libDiscId.discid_get_sectors.restype = ct.c_int - - libDiscId.discid_get_track_offset.argtypes = (ct.c_void_p, ct.c_int) - libDiscId.discid_get_track_offset.restype = ct.c_int - - libDiscId.discid_get_track_length.argtypes = (ct.c_void_p, ct.c_int) - libDiscId.discid_get_track_length.restype = ct.c_int - - -def getSubmissionUrl(disc, host='mm.musicbrainz.org', port=80): - """Returns a URL for adding a disc to the MusicBrainz database. - - A fully initialized L{musicbrainz2.model.Disc} object is needed, as - returned by L{readDisc}. A disc object returned by the web service - doesn't provide the necessary information. - - Note that the created URL is intended for interactive use and points - to the MusicBrainz disc submission wizard by default. This method - just returns a URL, no network connection is needed. The disc drive - isn't used. - - @param disc: a fully initialized L{musicbrainz2.model.Disc} object - @param host: a string containing a host name - @param port: an integer containing a port number - - @return: a string containing the submission URL - - @see: L{readDisc} - """ - assert isinstance(disc, Disc), 'musicbrainz2.model.Disc expected' - discid = disc.getId() - first = disc.getFirstTrackNum() - last = disc.getLastTrackNum() - sectors = disc.getSectors() - assert None not in (discid, first, last, sectors) - - tracks = last - first + 1 - toc = "%d %d %d " % (first, last, sectors) - toc = toc + ' '.join( map(lambda x: str(x[0]), disc.getTracks()) ) - - query = urllib.urlencode({ 'id': discid, 'toc': toc, 'tracks': tracks }) - - if port == 80: - netloc = host - else: - netloc = host + ':' + str(port) - - url = ('http', netloc, '/bare/cdlookup.html', '', query, '') - - return urlparse.urlunparse(url) - - -def readDisc(deviceName=None): - """Reads an Audio CD in the disc drive. - - This reads a CD's table of contents (TOC) and calculates the MusicBrainz - DiscID, which is a 28 character ASCII string. This DiscID can be used - to retrieve a list of matching releases from the web service (see - L{musicbrainz2.webservice.Query}). - - Note that an Audio CD has to be in drive for this to work. The - C{deviceName} argument may be used to set the device. The default - depends on the operating system (on linux, it's C{'/dev/cdrom'}). - No network connection is needed for this function. - - If the device doesn't exist or there's no valid Audio CD in the drive, - a L{DiscError} exception is raised. - - @param deviceName: a string containing the CD drive's device name - - @return: a L{musicbrainz2.model.Disc} object - - @raise DiscError: if there was a problem reading the disc - @raise NotImplementedError: if DiscID generation isn't supported - """ - libDiscId = _openLibrary() - - handle = libDiscId.discid_new() - assert handle != 0, "libdiscid: discid_new() returned NULL" - - # Access the CD drive. This also works if deviceName is None because - # ctypes passes a NULL pointer in this case. - # - res = libDiscId.discid_read(handle, deviceName) - if res == 0: - raise DiscError(libDiscId.discid_get_error_msg(handle)) - - - # Now extract the data from the result. - # - disc = Disc() - - disc.setId( libDiscId.discid_get_id(handle) ) - - firstTrackNum = libDiscId.discid_get_first_track_num(handle) - lastTrackNum = libDiscId.discid_get_last_track_num(handle) - - disc.setSectors(libDiscId.discid_get_sectors(handle)) - - for i in range(firstTrackNum, lastTrackNum+1): - trackOffset = libDiscId.discid_get_track_offset(handle, i) - trackSectors = libDiscId.discid_get_track_length(handle, i) - - disc.addTrack( (trackOffset, trackSectors) ) - - disc.setFirstTrackNum(firstTrackNum) - disc.setLastTrackNum(lastTrackNum) - - libDiscId.discid_free(handle) - - return disc - -# EOF diff --git a/musicbrainz2/model.py b/musicbrainz2/model.py deleted file mode 100644 index fe8f05df..00000000 --- a/musicbrainz2/model.py +++ /dev/null @@ -1,2488 +0,0 @@ -"""The MusicBrainz domain model. - -These classes are part of the MusicBrainz domain model. They may be used -by other modules and don't contain any network or other I/O code. If you -want to request data from the web service, please have a look at -L{musicbrainz2.webservice}. - -The most important classes, usually acting as entry points, are -L{Artist}, L{Release}, and L{Track}. - -@var VARIOUS_ARTISTS_ID: The ID of the special 'Various Artists' artist. - -@var NS_MMD_1: Default namespace prefix for all MusicBrainz metadata. -@var NS_REL_1: Namespace prefix for relations. -@var NS_EXT_1: Namespace prefix for MusicBrainz extensions. - -@see: L{musicbrainz2.webservice} - -@author: Matthias Friedrich -""" -try: - set -except NameError: - from sets import Set as set - -__revision__ = '$Id: model.py 12829 2010-09-15 12:00:11Z luks $' - -__all__ = [ - 'VARIOUS_ARTISTS_ID', 'NS_MMD_1', 'NS_REL_1', 'NS_EXT_1', - 'Entity', 'Artist', 'Release', 'Track', 'User', 'ReleaseGroup', - 'Relation', 'Disc', 'ReleaseEvent', 'Label', 'Tag', 'Rating', - 'AbstractAlias', 'ArtistAlias', 'LabelAlias', -] - - -VARIOUS_ARTISTS_ID = 'http://musicbrainz.org/artist/89ad4ac3-39f7-470e-963a-56509c546377' - -# Namespace URI prefixes -# -NS_MMD_1 = 'http://musicbrainz.org/ns/mmd-1.0#' -NS_REL_1 = 'http://musicbrainz.org/ns/rel-1.0#' -NS_EXT_1 = 'http://musicbrainz.org/ns/ext-1.0#' - - -class Entity(object): - """A first-level MusicBrainz class. - - All entities in MusicBrainz have unique IDs (which are absolute URIs) - as well as any number of L{relations } to other entities - and free text tags. This class is abstract and should not be - instantiated. - - Relations are differentiated by their I{target type}, that means, - where they link to. MusicBrainz currently supports four target types - (artists, releases, tracks, and URLs) each identified using a URI. - To get all relations with a specific target type, you can use - L{getRelations} and pass one of the following constants as the - parameter: - - - L{Relation.TO_ARTIST} - - L{Relation.TO_RELEASE} - - L{Relation.TO_TRACK} - - L{Relation.TO_URL} - - @see: L{Relation} - """ - - def __init__(self, id_=None): - """Constructor. - - This should only used by derived classes. - - @param id_: a string containing an absolute URI - """ - self._id = id_ - self._relations = { } - self._tags = { } - self._rating = Rating() - - def getId(self): - """Returns a MusicBrainz ID. - - @return: a string containing a URI, or None - """ - return self._id - - def setId(self, value): - """Sets a MusicBrainz ID. - - @param value: a string containing an absolute URI - """ - self._id = value - - id = property(getId, setId, doc='The MusicBrainz ID.') - - def getRelations(self, targetType=None, relationType=None, - requiredAttributes=(), direction=None): - """Returns a list of relations. - - If C{targetType} is given, only relations of that target - type are returned. For MusicBrainz, the following target - types are defined: - - L{Relation.TO_ARTIST} - - L{Relation.TO_RELEASE} - - L{Relation.TO_TRACK} - - L{Relation.TO_URL} - - If C{targetType} is L{Relation.TO_ARTIST}, for example, - this method returns all relations between this Entity and - artists. - - You may use the C{relationType} parameter to further restrict - the selection. If it is set, only relations with the given - relation type are returned. The C{requiredAttributes} sequence - lists attributes that have to be part of all returned relations. - - If C{direction} is set, only relations with the given reading - direction are returned. You can use the L{Relation.DIR_FORWARD}, - L{Relation.DIR_BACKWARD}, and L{Relation.DIR_NONE} constants - for this. - - @param targetType: a string containing an absolute URI, or None - @param relationType: a string containing an absolute URI, or None - @param requiredAttributes: a sequence containing absolute URIs - @param direction: one of L{Relation}'s direction constants - @return: a list of L{Relation} objects - - @see: L{Entity} - """ - allRels = [ ] - if targetType is not None: - allRels = self._relations.setdefault(targetType, [ ]) - else: - for (k, relList) in self._relations.items(): - for rel in relList: - allRels.append(rel) - - # Filter for direction. - # - if direction is not None: - allRels = [r for r in allRels if r.getDirection() == direction] - - # Filter for relation type. - # - if relationType is None: - return allRels - else: - allRels = [r for r in allRels if r.getType() == relationType] - - # Now filer for attribute type. - # - tmp = [] - required = set(iter(requiredAttributes)) - - for r in allRels: - attrs = set(iter(r.getAttributes())) - if required.issubset(attrs): - tmp.append(r) - return tmp - - - def getRelationTargets(self, targetType=None, relationType=None, - requiredAttributes=(), direction=None): - """Returns a list of relation targets. - - The arguments work exactly like in L{getRelations}, but - instead of L{Relation} objects, the matching relation - targets are returned. This can be L{Artist}, L{Release}, - or L{Track} objects, depending on the relations. - - As a special case, URL strings are returned if the target - is an URL. - - @param targetType: a string containing an absolute URI, or None - @param relationType: a string containing an absolute URI, or None - @param requiredAttributes: a sequence containing absolute URIs - @param direction: one of L{Relation}'s direction constants - @return: a list of objects, depending on the relation - - @see: L{getRelations} - """ - ret = [ ] - rels = self.getRelations(targetType, relationType, - requiredAttributes, direction) - - for r in rels: - if r.getTargetType() == Relation.TO_URL: - ret.append(r.getTargetId()) - else: - ret.append(r.getTarget()) - - return ret - - - def addRelation(self, relation): - """Adds a relation. - - This method adds C{relation} to the list of relations. The - given relation has to be initialized, at least the target - type has to be set. - - @param relation: the L{Relation} object to add - - @see: L{Entity} - """ - assert relation.getType is not None - assert relation.getTargetType is not None - assert relation.getTargetId is not None - l = self._relations.setdefault(relation.getTargetType(), [ ]) - l.append(relation) - - - def getRelationTargetTypes(self): - """Returns a list of target types available for this entity. - - Use this to find out to which types of targets this entity - has relations. If the entity only has relations to tracks and - artists, for example, then a list containg the strings - L{Relation.TO_TRACK} and L{Relation.TO_ARTIST} is returned. - - @return: a list of strings containing URIs - - @see: L{getRelations} - """ - return self._relations.keys() - - def getTag(self, value): - """Return the tag with the given value (aka the tag's name). - - @return: the L{Tag} with the given name or raises a KeyError - """ - return self._tags[value] - - def getTags(self): - """Return all tags attached to this Entity. - - @return: a list of L{Tag} objects - """ - return self._tags.values() - - tags = property(getTags, doc='The tags for this entity.') - - def addTag(self, tag): - """Add a new tag. - - This merges an existing tag with the same name. - - @param tag: the L{Tag} object to add - - @see: L{getTags} - """ - if self._tags.has_key(tag.value): - existing = self._tags[tag.value] - existing.count += tag.count - else: - self._tags[tag.value] = tag - - def getRating(self): - """Return the rating of this Entity. - 0 = Unrated - 1 - 5 = Rating - - @return: rating - """ - return self._rating - - rating = property(getRating, doc='The rating for this entity.') - - def setRating(self, value): - self._rating = value - - -class Artist(Entity): - """Represents an artist. - - Artists in MusicBrainz can have a type. Currently, this type can - be either Person or Group for which the following URIs are assigned: - - - C{http://musicbrainz.org/ns/mmd-1.0#Person} - - C{http://musicbrainz.org/ns/mmd-1.0#Group} - - Use the L{TYPE_PERSON} and L{TYPE_GROUP} constants for comparison. - """ - TYPE_PERSON = NS_MMD_1 + 'Person' - TYPE_GROUP = NS_MMD_1 + 'Group' - - def __init__(self, id_=None, type_=None, name=None, sortName=None): - """Constructor. - - @param id_: a string containing an absolute URI - @param type_: a string containing an absolute URI - @param name: a string containing the artist's name - @param sortName: a string containing the artist's sort name - """ - Entity.__init__(self, id_) - self._type = type_ - self._name = name - self._sortName = sortName - self._disambiguation = None - self._beginDate = None - self._endDate = None - self._aliases = [ ] - self._releases = [ ] - self._releasesCount = None - self._releasesOffset = None - self._releaseGroups = [ ] - self._releaseGroupsCount = None - self._releaseGroupsOffset = None - - def getType(self): - """Returns the artist's type. - - @return: a string containing an absolute URI, or None - """ - return self._type - - def setType(self, type_): - """Sets the artist's type. - - @param type_: a string containing an absolute URI - """ - self._type = type_ - - type = property(getType, setType, doc="The artist's type.") - - def getName(self): - """Returns the artist's name. - - @return: a string containing the artist's name, or None - """ - return self._name - - def setName(self, name): - """Sets the artist's name. - - @param name: a string containing the artist's name - """ - self._name = name - - name = property(getName, setName, doc="The artist's name.") - - def getSortName(self): - """Returns the artist's sort name. - - The sort name is the artist's name in a special format which - is better suited for lexicographic sorting. The MusicBrainz - style guide specifies this format. - - @see: U{The MusicBrainz Style Guidelines - } - """ - return self._sortName - - def setSortName(self, sortName): - """Sets the artist's sort name. - - @param sortName: a string containing the artist's sort name - - @see: L{getSortName} - """ - self._sortName = sortName - - sortName = property(getSortName, setSortName, - doc="The artist's sort name.") - - def getDisambiguation(self): - """Returns the disambiguation attribute. - - This attribute may be used if there is more than one artist - with the same name. In this case, disambiguation attributes - are added to the artists' names to keep them apart. - - For example, there are at least three bands named 'Vixen'. - Each band has a different disambiguation in the MusicBrainz - database, like 'Hip-hop' or 'all-female rock/glam band'. - - @return: a disambiguation string, or None - - @see: L{getUniqueName} - """ - return self._disambiguation - - def setDisambiguation(self, disambiguation): - """Sets the disambiguation attribute. - - @param disambiguation: a disambiguation string - - @see: L{getDisambiguation}, L{getUniqueName} - """ - self._disambiguation = disambiguation - - disambiguation = property(getDisambiguation, setDisambiguation, - doc="The disambiguation comment.") - - def getUniqueName(self): - """Returns a unique artist name (using disambiguation). - - This method returns the artist name together with the - disambiguation attribute in parenthesis if it exists. - Example: 'Vixen (Hip-hop)'. - - @return: a string containing the unique name - - @see: L{getDisambiguation} - """ - d = self.getDisambiguation() - if d is not None and d.strip() != '': - return '%s (%s)' % (self.getName(), d) - else: - return self.getName() - - def getBeginDate(self): - """Returns the birth/foundation date. - - The definition of the I{begin date} depends on the artist's - type. For persons, this is the day of birth, for groups it - is the day the group was founded. - - The returned date has the format 'YYYY', 'YYYY-MM', or - 'YYYY-MM-DD', depending on how much detail is known. - - @return: a string containing the date, or None - - @see: L{getType} - """ - return self._beginDate - - def setBeginDate(self, dateStr): - """Sets the begin/foundation date. - - @param dateStr: a date string - - @see: L{getBeginDate} - """ - self._beginDate = dateStr - - beginDate = property(getBeginDate, setBeginDate, - doc="The begin/foundation date.") - - def getEndDate(self): - """Returns the death/dissolving date. - - The definition of the I{end date} depends on the artist's - type. For persons, this is the day of death, for groups it - is the day the group was dissolved. - - @return: a string containing a date, or None - - @see: L{getBeginDate} - """ - return self._endDate - - def setEndDate(self, dateStr): - """Sets the death/dissolving date. - - @param dateStr: a string containing a date - - @see: L{setEndDate}, L{getBeginDate} - """ - self._endDate = dateStr - - endDate = property(getEndDate, setEndDate, - doc="The death/dissolving date.") - - def getAliases(self): - """Returns the list of aliases for this artist. - - @return: a list of L{ArtistAlias} objects - """ - return self._aliases - - aliases = property(getAliases, doc='The list of aliases.') - - def addAlias(self, alias): - """Adds an alias for this artist. - - @param alias: an L{ArtistAlias} object - """ - self._aliases.append(alias) - - def getReleases(self): - """Returns a list of releases from this artist. - - This may also include releases where this artist isn't the - I{main} artist but has just contributed one or more tracks - (aka VA-Releases). - - @return: a list of L{Release} objects - """ - return self._releases - - releases = property(getReleases, doc='The list of releases') - - def addRelease(self, release): - """Adds a release to this artist's list of releases. - - @param release: a L{Release} object - """ - self._releases.append(release) - - def getReleasesOffset(self): - """Returns the offset of the release list. - - This is used if the release list is incomplete (ie. the web - service only returned part of the release for this artist). - Note that the offset value is zero-based, which means release - C{0} is the first release. - - @return: an integer containing the offset, or None - - @see: L{getReleases}, L{getReleasesCount} - """ - return self._releasesOffset - - def setReleasesOffset(self, offset): - """Sets the offset of the release list. - - @param offset: an integer containing the offset, or None - - @see: L{getReleasesOffset} - """ - self._releasesOffset = offset - - releasesOffset = property(getReleasesOffset, setReleasesOffset, - doc='The offset of the release list.') - - def getReleasesCount(self): - """Returns the number of existing releases. - - This may or may not match with the number of elements that - L{getReleases} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setReleasesCount}, L{getReleasesOffset} - """ - return self._releasesCount - - def setReleasesCount(self, value): - """Sets the number of existing releases. - - @param value: an integer containing the count, or None - - @see: L{getReleasesCount}, L{setReleasesOffset} - """ - self._releasesCount = value - - releasesCount = property(getReleasesCount, setReleasesCount, - doc='The total number of releases') - - def getReleaseGroups(self): - """Returns a list of release groups from this artist. - - @return: a list of L{ReleaseGroup} objects - """ - return self._releaseGroups - - releaseGroups = property(getReleaseGroups, doc='The list of release groups') - - def addReleaseGroup(self, releaseGroup): - """Adds a release group to this artist's list of release groups. - - @param releaseGroup: a L{ReleaseGroup} object - """ - self._releaseGroups.append(releaseGroup) - - def getReleaseGroupsOffset(self): - """Returns the offset of the release group list. - - This is used if the release group list is incomplete (ie. the - web service only returned part of the result for this artist). - Note that the offset value is zero-based, which means release - group C{0} is the first release group. - - @return: an integer containing the offset, or None - - @see: L{getReleaseGroups}, L{getReleaseGroupsCount} - """ - return self._releaseGroupsOffset - - def setReleaseGroupsOffset(self, offset): - """Sets the offset of the release group list. - - @param offset: an integer containing the offset, or None - - @see: L{getReleaseGroupsOffset} - """ - self._releaseGroupsOffset = offset - - releaseGroupsOffset = property(getReleaseGroupsOffset, setReleaseGroupsOffset, - doc='The offset of the release group list.') - - def getReleaseGroupsCount(self): - """Returns the number of existing release groups. - - This may or may not match with the number of elements that - L{getReleaseGroups} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setReleaseGroupsCount}, L{getReleaseGroupsOffset} - """ - return self._releaseGroupsCount - - def setReleaseGroupsCount(self, value): - """Sets the number of existing release groups. - - @param value: an integer containing the count, or None - - @see: L{getReleaseGroupsCount}, L{setReleaseGroupsOffset} - """ - self._releaseGroupsCount = value - - releasesCount = property(getReleaseGroupsCount, setReleaseGroupsCount, - doc='The total number of release groups') - - -class Rating(object): - """The representation of a MusicBrain rating. - - The rating can have the following values: - - 0 = Unrated - [1..5] = Rating - """ - def __init__(self, value=None, count=None): - """Constructor. - - @param value: a string containing the tag's value - @param count: the number of users who added this tag - """ - self._value = value - self._count = count - - def getValue(self): - """Returns a string with the tag's value. - - @return: an integer containing the rating's value, or None - """ - return self._value - - def setValue(self, value): - """ Set the value of this rating. - - 0 or None = Clear your rating - 1 - 5 = Rating - - @param value: the rating to apply - - @raise ValueError: if value is not a double or not in the - range 0 - 5 or None. - """ - if value == None: - value = 0 - try: - value = float(value) - except ValueError, e: - raise ValueError("Value for rating needs to be an" \ - "float.") - if value < 0.0 or value > 5.0: - raise ValueError("Value needs to be in the range [0..5]") - self._value = value - - value = property(getValue, setValue, doc='The value of the rating.') - - def getCount(self): - """Returns an integer containing the rating's frequency count. - - @return: an integer containing the rating's frequency count, - or None - """ - return self._count - - def setCount(self, count): - """Sets the frequency count of this rating. - - @param count: an integer containing the tag's frequency count - """ - self._count = count - - count = property(getCount, setCount, doc="This tag's frequency count.") - - def __str__(self): - return str(self._value) - - def __unicode__(self): - return unicode(self._value) - - -class Tag(object): - """The representation of a MusicBrainz folksonomy tag. - - The tag's value is the text that's displayed in the tag cloud. - The count attribute keeps track of how many users added the tag - to its owning entity. - """ - def __init__(self, value=None, count=None): - """Constructor. - - @param value: a string containing the tag's value - @param count: the number of users who added this tag - """ - self._value = value - self._count = count - - def getValue(self): - """Returns a string with the tag's value. - - @return: a string containing the tags's value, or None - """ - return self._value - - def setValue(self, value): - """Sets the value of this tag. - - @param value: A string containing the value of the tag - """ - self._value = value - - value = property(getValue, setValue, doc='The value of the text.') - - def getCount(self): - """Returns an integer containing the tag's frequency count. - - @return: an integer containing the tags's frequency count, or None - """ - return self._count - - def setCount(self, count): - """Sets the frequency count of this tag. - - @param count: an integer containing the tag's frequency count - """ - self._count = count - - count = property(getCount, setCount, doc="This tag's frequency count.") - - def __str__(self): - return str(self._value) - - def __unicode__(self): - return unicode(self._value) - - -class Label(Entity): - """Represents a record label. - - A label within MusicBrainz is an L{Entity}. It contains information - about the label like when it was established, its name, label code and - other relationships. All release events may be assigned a label. - """ - TYPE_UNKNOWN = NS_MMD_1 + 'Unknown' - - TYPE_DISTRIBUTOR = NS_MMD_1 + 'Distributor' - TYPE_HOLDING = NS_MMD_1 + 'Holding' - TYPE_PRODUCTION = NS_MMD_1 + 'Production' - - TYPE_ORIGINAL = NS_MMD_1 + 'OriginalProduction' - TYPE_BOOTLEG = NS_MMD_1 + 'BootlegProduction' - TYPE_REISSUE = NS_MMD_1 + 'ReissueProduction' - - def __init__(self, id_=None): - """Constructor. - - @param id_: a string containing an absolute URI - """ - Entity.__init__(self, id_) - self._type = None - self._name = None - self._sortName = None - self._disambiguation = None - self._countryId = None - self._code = None - self._beginDate = None - self._endDate = None - self._aliases = [ ] - - def getType(self): - """Returns the type of this label. - - @return: a string containing an absolute URI - """ - return self._type - - def setType(self, type_): - """Sets the type of this label. - - @param type_: A string containing the absolute URI of the type of label. - """ - self._type = type_ - - type = property(getType, setType, doc='The type of label') - - def getName(self): - """Returns a string with the name of the label. - - @return: a string containing the label's name, or None - """ - return self._name - - def setName(self, name): - """Sets the name of this label. - - @param name: A string containing the name of the label - """ - self._name = name - - name = property(getName, setName, doc='The name of the label.') - - def getSortName(self): - """Returns the label's sort name. - - The sort name is the label's name in a special format which - is better suited for lexicographic sorting. The MusicBrainz - style guide specifies this format. - - @see: U{The MusicBrainz Style Guidelines - } - """ - return self._sortName - - def setSortName(self, sortName): - """Sets the label's sort name. - - @param sortName: a string containing the label's sort name - - @see: L{getSortName} - """ - self._sortName = sortName - - sortName = property(getSortName, setSortName, - doc="The label's sort name.") - - def getDisambiguation(self): - """Returns the disambiguation attribute. - - This attribute may be used if there is more than one label - with the same name. In this case, disambiguation attributes - are added to the labels' names to keep them apart. - - @return: a disambiguation string, or None - - @see: L{getUniqueName} - """ - return self._disambiguation - - def setDisambiguation(self, disambiguation): - """Sets the disambiguation attribute. - - @param disambiguation: a disambiguation string - - @see: L{getDisambiguation}, L{getUniqueName} - """ - self._disambiguation = disambiguation - - disambiguation = property(getDisambiguation, setDisambiguation, - doc="The disambiguation comment.") - - def getUniqueName(self): - """Returns a unique label name (using disambiguation). - - This method returns the label's name together with the - disambiguation attribute in parenthesis if it exists. - - @return: a string containing the unique name - - @see: L{getDisambiguation} - """ - d = self.getDisambiguation() - if d is not None and d.strip() != '': - return '%s (%s)' % (self.getName(), d) - else: - return self.getName() - - def getBeginDate(self): - """Returns the date this label was established. - - @return: A string contained the start date, or None - """ - return self._beginDate - - def setBeginDate(self, date): - """Set the date this label was established. - - @param date: A string in the format of YYYY-MM-DD - """ - self._beginDate = date - - beginDate = property(getBeginDate, setBeginDate, - doc='The date this label was established.') - - def getEndDate(self): - """Returns the date this label closed. - - The returned date has the format 'YYYY', 'YYYY-MM', or - 'YYYY-MM-DD', depending on how much detail is known. - - @return: A string containing the date, or None - """ - return self._endDate - - def setEndDate(self, date): - """Set the date this label closed. - - The date may have the format 'YYYY', 'YYYY-MM', or - 'YYYY-MM-DD', depending on how much detail is known. - - @param date: A string containing the date, or None - """ - self._endDate = date - - endDate = property(getEndDate, setEndDate, - doc='The date this label closed.') - - def getCountry(self): - """Returns the country the label is located. - - @return: a string containing an ISO-3166 country code, or None - - @see: L{musicbrainz2.utils.getCountryName} - """ - return self._countryId - - def setCountry(self, country): - """Sets the country the label is located. - - @param country: a string containing an ISO-3166 country code - """ - self._countryId = country - - country = property(getCountry, setCountry, - doc='The country the label is located.') - - def getCode(self): - """Returns the label code. - - Label codes have been introduced by the IFPI (International - Federation of Phonogram and Videogram Industries) to uniquely - identify record labels. The label code consists of 'LC-' and 4 - figures (currently being extended to 5 figures). - - @return: a string containing the label code, or None - """ - return self._code - - def setCode(self, code): - """Sets the label code. - - @param code: a string containing the label code - """ - self._code = code - - code = property(getCode, setCode, - doc='The label code.') - - def getAliases(self): - """Returns the list of aliases for this label. - - @return: a list of L{LabelAlias} objects - """ - return self._aliases - - aliases = property(getAliases, doc='The list of aliases.') - - def addAlias(self, alias): - """Adds an alias for this label. - - @param alias: a L{LabelAlias} object - """ - self._aliases.append(alias) - - -class Release(Entity): - """Represents a Release. - - A release within MusicBrainz is an L{Entity} which contains L{Track} - objects. Releases may be of more than one type: There can be albums, - singles, compilations, live recordings, official releases, bootlegs - etc. - - @note: The current MusicBrainz server implementation supports only a - limited set of types. - """ - TYPE_NONE = NS_MMD_1 + 'None' - TYPE_NON_ALBUM_TRACKS = NS_MMD_1 + "NonAlbum Track" - - TYPE_ALBUM = NS_MMD_1 + 'Album' - TYPE_SINGLE = NS_MMD_1 + 'Single' - TYPE_EP = NS_MMD_1 + 'EP' - TYPE_COMPILATION = NS_MMD_1 + 'Compilation' - TYPE_SOUNDTRACK = NS_MMD_1 + 'Soundtrack' - TYPE_SPOKENWORD = NS_MMD_1 + 'Spokenword' - TYPE_INTERVIEW = NS_MMD_1 + 'Interview' - TYPE_AUDIOBOOK = NS_MMD_1 + 'Audiobook' - TYPE_LIVE = NS_MMD_1 + 'Live' - TYPE_REMIX = NS_MMD_1 + 'Remix' - TYPE_OTHER = NS_MMD_1 + 'Other' - - TYPE_OFFICIAL = NS_MMD_1 + 'Official' - TYPE_PROMOTION = NS_MMD_1 + 'Promotion' - TYPE_BOOTLEG = NS_MMD_1 + 'Bootleg' - TYPE_PSEUDO_RELEASE = NS_MMD_1 + 'Pseudo-Release' - - def __init__(self, id_=None, title=None): - """Constructor. - - @param id_: a string containing an absolute URI - @param title: a string containing the title - """ - Entity.__init__(self, id_) - self._types = [ ] - self._title = title - self._textLanguage = None - self._textScript = None - self._asin = None - self._artist = None - self._releaseEvents = [ ] - #self._releaseEventsCount = None - self._releaseGroup = None - self._discs = [ ] - #self._discIdsCount = None - self._tracks = [ ] - self._tracksOffset = None - self._tracksCount = None - - - def getTypes(self): - """Returns the types of this release. - - To test for release types, you can use the constants - L{TYPE_ALBUM}, L{TYPE_SINGLE}, etc. - - @return: a list of strings containing absolute URIs - - @see: L{musicbrainz2.utils.getReleaseTypeName} - """ - return self._types - - types = property(getTypes, doc='The list of types for this release.') - - def addType(self, type_): - """Add a type to the list of types. - - @param type_: a string containing absolute URIs - - @see: L{getTypes} - """ - self._types.append(type_) - - def getTitle(self): - """Returns the release's title. - - @return: a string containing the release's title - """ - return self._title - - def setTitle(self, title): - """Sets the release's title. - - @param title: a string containing the release's title, or None - """ - self._title = title - - title = property(getTitle, setTitle, doc='The title of this release.') - - def getTextLanguage(self): - """Returns the language used in release and track titles. - - To represent the language, the ISO-639-2/T standard is used, - which provides three-letter terminological language codes like - 'ENG', 'DEU', 'JPN', 'KOR', 'ZHO' or 'YID'. - - Note that this refers to release and track I{titles}, not - lyrics. - - @return: a string containing the language code, or None - - @see: L{musicbrainz2.utils.getLanguageName} - """ - return self._textLanguage - - def setTextLanguage(self, language): - """Sets the language used in releaes and track titles. - - @param language: a string containing a language code - - @see: L{getTextLanguage} - """ - self._textLanguage = language - - textLanguage = property(getTextLanguage, setTextLanguage, - doc='The language used in release and track titles.') - - def getTextScript(self): - """Returns the script used in release and track titles. - - To represent the script, ISO-15924 script codes are used. - Valid codes are, among others: 'Latn', 'Cyrl', 'Hans', 'Hebr' - - Note that this refers to release and track I{titles}, not - lyrics. - - @return: a string containing the script code, or None - - @see: L{musicbrainz2.utils.getScriptName} - """ - return self._textScript - - def setTextScript(self, script): - """Sets the script used in releaes and track titles. - - @param script: a string containing a script code - - @see: L{getTextScript} - """ - self._textScript = script - - textScript = property(getTextScript, setTextScript, - doc='The script used in release and track titles.') - - def getAsin(self): - """Returns the amazon shop identifier (ASIN). - - The ASIN is a 10-letter code (except for books) assigned - by Amazon, which looks like 'B000002IT2' or 'B00006I4YD'. - - @return: a string containing the ASIN, or None - """ - return self._asin - - def setAsin(self, asin): - """Sets the amazon shop identifier (ASIN). - - @param asin: a string containing the ASIN - - @see: L{getAsin} - """ - self._asin = asin - - asin = property(getAsin, setAsin, doc='The amazon shop identifier.') - - def getArtist(self): - """Returns the main artist of this release. - - @return: an L{Artist} object, or None - """ - return self._artist - - def setArtist(self, artist): - """Sets this release's main artist. - - @param artist: an L{Artist} object - """ - self._artist = artist - - artist = property(getArtist, setArtist, - doc='The main artist of this release.') - - def getReleaseGroup(self): - """Returns the release group to which this release belongs. - - @return: a L{ReleaseGroup} object, or None. - """ - return self._releaseGroup - - def setReleaseGroup(self, releaseGroup): - """Sets the release's release group. - - @param releaseGroup: a L{ReleaseGroup} object, or None. - """ - self._releaseGroup = releaseGroup - - releaseGroup = property(getReleaseGroup, setReleaseGroup, - doc='The release group this release belongs to.') - - def isSingleArtistRelease(self): - """Checks if this is a single artist's release. - - Returns C{True} if the release's main artist (L{getArtist}) is - also the main artist for all of the tracks. This is checked by - comparing the artist IDs. - - Note that the release's artist has to be set (see L{setArtist}) - for this. The track artists may be unset. - - @return: True, if this is a single artist's release - """ - releaseArtist = self.getArtist() - assert releaseArtist is not None, 'Release Artist may not be None!' - for track in self.getTracks(): - if track.getArtist() is None: - continue - if track.getArtist().getId() != releaseArtist.getId(): - return False - - return True - - def getTracks(self): - """Returns the tracks this release contains. - - @return: a list containing L{Track} objects - - @see: L{getTracksOffset}, L{getTracksCount} - """ - return self._tracks - - tracks = property(getTracks, doc='The list of tracks.') - - def addTrack(self, track): - """Adds a track to this release. - - This appends a track at the end of this release's track list. - - @param track: a L{Track} object - """ - self._tracks.append(track) - - def getTracksOffset(self): - """Returns the offset of the track list. - - This is used if the track list is incomplete (ie. the web - service only returned part of the tracks on this release). - Note that the offset value is zero-based, which means track - C{0} is the first track. - - @return: an integer containing the offset, or None - - @see: L{getTracks}, L{getTracksCount} - """ - return self._tracksOffset - - def setTracksOffset(self, offset): - """Sets the offset of the track list. - - @param offset: an integer containing the offset, or None - - @see: L{getTracksOffset}, L{setTracksCount} - """ - self._tracksOffset = offset - - tracksOffset = property(getTracksOffset, setTracksOffset, - doc='The offset of the track list.') - - def getTracksCount(self): - """Returns the number of tracks on this release. - - This may or may not match with the number of elements that - L{getTracks} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setTracksCount}, L{getTracks}, L{getTracksOffset} - """ - return self._tracksCount - - def setTracksCount(self, value): - """Sets the number of tracks on this release. - - @param value: an integer containing the count, or None - - @see: L{getTracksCount}, L{setTracksOffset} - """ - self._tracksCount = value - - tracksCount = property(getTracksCount, setTracksCount, - doc='The total number of releases') - - - def getReleaseEvents(self): - """Returns the list of release events. - - A L{Release} may contain a list of so-called release events, - each represented using a L{ReleaseEvent} object. Release - evens specify where and when this release was, well, released. - - @return: a list of L{ReleaseEvent} objects - - @see: L{getReleaseEventsAsDict} - """ - return self._releaseEvents - - releaseEvents = property(getReleaseEvents, - doc='The list of release events.') - - def addReleaseEvent(self, event): - """Adds a release event to this release. - - @param event: a L{ReleaseEvent} object - - @see: L{getReleaseEvents} - """ - self._releaseEvents.append(event) - - def getReleaseEventsAsDict(self): - """Returns the release events represented as a dict. - - Keys are ISO-3166 country codes like 'DE', 'UK', 'FR' etc. - Values are dates in 'YYYY', 'YYYY-MM' or 'YYYY-MM-DD' format. - - @return: a dict containing (countryCode, date) entries - - @see: L{getReleaseEvents}, L{musicbrainz2.utils.getCountryName} - """ - d = { } - for event in self.getReleaseEvents(): - d[event.getCountry()] = event.getDate() - return d - - def getEarliestReleaseDate(self): - """Returns the earliest release date. - - This favours complete dates. For example, '2006-09' is - returned if there is '2000', too. If there is no release - event associated with this release, None is returned. - - @return: a string containing the date, or None - - @see: L{getReleaseEvents}, L{getReleaseEventsAsDict} - """ - event = self.getEarliestReleaseEvent() - - if event is None: - return None - else: - return event.getDate() - - def getEarliestReleaseEvent(self): - """Returns the earliest release event. - - This works like L{getEarliestReleaseDate}, but instead of - just the date, this returns a L{ReleaseEvent} object. - - @return: a L{ReleaseEvent} object, or None - - @see: L{getReleaseEvents}, L{getEarliestReleaseDate} - """ - dates = [ ] - for event in self.getReleaseEvents(): - date = event.getDate() - if len(date) == 10: # 'YYYY-MM-DD' - dates.append( (date, event) ) - elif len(date) == 7: # 'YYYY-MM' - dates.append( (date + '-99', event) ) - else: - dates.append( (date + '-99-99', event) ) - - dates.sort(lambda x, y: cmp(x[0], y[0])) - - if len(dates) > 0: - return dates[0][1] - else: - return None - - - #def getReleaseEventsCount(self): - # """Returns the number of release events. - # - # This may or may not match with the number of elements that - # getReleaseEvents() returns. If the count is higher than - # the list, it indicates that the list is incomplete. - # """ - # return self._releaseEventsCount - - #def setReleaseEventsCount(self, value): - # self._releaseEventsCount = value - - def getDiscs(self): - """Returns the discs associated with this release. - - Discs are currently containers for MusicBrainz DiscIDs. - Note that under rare circumstances (identical TOCs), a - DiscID could be associated with more than one release. - - @return: a list of L{Disc} objects - """ - return self._discs - - discs = property(getDiscs, doc='The list of associated discs.') - - def addDisc(self, disc): - """Adds a disc to this release. - - @param disc: a L{Disc} object - """ - self._discs.append(disc) - - #def getDiscIdsCount(self): - # return self._discIdsCount - - #def setDiscIdsCount(self, value): - # self._discIdsCount = value - - -class ReleaseGroup(Entity): - """Represents a ReleaseGroup. - - A ReleaseGroup in MusicBrainz is an L{Entity} which groups several different - versions of L{Release} objects (e.g., different editions of the same album). - - @see: L{Release} - @see: L{Entity} - """ - - def __init__(self, id_=None, title=None): - """Constructor. - - @param id_: a string containing an absolute URI - @param title: a string containing the title - """ - Entity.__init__(self, id_) - self._title = title - self._id = id_ - self._type = None - self._releases = [ ] - self._artist = None - self._releasesOffset = 0 - self._releasesCount = 0 - - def getType(self): - """Returns the type of this release group. - - To test for release types, you can use the constants - L{Release.TYPE_ALBUM}, L{Release.TYPE_SINGLE}, etc. - - @return: a string containing an absolute URI, or None. - - @see: L{musicbrainz2.utils.getReleaseTypeName} - """ - return self._type - - def setType(self, type_): - """Sets the type of this release group. - - Use a constant from the L{Release} class, such as - L{Release.TYPE_ALBUM} or L{Release.TYPE_SINGLE} to - set the value. - - @param type_: a string containing an absolute URI, or None. - - @see: L{musicbrainz2.utils.getReleaseTypeName} - """ - self._type = type_ - - type = property(getType, setType, - doc = 'The type of this release group.') - - def getReleases(self): - """Gets the releases in this release group. - - @return: a list of L{Release} objects - @see: L{Release} - """ - return self._releases - - releases = property(getReleases, - doc = 'The list of releases in this release group.') - - def addRelease(self, release): - """Adds a L{Release} to this release group. - - @param release: a L{Release} object - """ - self._releases.append(release) - - def getReleasesOffset(self): - """Returns the offset of the release list. - - This is used if the release list is incomplete (i.e., the web - service only returned a portion of the releases in this release - group). - - @return: an integer containing the offset, or None. - @see: L{getReleases}, L{getReleasesCount} - """ - return self._releasesOffset - - def setReleasesOffset(self, offset): - """Sets the offset of the release list. - - @param offset: an integer containing the offset, or None. - @see: L{getReleases}, L{getReleasesOffset} - """ - self._releasesOffset = offset - - releasesOffset = property(getReleasesOffset, setReleasesOffset, - doc='The offset of the release list.') - - def getReleasesCount(self): - """Returns the number of releases in this release group. - - This may or may not match the number of elements returned by - L{getReleases}. If the count is higher than the length of that - list, then the list is incomplete. - - @return: an integer containing the count, or None - @see: L{getReleases}, L{setReleasesCount}, L{getReleasesOffset} - """ - return self._releasesCount - - def setReleasesCount(self, value): - """Sets the number of releases in this release group. - - @param value: an integer containing the count, or None. - @see: L{getReleases}, L{getReleasesCount}, L{getReleasesOffset} - """ - self._releasesCount = value - - releasesCount = property(getReleasesCount, setReleasesCount, - doc = 'The total number of releases') - - def getTitle(self): - """Returns this release group's title. - - @return: a string containing the release group's title - """ - return self._title - - def setTitle(self, title): - """Sets the release group's title. - - @param title: a string containing the release group's title. - """ - self._title = title - - title = property(getTitle, setTitle, - doc = 'The title of this release group.') - - def getArtist(self): - """Returns the main artist of this release group. - - @return: an L{Artist} object, or None - """ - return self._artist - - def setArtist(self, artist): - """Sets the release group's main artist. - - @param artist: an L{Artist} object - """ - self._artist = artist - - artist = property(getArtist, setArtist, - doc = 'The main artist of this release group') - - -class Track(Entity): - """Represents a track. - - This class represents a track which may appear on one or more releases. - A track may be associated with exactly one artist (the I{main} artist). - - Using L{getReleases}, you can find out on which releases this track - appears. To get the track number, too, use the - L{Release.getTracksOffset} method. - - @note: Currently, the MusicBrainz server doesn't support tracks to - be on more than one release. - - @see: L{Release}, L{Artist} - """ - def __init__(self, id_=None, title=None): - """Constructor. - - @param id_: a string containing an absolute URI - @param title: a string containing the title - """ - Entity.__init__(self, id_) - self._title = title - self._artist = None - self._duration = None - self._puids = [ ] - self._releases = [ ] - self._isrcs = [ ] - - def getTitle(self): - """Returns the track's title. - - The style and format of this attribute is specified by the - style guide. - - @return: a string containing the title, or None - - @see: U{The MusicBrainz Style Guidelines - } - """ - return self._title - - def setTitle(self, title): - """Sets the track's title. - - @param title: a string containing the title - - @see: L{getTitle} - """ - self._title = title - - title = property(getTitle, setTitle, doc="The track's title.") - - def getArtist(self): - """Returns the main artist of this track. - - @return: an L{Artist} object, or None - """ - return self._artist - - def setArtist(self, artist): - """Sets this track's main artist. - - @param artist: an L{Artist} object - """ - self._artist = artist - - artist = property(getArtist, setArtist, doc="The track's main artist.") - - def getDuration(self): - """Returns the duration of this track in milliseconds. - - @return: an int containing the duration in milliseconds, or None - """ - return self._duration - - def setDuration(self, duration): - """Sets the duration of this track in milliseconds. - - @param duration: an int containing the duration in milliseconds - """ - self._duration = duration - - duration = property(getDuration, setDuration, - doc='The duration in milliseconds.') - - def getDurationSplit(self): - """Returns the duration as a (minutes, seconds) tuple. - - If no duration is set, (0, 0) is returned. Seconds are - rounded towards the ceiling if at least 500 milliseconds - are left. - - @return: a (minutes, seconds) tuple, both entries being ints - """ - duration = self.getDuration() - if duration is None: - return (0, 0) - else: - seconds = int( round(duration / 1000.0) ) - return (seconds / 60, seconds % 60) - - def getPuids(self): - """Returns the PUIDs associated with this track. - - Please note that a PUID may be associated with more than one - track. - - @return: a list of strings, each containing one PUID - """ - return self._puids - - puids = property(getPuids, doc='The list of associated PUIDs.') - - def addPuid(self, puid): - """Add a PUID to this track. - - @param puid: a string containing a PUID - """ - self._puids.append(puid) - - def getISRCs(self): - """Returns the ISRCs associated with this track. - - @return: a list of strings, each containing one ISRC - """ - return self._isrcs - - isrcs = property(getISRCs, doc='The list of associated ISRCs') - - def addISRC(self, isrc): - """Add a ISRC to this track. - - @param isrc: a string containing an ISRC - """ - self._isrcs.append(isrc) - - def getReleases(self): - """Returns the list of releases this track appears on. - - @return: a list of L{Release} objects - """ - return self._releases - - releases = property(getReleases, - doc='The releases on which this track appears.') - - def addRelease(self, release): - """Add a release on which this track appears. - - @param release: a L{Release} object - """ - self._releases.append(release) - - -class Relation(object): - """Represents a relation between two Entities. - - There may be an arbitrary number of relations between all first - class objects in MusicBrainz. The Relation itself has multiple - attributes, which may or may not be used for a given relation - type. - - Note that a L{Relation} object only contains the target but not - the source end of the relation. - - @todo: Add some examples. - - @cvar TO_ARTIST: Identifies relations linking to an artist. - @cvar TO_RELEASE: Identifies relations linking to a release. - @cvar TO_TRACK: Identifies relations linking to a track. - @cvar TO_URL: Identifies relations linking to an URL. - - @cvar DIR_NONE: Relation reading direction doesn't matter. - @cvar DIR_FORWARD: Relation reading direction is from source to target. - @cvar DIR_BACKWARD: Relation reading direction is from target to source. - @cvar DIR_BOTH: Relation reading direction doesn't matter (no longer used!). - """ - # Relation target types - # - TO_ARTIST = NS_REL_1 + 'Artist' - TO_RELEASE = NS_REL_1 + 'Release' - TO_TRACK = NS_REL_1 + 'Track' - TO_URL = NS_REL_1 + 'Url' - - # Relation reading directions - # - DIR_BOTH = 'both' - DIR_FORWARD = 'forward' - DIR_BACKWARD = 'backward' - DIR_NONE = 'none' - - def __init__(self, relationType=None, targetType=None, targetId=None, - direction=DIR_NONE, attributes=None, - beginDate=None, endDate=None, target=None): - """Constructor. - - @param relationType: a string containing an absolute URI - @param targetType: a string containing an absolute URI - @param targetId: a string containing an absolute URI - @param direction: one of C{Relation.DIR_FORWARD}, - C{Relation.DIR_BACKWARD}, or C{Relation.DIR_NONE} - @param attributes: a list of strings containing absolute URIs - @param beginDate: a string containing a date - @param endDate: a string containing a date - @param target: an instance of a subclass of L{Entity} - """ - self._relationType = relationType - self._targetType = targetType - self._targetId = targetId - self._direction = direction - self._beginDate = beginDate - self._endDate = endDate - self._target = target - self._attributes = attributes - if self._attributes is None: - self._attributes = [ ] - - def getType(self): - """Returns this relation's type. - - @return: a string containing an absolute URI, or None - """ - return self._relationType - - def setType(self, type_): - """Sets this relation's type. - - @param type_: a string containing an absolute URI - """ - self._relationType = type_ - - type = property(getType, setType, doc="The relation's type.") - - def getTargetId(self): - """Returns the target's ID. - - This is the ID the relation points to. It is an absolute - URI, and in case of an URL relation, it is a URL. - - @return: a string containing an absolute URI - """ - return self._targetId - - def setTargetId(self, targetId): - """Sets the target's ID. - - @param targetId: a string containing an absolute URI - - @see: L{getTargetId} - """ - self._targetId = targetId - - targetId = property(getTargetId, setTargetId, doc="The target's ID.") - - def getTargetType(self): - """Returns the target's type. - - For MusicBrainz data, the following target types are defined: - - artists: L{Relation.TO_ARTIST} - - releases: L{Relation.TO_RELEASE} - - tracks: L{Relation.TO_TRACK} - - urls: L{Relation.TO_URL} - - @return: a string containing an absolute URI - """ - return self._targetType - - def setTargetType(self, targetType): - """Sets the target's type. - - @param targetType: a string containing an absolute URI - - @see: L{getTargetType} - """ - self._targetType = targetType - - targetId = property(getTargetId, setTargetId, - doc="The type of target this relation points to.") - - def getAttributes(self): - """Returns a list of attributes describing this relation. - - The attributes permitted depend on the relation type. - - @return: a list of strings containing absolute URIs - """ - return self._attributes - - attributes = property(getAttributes, - doc='The list of attributes describing this relation.') - - def addAttribute(self, attribute): - """Adds an attribute to the list. - - @param attribute: a string containing an absolute URI - """ - self._attributes.append(attribute) - - def getBeginDate(self): - """Returns the begin date. - - The definition depends on the relation's type. It may for - example be the day of a marriage or the year an artist - joined a band. For other relation types this may be - undefined. - - @return: a string containing a date - """ - return self._beginDate - - def setBeginDate(self, dateStr): - """Sets the begin date. - - @param dateStr: a string containing a date - - @see: L{getBeginDate} - """ - self._beginDate = dateStr - - beginDate = property(getBeginDate, setBeginDate, doc="The begin date.") - - def getEndDate(self): - """Returns the end date. - - As with the begin date, the definition depends on the - relation's type. Depending on the relation type, this may - or may not be defined. - - @return: a string containing a date - - @see: L{getBeginDate} - """ - return self._endDate - - def setEndDate(self, dateStr): - """Sets the end date. - - @param dateStr: a string containing a date - - @see: L{getBeginDate} - """ - self._endDate = dateStr - - endDate = property(getEndDate, setEndDate, doc="The end date.") - - def getDirection(self): - """Returns the reading direction. - - The direction may be one of L{Relation.DIR_FORWARD}, - L{Relation.DIR_BACKWARD}, or L{Relation.DIR_NONE}, - depending on how the relation should be read. For example, - if direction is L{Relation.DIR_FORWARD} for a cover relation, - it is read as "X is a cover of Y". For some relations there is - no reading direction (like marriages) and the web service doesn't - send a direction. In these cases, the direction is set to - L{Relation.DIR_NONE}. - - @return: L{Relation.DIR_FORWARD}, L{Relation.DIR_BACKWARD}, - or L{Relation.DIR_NONE} - """ - return self._direction - - def setDirection(self, direction): - """Sets the reading direction. - - @param direction: L{Relation.DIR_FORWARD}, - L{Relation.DIR_BACKWARD}, or L{Relation.DIR_NONE} - - @see: L{getDirection} - """ - self._direction = direction - - direction = property(getDirection, setDirection, - doc="The reading direction.") - - def getTarget(self): - """Returns this relation's target object. - - Note that URL relations never have a target object. Use the - L{getTargetId} method to get the URL. - - @return: a subclass of L{Entity}, or None - """ - return self._target - - def setTarget(self, target): - """Sets this relation's target object. - - Note that URL relations never have a target object, they - are set using L{setTargetId}. - - @param target: a subclass of L{Entity} - """ - self._target = target - - target = property(getTarget, setTarget, - doc="The relation's target object.") - - -class ReleaseEvent(object): - """A release event, indicating where and when a release took place. - - All country codes used must be valid ISO-3166 country codes (i.e. 'DE', - 'UK' or 'FR'). The dates are strings and must have the format 'YYYY', - 'YYYY-MM' or 'YYYY-MM-DD'. - - The format of the release medium is a URI that can be compared to the - constants on this class (L{FORMAT_CD}, L{FORMAT_DVD} and others). - """ - FORMAT_CD = NS_MMD_1 + 'CD' - FORMAT_DVD = NS_MMD_1 + 'DVD' - FORMAT_SACD = NS_MMD_1 + 'SACD' - FORMAT_DUALDISC = NS_MMD_1 + 'DualDisc' - FORMAT_LASERDISC = NS_MMD_1 + 'LaserDisc' - FORMAT_MINIDISC = NS_MMD_1 + 'MiniDisc' - FORMAT_VINYL = NS_MMD_1 + 'Vinyl' - FORMAT_CASSETTE = NS_MMD_1 + 'Cassette' - FORMAT_CARTRIDGE = NS_MMD_1 + 'Cartridge' - FORMAT_REEL_TO_REEL = NS_MMD_1 + 'ReelToReel' - FORMAT_DAT = NS_MMD_1 + 'DAT' - FORMAT_DIGITAL = NS_MMD_1 + 'Digital' - FORMAT_WAX_CYLINDER = NS_MMD_1 + 'WaxCylinder' - FORMAT_PIANO_ROLL = NS_MMD_1 + 'PianoRoll' - FORMAT_OTHER = NS_MMD_1 + 'Other' - - def __init__(self, country=None, dateStr=None): - """Constructor. - - @param country: a string containing an ISO-3166 country code - @param dateStr: a string containing a date string - """ - self._countryId = country - self._dateStr = dateStr - self._catalogNumber = None - self._barcode = None - self._label = None - self._format = None - - def getCountry(self): - """Returns the country a release took place. - - @note: Due to a server limitation, the web service does not - return country IDs for release collection queries. This only - affects the L{musicbrainz2.webservice.Query.getReleases} query. - - @return: a string containing an ISO-3166 country code, or None - - @see: L{musicbrainz2.utils.getCountryName} - """ - return self._countryId - - def setCountry(self, country): - """Sets the country a release took place. - - @param country: a string containing an ISO-3166 country code - """ - self._countryId = country - - country = property(getCountry, setCountry, - doc='The country a release took place.') - - def getCatalogNumber(self): - """Returns the catalog number of this release event. - - @return: A string containing the catalog number, or None - """ - return self._catalogNumber - - def setCatalogNumber(self, catalogNumber): - """Sets the catalog number of this release event. - - @param catalogNumber: A string containing the catalog number - """ - self._catalogNumber = catalogNumber - - catalogNumber = property(getCatalogNumber, setCatalogNumber, - doc='The catalog number of the release event') - - def getBarcode(self): - """Returns the barcode of this release event. - - @return: A string containing the barcode, or None - """ - return self._barcode - - def setBarcode(self, barcode): - """Sets the barcode of this release event. - - @param barcode: A string containing the barcode - """ - self._barcode = barcode - - barcode = property(getBarcode, setBarcode, - doc='The barcode of the release event') - - def getLabel(self): - """Returns a L{Label} object for the label associated with this release. - - @return: a L{Label} object, or None - """ - return self._label - - def setLabel(self, label): - """Sets the label of this release event. - - @param label: A L{Label} object - """ - self._label = label - - label = property(getLabel, setLabel, doc='The label of the release') - - def getDate(self): - """Returns the date a release took place. - - @return: a string containing a date - """ - return self._dateStr - - def setDate(self, dateStr): - """Sets the date a release took place. - - @param dateStr: a string containing a date - """ - self._dateStr = dateStr - - date = property(getDate, setDate, doc='The date a release took place.') - - def getFormat(self): - """Returns the format of the release medium. - - @return: a string containing a URI, or None - """ - return self._format - - def setFormat(self, format): - """Sets the format of the release medium. - - @param format: a string containing a URI - """ - self._format = format - - format = property(getFormat, setFormat, - doc='The format of the release medium.') - - -class CDStub(object): - """Represents a CD Stub""" - - def __init__(self, disc): - """Constructor. - - @param disc: a L{Disc} object to create this CD Stub from - """ - assert isinstance(disc, Disc), 'musicbrainz2.model.Disc expected' - self._disc = disc - self._tracks = [ ] - self._title = "" - self._artist = "" - self._barcode = "" - self._comment = "" - - def setTitle(self, title): - """Sets the title of this release. - - @param title: a string containing the title - """ - self._title = title - - def getTitle(self): - """Returns the title of this release. - - @return: a string containing the title - """ - return self._title - - title = property(getTitle, setTitle, - doc='The title of the release') - - def setArtist(self, artist): - """Sets the artist of this release. - - @param artist: a string containing the artist - """ - self._artist = artist - - def getArtist(self): - """Returns the artist of this release. - - @return: a string containing the artist - """ - return self._artist - - artist = property(getArtist, setArtist, - doc='The artist of the release') - - def setComment(self, comment): - """Sets the comment for this release. - - @param comment: a string containing the comment - """ - self._comment = comment - - def getComment(self): - """Returns the comment for this release. - - @return: a string containing the comment - """ - return self._comment - - comment = property(getComment, setComment, - doc='Comment for the release (optional)') - - def setBarcode(self, barcode): - """Sets the barcode of this release. - - @param barcode: a string containing the barcode - """ - self._barcode = barcode - - def getBarcode(self): - """Returns the barcode of this release. - - @return: a string containing the barcode - """ - return self._barcode - - barcode = property(getBarcode, setBarcode, - doc='Barcode for the release (optional)') - - def addTrack(self, title, artist=''): - """Add a track to this release - - @param title: a string containing the title of the track - @param artist: a string containing the artist of the track, - if different to the album artist - """ - self._tracks.append((title, artist)) - - def getTracks(self): - """Return all the tracks on the release. - - @return: a list of tuples containing (title, artist) pairs - for each track - """ - return self._tracks - - tracks = property(getTracks, doc='The tracks of the release.') - -class Disc(object): - """Represents an Audio CD. - - This class represents an Audio CD. A disc can have an ID (the - MusicBrainz DiscID), which is calculated from the CD's table of - contents (TOC). There may also be data from the TOC like the length - of the disc in sectors, as well as position and length of the tracks. - - Note that different TOCs, maybe due to different pressings, lead to - different DiscIDs. Conversely, if two different discs have the same - TOC, they also have the same DiscID (which is unlikely but not - impossible). DiscIDs are always 28 characters long and look like this: - C{'J68I_CDcUFdCRCIbHSEbTBCbooA-'}. Sometimes they are also referred - to as CDIndex IDs. - - The L{MusicBrainz web service } only returns - the DiscID and the number of sectors. The DiscID calculation function - L{musicbrainz2.disc.readDisc}, however, can retrieve the other - attributes of L{Disc} from an Audio CD in the disc drive. - """ - def __init__(self, id_=None): - """Constructor. - - @param id_: a string containing a 28-character DiscID - """ - self._id = id_ - self._sectors = None - self._firstTrackNum = None - self._lastTrackNum = None - self._tracks = [ ] - - def getId(self): - """Returns the MusicBrainz DiscID. - - @return: a string containing a 28-character DiscID - """ - return self._id - - def setId(self, id_): - """Sets the MusicBrainz DiscId. - - @param id_: a string containing a 28-character DiscID - """ - self._id = id_ - - id = property(getId, setId, doc="The MusicBrainz DiscID.") - - def getSectors(self): - """Returns the length of the disc in sectors. - - @return: the length in sectors as an integer, or None - """ - return self._sectors - - def setSectors(self, sectors): - """Sets the length of the disc in sectors. - - @param sectors: the length in sectors as an integer - """ - self._sectors = sectors - - sectors = property(getSectors, setSectors, - doc="The length of the disc in sectors.") - - def getFirstTrackNum(self): - """Returns the number of the first track on this disc. - - @return: an int containing the track number, or None - """ - return self._firstTrackNum - - def setFirstTrackNum(self, trackNum): - """Sets the number of the first track on this disc. - - @param trackNum: an int containing the track number, or None - """ - self._firstTrackNum = trackNum - - firstTrackNum = property(getFirstTrackNum, setFirstTrackNum, - doc="The number of the first track on this disc.") - - def getLastTrackNum(self): - """Returns the number of the last track on this disc. - - @return: an int containing the track number, or None - """ - return self._lastTrackNum - - def setLastTrackNum(self, trackNum): - """Sets the number of the last track on this disc. - - @param trackNum: an int containing the track number, or None - """ - self._lastTrackNum = trackNum - - lastTrackNum = property(getLastTrackNum, setLastTrackNum, - doc="The number of the last track on this disc.") - - def getTracks(self): - """Returns the sector offset and length of this disc. - - This method returns a list of tuples containing the track - offset and length in sectors for all tracks on this disc. - The track offset is measured from the beginning of the disc, - the length is relative to the track's offset. Note that the - leadout track is I{not} included. - - @return: a list of (offset, length) tuples (values are ints) - """ - return self._tracks - - tracks = property(getTracks, - doc='Sector offset and length of all tracks.') - - def addTrack(self, track): - """Adds a track to the list. - - This method adds an (offset, length) tuple to the list of - tracks. The leadout track must I{not} be added. The total - length of the disc can be set using L{setSectors}. - - @param track: an (offset, length) tuple (values are ints) - - @see: L{getTracks} - """ - self._tracks.append(track) - - -class AbstractAlias(object): - """An abstract super class for all alias classes.""" - def __init__(self, value=None, type_=None, script=None): - """Constructor. - - @param value: a string containing the alias - @param type_: a string containing an absolute URI - @param script: a string containing an ISO-15924 script code - """ - self._value = value - self._type = type_ - self._script = script - - def getValue(self): - """Returns the alias. - - @return: a string containing the alias - """ - return self._value - - def setValue(self, value): - """Sets the alias. - - @param value: a string containing the alias - """ - self._value = value - - value = property(getValue, setValue, doc='The alias value.') - - def getType(self): - """Returns the alias type. - - @return: a string containing an absolute URI, or None - """ - return self._type - - def setType(self, type_): - """Sets the alias type. - - @param type_: a string containing an absolute URI, or None - """ - self._type = type_ - - type = property(getType, setType, doc='The alias type.') - - def getScript(self): - """Returns the alias script. - - @return: a string containing an ISO-15924 script code - """ - return self._script - - def setScript(self, script): - """Sets the alias script. - - @param script: a string containing an ISO-15924 script code - """ - self._script = script - - script = property(getScript, setScript, doc='The alias script.') - - -class ArtistAlias(AbstractAlias): - """Represents an artist alias. - - An alias (the I{alias value}) is a different representation of an - artist's name. This may be a common misspelling or a transliteration - (the I{alias type}). - - The I{alias script} is interesting mostly for transliterations and - indicates which script is used for the alias value. To represent the - script, ISO-15924 script codes like 'Latn', 'Cyrl', or 'Hebr' are used. - """ - pass - - -class LabelAlias(AbstractAlias): - """Represents a label alias. - - An alias (the I{alias value}) is a different representation of a - label's name. This may be a common misspelling or a transliteration - (the I{alias type}). - - The I{alias script} is interesting mostly for transliterations and - indicates which script is used for the alias value. To represent the - script, ISO-15924 script codes like 'Latn', 'Cyrl', or 'Hebr' are used. - """ - pass - - -class User(object): - """Represents a MusicBrainz user.""" - - def __init__(self): - """Constructor.""" - self._name = None - self._types = [ ] - self._showNag = None - - def getName(self): - """Returns the user name. - - @return: a string containing the user name - """ - return self._name - - def setName(self, name): - """Sets the user name. - - @param name: a string containing the user name - """ - self._name = name - - name = property(getName, setName, doc='The MusicBrainz user name.') - - def getTypes(self): - """Returns the types of this user. - - Most users' type list is empty. Currently, the following types - are defined: - - - 'http://musicbrainz.org/ns/ext-1.0#AutoEditor' - - 'http://musicbrainz.org/ns/ext-1.0#RelationshipEditor' - - 'http://musicbrainz.org/ns/ext-1.0#Bot' - - 'http://musicbrainz.org/ns/ext-1.0#NotNaggable' - - @return: a list of strings containing absolute URIs - """ - return self._types - - types = property(getTypes, doc="The user's types.") - - def addType(self, type_): - """Add a type to the list of types. - - @param type_: a string containing absolute URIs - - @see: L{getTypes} - """ - self._types.append(type_) - - def getShowNag(self): - """Returns true if a nag screen should be displayed to the user. - - @return: C{True}, C{False}, or None - """ - return self._showNag - - def setShowNag(self, value): - """Sets the value of the nag screen flag. - - If set to C{True}, - - @param value: C{True} or C{False} - - @see: L{getShowNag} - """ - self._showNag = value - - showNag = property(getShowNag, setShowNag, - doc='The value of the nag screen flag.') - -# EOF diff --git a/musicbrainz2/utils.py b/musicbrainz2/utils.py deleted file mode 100644 index 0eff7be8..00000000 --- a/musicbrainz2/utils.py +++ /dev/null @@ -1,204 +0,0 @@ -"""Various utilities to simplify common tasks. - -This module contains helper functions to make common tasks easier. - -@author: Matthias Friedrich -""" -__revision__ = '$Id: utils.py 11853 2009-07-21 09:26:50Z luks $' - -import re -import urlparse -import os.path - -__all__ = [ - 'extractUuid', 'extractFragment', 'extractEntityType', - 'getReleaseTypeName', 'getCountryName', 'getLanguageName', - 'getScriptName', -] - - -# A pattern to split the path part of an absolute MB URI. -PATH_PATTERN = '^/(artist|release|track|label|release-group)/([^/]*)$' - - -def extractUuid(uriStr, resType=None): - """Extract the UUID part from a MusicBrainz identifier. - - This function takes a MusicBrainz ID (an absolute URI) as the input - and returns the UUID part of the URI, thus turning it into a relative - URI. If C{uriStr} is None or a relative URI, then it is returned - unchanged. - - The C{resType} parameter can be used for error checking. Set it to - 'artist', 'release', or 'track' to make sure C{uriStr} is a - syntactically valid MusicBrainz identifier of the given resource - type. If it isn't, a C{ValueError} exception is raised. - This error checking only works if C{uriStr} is an absolute URI, of - course. - - Example: - - >>> from musicbrainz2.utils import extractUuid - >>> extractUuid('http://musicbrainz.org/artist/c0b2500e-0cef-4130-869d-732b23ed9df5', 'artist') - 'c0b2500e-0cef-4130-869d-732b23ed9df5' - >>> - - @param uriStr: a string containing a MusicBrainz ID (an URI), or None - @param resType: a string containing a resource type - - @return: a string containing a relative URI, or None - - @raise ValueError: the given URI is no valid MusicBrainz ID - """ - if uriStr is None: - return None - - (scheme, netloc, path) = urlparse.urlparse(uriStr)[:3] - - if scheme == '': - return uriStr # no URI, probably already the UUID - - if scheme != 'http' or netloc != 'musicbrainz.org': - raise ValueError('%s is no MB ID.' % uriStr) - - m = re.match(PATH_PATTERN, path) - - if m: - if resType is None: - return m.group(2) - else: - if m.group(1) == resType: - return m.group(2) - else: - raise ValueError('expected "%s" Id' % resType) - else: - raise ValueError('%s is no valid MB ID.' % uriStr) - - -def extractFragment(uriStr, uriPrefix=None): - """Extract the fragment part from a URI. - - If C{uriStr} is None or no absolute URI, then it is returned unchanged. - - The C{uriPrefix} parameter can be used for error checking. If C{uriStr} - is an absolute URI, then the function checks if it starts with - C{uriPrefix}. If it doesn't, a C{ValueError} exception is raised. - - @param uriStr: a string containing an absolute URI - @param uriPrefix: a string containing an URI prefix - - @return: a string containing the fragment, or None - - @raise ValueError: the given URI doesn't start with C{uriPrefix} - """ - if uriStr is None: - return None - - (scheme, netloc, path, params, query, frag) = urlparse.urlparse(uriStr) - if scheme == '': - return uriStr # this is no URI - - if uriPrefix is None or uriStr.startswith(uriPrefix): - return frag - else: - raise ValueError("prefix doesn't match URI %s" % uriStr) - - -def extractEntityType(uriStr): - """Returns the entity type an entity URI is referring to. - - @param uriStr: a string containing an absolute entity URI - - @return: a string containing 'artist', 'release', 'track', or 'label' - - @raise ValueError: if the given URI is no valid MusicBrainz ID - """ - if uriStr is None: - raise ValueError('None is no valid entity URI') - - (scheme, netloc, path) = urlparse.urlparse(uriStr)[:3] - - if scheme == '': - raise ValueError('%s is no absolute MB ID.' % uriStr) - - if scheme != 'http' or netloc != 'musicbrainz.org': - raise ValueError('%s is no MB ID.' % uriStr) - - m = re.match(PATH_PATTERN, path) - - if m: - return m.group(1) - else: - raise ValueError('%s is no valid MB ID.' % uriStr) - - -def getReleaseTypeName(releaseType): - """Returns the name of a release type URI. - - @param releaseType: a string containing a release type URI - - @return: a string containing a printable name for the release type - - @see: L{musicbrainz2.model.Release} - """ - from musicbrainz2.data.releasetypenames import releaseTypeNames - return releaseTypeNames.get(releaseType) - - -def getCountryName(id_): - """Returns a country's name based on an ISO-3166 country code. - - The country table this function is based on has been modified for - MusicBrainz purposes by using the extension mechanism defined in - ISO-3166. All IDs are still valid ISO-3166 country codes, but some - IDs have been added to include historic countries and some of the - country names have been modified to make them better suited for - display purposes. - - If the country ID is not found, None is returned. This may happen - for example, when new countries are added to the MusicBrainz web - service which aren't known to this library yet. - - @param id_: a two-letter upper case string containing an ISO-3166 code - - @return: a string containing the country's name, or None - - @see: L{musicbrainz2.model} - """ - from musicbrainz2.data.countrynames import countryNames - return countryNames.get(id_) - - -def getLanguageName(id_): - """Returns a language name based on an ISO-639-2/T code. - - This function uses a subset of the ISO-639-2/T code table to map - language IDs (terminologic, not bibliographic ones!) to names. - - @param id_: a three-letter upper case string containing an ISO-639-2/T code - - @return: a string containing the language's name, or None - - @see: L{musicbrainz2.model} - """ - from musicbrainz2.data.languagenames import languageNames - return languageNames.get(id_) - - -def getScriptName(id_): - """Returns a script name based on an ISO-15924 code. - - This function uses a subset of the ISO-15924 code table to map - script IDs to names. - - @param id_: a four-letter string containing an ISO-15924 script code - - @return: a string containing the script's name, or None - - @see: L{musicbrainz2.model} - """ - from musicbrainz2.data.scriptnames import scriptNames - return scriptNames.get(id_) - - -# EOF diff --git a/musicbrainz2/webservice.py b/musicbrainz2/webservice.py deleted file mode 100644 index a869530d..00000000 --- a/musicbrainz2/webservice.py +++ /dev/null @@ -1,1519 +0,0 @@ -"""Classes for interacting with the MusicBrainz XML web service. - -The L{WebService} class talks to a server implementing the MusicBrainz XML -web service. It mainly handles URL generation and network I/O. Use this -if maximum control is needed. - -The L{Query} class provides a convenient interface to the most commonly -used features of the web service. By default it uses L{WebService} to -retrieve data and the L{XML parser } to parse the -responses. The results are object trees using the L{MusicBrainz domain -model }. - -@author: Matthias Friedrich -""" -__revision__ = '$Id: webservice.py 12973 2011-04-29 11:49:31Z luks $' - -import re -import urllib -import urllib2 -import urlparse -import logging -import os.path -from StringIO import StringIO -import musicbrainz2 -from musicbrainz2.model import Artist, Release, Track -from musicbrainz2.wsxml import MbXmlParser, ParseError -import musicbrainz2.utils as mbutils - -__all__ = [ - 'WebServiceError', 'AuthenticationError', 'ConnectionError', - 'RequestError', 'ResourceNotFoundError', 'ResponseError', - 'IIncludes', 'ArtistIncludes', 'ReleaseIncludes', 'TrackIncludes', - 'LabelIncludes', 'ReleaseGroupIncludes', - 'IFilter', 'ArtistFilter', 'ReleaseFilter', 'TrackFilter', - 'UserFilter', 'LabelFilter', 'ReleaseGroupFilter', - 'IWebService', 'WebService', 'Query', -] - - -class IWebService(object): - """An interface all concrete web service classes have to implement. - - All web service classes have to implement this and follow the - method specifications. - """ - - def get(self, entity, id_, include, filter, version): - """Query the web service. - - Using this method, you can either get a resource by id (using - the C{id_} parameter, or perform a query on all resources of - a type. - - The C{filter} and the C{id_} parameter exclude each other. If - you are using a filter, you may not set C{id_} and vice versa. - - Returns a file-like object containing the result or raises a - L{WebServiceError} or one of its subclasses in case of an - error. Which one is used depends on the implementing class. - - @param entity: a string containing the entity's name - @param id_: a string containing a UUID, or the empty string - @param include: a tuple containing values for the 'inc' parameter - @param filter: parameters, depending on the entity - @param version: a string containing the web service version to use - - @return: a file-like object - - @raise WebServiceError: in case of errors - """ - raise NotImplementedError() - - - def post(self, entity, id_, data, version): - """Submit data to the web service. - - @param entity: a string containing the entity's name - @param id_: a string containing a UUID, or the empty string - @param data: A string containing the data to post - @param version: a string containing the web service version to use - - @return: a file-like object - - @raise WebServiceError: in case of errors - """ - raise NotImplementedError() - - -class WebServiceError(Exception): - """A web service error has occurred. - - This is the base class for several other web service related - exceptions. - """ - - def __init__(self, msg='Webservice Error', reason=None): - """Constructor. - - Set C{msg} to an error message which explains why this - exception was raised. The C{reason} parameter should be the - original exception which caused this L{WebService} exception - to be raised. If given, it has to be an instance of - C{Exception} or one of its child classes. - - @param msg: a string containing an error message - @param reason: another exception instance, or None - """ - Exception.__init__(self) - self.msg = msg - self.reason = reason - - def __str__(self): - """Makes this class printable. - - @return: a string containing an error message - """ - return self.msg - - -class ConnectionError(WebServiceError): - """Getting a server connection failed. - - This exception is mostly used if the client couldn't connect to - the server because of an invalid host name or port. It doesn't - make sense if the web service in question doesn't use the network. - """ - pass - - -class RequestError(WebServiceError): - """An invalid request was made. - - This exception is raised if the client made an invalid request. - That could be syntactically invalid identifiers or unknown or - invalid parameter values. - """ - pass - - -class ResourceNotFoundError(WebServiceError): - """No resource with the given ID exists. - - This is usually a wrapper around IOError (which is superclass of - HTTPError). - """ - pass - - -class AuthenticationError(WebServiceError): - """Authentication failed. - - This is thrown if user name, password or realm were invalid while - trying to access a protected resource. - """ - pass - - -class ResponseError(WebServiceError): - """The returned resource was invalid. - - This may be due to a malformed XML document or if the requested - data wasn't part of the response. It can only occur in case of - bugs in the web service itself. - """ - pass - -class DigestAuthHandler(urllib2.HTTPDigestAuthHandler): - """Patched DigestAuthHandler to correctly handle Digest Auth according to RFC 2617. - - This will allow multiple qop values in the WWW-Authenticate header (e.g. "auth,auth-int"). - The only supported qop value is still auth, though. - See http://bugs.python.org/issue9714 - - @author Kuno Woudt - """ - def get_authorization(self, req, chal): - qop = chal.get('qop') - if qop and ',' in qop and 'auth' in qop.split(','): - chal['qop'] = 'auth' - - return urllib2.HTTPDigestAuthHandler.get_authorization(self, req, chal) - -class WebService(IWebService): - """An interface to the MusicBrainz XML web service via HTTP. - - By default, this class uses the MusicBrainz server but may be - configured for accessing other servers as well using the - L{constructor <__init__>}. This implements L{IWebService}, so - additional documentation on method parameters can be found there. - """ - - def __init__(self, host='musicbrainz.org', port=80, pathPrefix='/ws', - username=None, password=None, realm='musicbrainz.org', - opener=None): - """Constructor. - - This can be used without parameters. In this case, the - MusicBrainz server will be used. - - @param host: a string containing a host name - @param port: an integer containing a port number - @param pathPrefix: a string prepended to all URLs - @param username: a string containing a MusicBrainz user name - @param password: a string containing the user's password - @param realm: a string containing the realm used for authentication - @param opener: an C{urllib2.OpenerDirector} object used for queries - """ - self._host = host - self._port = port - self._username = username - self._password = password - self._realm = realm - self._pathPrefix = pathPrefix - self._log = logging.getLogger(str(self.__class__)) - - if opener is None: - self._opener = urllib2.build_opener() - else: - self._opener = opener - - passwordMgr = self._RedirectPasswordMgr() - authHandler = DigestAuthHandler(passwordMgr) - authHandler.add_password(self._realm, (), # no host set - self._username, self._password) - self._opener.add_handler(authHandler) - - - def _makeUrl(self, entity, id_, include=( ), filter={ }, - version='1', type_='xml'): - params = dict(filter) - if type_ is not None: - params['type'] = type_ - if len(include) > 0: - params['inc'] = ' '.join(include) - - netloc = self._host - if self._port != 80: - netloc += ':' + str(self._port) - path = '/'.join((self._pathPrefix, version, entity, id_)) - - query = urllib.urlencode(params) - - url = urlparse.urlunparse(('http', netloc, path, '', query,'')) - - return url - - - def _openUrl(self, url, data=None): - userAgent = 'python-musicbrainz/' + musicbrainz2.__version__ - req = urllib2.Request(url) - req.add_header('User-Agent', userAgent) - return self._opener.open(req, data) - - - def get(self, entity, id_, include=( ), filter={ }, version='1'): - """Query the web service via HTTP-GET. - - Returns a file-like object containing the result or raises a - L{WebServiceError}. Conditions leading to errors may be - invalid entities, IDs, C{include} or C{filter} parameters - and unsupported version numbers. - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid IDs or parameters - @raise AuthenticationError: invalid user name and/or password - @raise ResourceNotFoundError: resource doesn't exist - - @see: L{IWebService.get} - """ - url = self._makeUrl(entity, id_, include, filter, version) - - self._log.debug('GET ' + url) - - try: - return self._openUrl(url) - except urllib2.HTTPError, e: - self._log.debug("GET failed: " + str(e)) - if e.code == 400: # in python 2.4: httplib.BAD_REQUEST - raise RequestError(str(e), e) - elif e.code == 401: # httplib.UNAUTHORIZED - raise AuthenticationError(str(e), e) - elif e.code == 404: # httplib.NOT_FOUND - raise ResourceNotFoundError(str(e), e) - else: - raise WebServiceError(str(e), e) - except urllib2.URLError, e: - self._log.debug("GET failed: " + str(e)) - raise ConnectionError(str(e), e) - - - def post(self, entity, id_, data, version='1'): - """Send data to the web service via HTTP-POST. - - Note that this may require authentication. You can set - user name, password and realm in the L{constructor <__init__>}. - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid IDs or parameters - @raise AuthenticationError: invalid user name and/or password - @raise ResourceNotFoundError: resource doesn't exist - - @see: L{IWebService.post} - """ - url = self._makeUrl(entity, id_, version=version, type_=None) - - self._log.debug('POST ' + url) - self._log.debug('POST-BODY: ' + data) - - try: - return self._openUrl(url, data) - except urllib2.HTTPError, e: - self._log.debug("POST failed: " + str(e)) - if e.code == 400: # in python 2.4: httplib.BAD_REQUEST - raise RequestError(str(e), e) - elif e.code == 401: # httplib.UNAUTHORIZED - raise AuthenticationError(str(e), e) - elif e.code == 404: # httplib.NOT_FOUND - raise ResourceNotFoundError(str(e), e) - else: - raise WebServiceError(str(e), e) - except urllib2.URLError, e: - self._log.debug("POST failed: " + str(e)) - raise ConnectionError(str(e), e) - - - # Special password manager which also works with redirects by simply - # ignoring the URI. As a consequence, only *ONE* (username, password) - # tuple per realm can be used for all URIs. - # - class _RedirectPasswordMgr(urllib2.HTTPPasswordMgr): - def __init__(self): - self._realms = { } - - def find_user_password(self, realm, uri): - # ignoring the uri parameter intentionally - try: - return self._realms[realm] - except KeyError: - return (None, None) - - def add_password(self, realm, uri, username, password): - # ignoring the uri parameter intentionally - self._realms[realm] = (username, password) - - -class IFilter(object): - """A filter for collections. - - This is the interface all filters have to implement. Filter classes - are initialized with a set of criteria and are then applied to - collections of items. The criteria are usually strings or integer - values, depending on the filter. - - Note that all strings passed to filters should be unicode strings - (python type C{unicode}). Standard strings are converted to unicode - internally, but have a limitation: Only 7 Bit pure ASCII characters - may be used, otherwise a C{UnicodeDecodeError} is raised. - """ - def createParameters(self): - """Create a list of query parameters. - - This method creates a list of (C{parameter}, C{value}) tuples, - based on the contents of the implementing subclass. - C{parameter} is a string containing a parameter name - and C{value} an arbitrary string. No escaping of those strings - is required. - - @return: a sequence of (key, value) pairs - """ - raise NotImplementedError() - - -class ArtistFilter(IFilter): - """A filter for the artist collection.""" - - def __init__(self, name=None, limit=None, offset=None, query=None): - """Constructor. - - The C{query} parameter may contain a query in U{Lucene syntax - }. - Note that the C{name} and C{query} may not be used together. - - @param name: a unicode string containing the artist's name - @param limit: the maximum number of artists to return - @param offset: start results at this zero-based offset - @param query: a string containing a query in Lucene syntax - """ - self._params = [ - ('name', name), - ('limit', limit), - ('offset', offset), - ('query', query), - ] - - if not _paramsValid(self._params): - raise ValueError('invalid combination of parameters') - - def createParameters(self): - return _createParameters(self._params) - - -class LabelFilter(IFilter): - """A filter for the label collection.""" - - def __init__(self, name=None, limit=None, offset=None, query=None): - """Constructor. - - The C{query} parameter may contain a query in U{Lucene syntax - }. - Note that the C{name} and C{query} may not be used together. - - @param name: a unicode string containing the label's name - @param limit: the maximum number of labels to return - @param offset: start results at this zero-based offset - @param query: a string containing a query in Lucene syntax - """ - self._params = [ - ('name', name), - ('limit', limit), - ('offset', offset), - ('query', query), - ] - - if not _paramsValid(self._params): - raise ValueError('invalid combination of parameters') - - def createParameters(self): - return _createParameters(self._params) - -class ReleaseGroupFilter(IFilter): - """A filter for the release group collection.""" - - def __init__(self, title=None, releaseTypes=None, artistName=None, - artistId=None, limit=None, offset=None, query=None): - """Constructor. - - If C{artistId} is set, only releases matching those IDs are - returned. The C{releaseTypes} parameter allows you to limit - the types of the release groups returned. You can set it to - C{(Release.TYPE_ALBUM, Release.TYPE_OFFICIAL)}, for example, - to only get officially released albums. Note that those values - are connected using the I{AND} operator. MusicBrainz' support - is currently very limited, so C{Release.TYPE_LIVE} and - C{Release.TYPE_COMPILATION} exclude each other (see U{the - documentation on release attributes - } for more - information and all valid values). - - If both the C{artistName} and the C{artistId} parameter are - given, the server will ignore C{artistName}. - - The C{query} parameter may contain a query in U{Lucene syntax - }. - Note that C{query} may not be used together with the other - parameters except for C{limit} and C{offset}. - - @param title: a unicode string containing the release group's title - @param releaseTypes: a sequence of release type URIs - @param artistName: a unicode string containing the artist's name - @param artistId: a unicode string containing the artist's ID - @param limit: the maximum number of release groups to return - @param offset: start results at this zero-based offset - @param query: a string containing a query in Lucene syntax - - @see: the constants in L{musicbrainz2.model.Release} - """ - if releaseTypes is None or len(releaseTypes) == 0: - releaseTypesStr = None - else: - releaseTypesStr = ' '.join(map(mbutils.extractFragment, releaseTypes)) - - self._params = [ - ('title', title), - ('releasetypes', releaseTypesStr), - ('artist', artistName), - ('artistid', mbutils.extractUuid(artistId)), - ('limit', limit), - ('offset', offset), - ('query', query), - ] - - if not _paramsValid(self._params): - raise ValueError('invalid combination of parameters') - - def createParameters(self): - return _createParameters(self._params) - - -class ReleaseFilter(IFilter): - """A filter for the release collection.""" - - def __init__(self, title=None, discId=None, releaseTypes=None, - artistName=None, artistId=None, limit=None, - offset=None, query=None, trackCount=None): - """Constructor. - - If C{discId} or C{artistId} are set, only releases matching - those IDs are returned. The C{releaseTypes} parameter allows - to limit the types of the releases returned. You can set it to - C{(Release.TYPE_ALBUM, Release.TYPE_OFFICIAL)}, for example, - to only get officially released albums. Note that those values - are connected using the I{AND} operator. MusicBrainz' support - is currently very limited, so C{Release.TYPE_LIVE} and - C{Release.TYPE_COMPILATION} exclude each other (see U{the - documentation on release attributes - } for more - information and all valid values). - - If both the C{artistName} and the C{artistId} parameter are - given, the server will ignore C{artistName}. - - The C{query} parameter may contain a query in U{Lucene syntax - }. - Note that C{query} may not be used together with the other - parameters except for C{limit} and C{offset}. - - @param title: a unicode string containing the release's title - @param discId: a unicode string containing the DiscID - @param releaseTypes: a sequence of release type URIs - @param artistName: a unicode string containing the artist's name - @param artistId: a unicode string containing the artist's ID - @param limit: the maximum number of releases to return - @param offset: start results at this zero-based offset - @param query: a string containing a query in Lucene syntax - @param trackCount: the number of tracks in the release - - @see: the constants in L{musicbrainz2.model.Release} - """ - if releaseTypes is None or len(releaseTypes) == 0: - releaseTypesStr = None - else: - tmp = [ mbutils.extractFragment(x) for x in releaseTypes ] - releaseTypesStr = ' '.join(tmp) - - self._params = [ - ('title', title), - ('discid', discId), - ('releasetypes', releaseTypesStr), - ('artist', artistName), - ('artistid', mbutils.extractUuid(artistId)), - ('limit', limit), - ('offset', offset), - ('query', query), - ('count', trackCount), - ] - - if not _paramsValid(self._params): - raise ValueError('invalid combination of parameters') - - def createParameters(self): - return _createParameters(self._params) - - -class TrackFilter(IFilter): - """A filter for the track collection.""" - - def __init__(self, title=None, artistName=None, artistId=None, - releaseTitle=None, releaseId=None, - duration=None, puid=None, limit=None, offset=None, - query=None): - """Constructor. - - If C{artistId}, C{releaseId} or C{puid} are set, only tracks - matching those IDs are returned. - - The server will ignore C{artistName} and C{releaseTitle} if - C{artistId} or ${releaseId} are set respectively. - - The C{query} parameter may contain a query in U{Lucene syntax - }. - Note that C{query} may not be used together with the other - parameters except for C{limit} and C{offset}. - - @param title: a unicode string containing the track's title - @param artistName: a unicode string containing the artist's name - @param artistId: a string containing the artist's ID - @param releaseTitle: a unicode string containing the release's title - @param releaseId: a string containing the release's title - @param duration: the track's length in milliseconds - @param puid: a string containing a PUID - @param limit: the maximum number of releases to return - @param offset: start results at this zero-based offset - @param query: a string containing a query in Lucene syntax - """ - self._params = [ - ('title', title), - ('artist', artistName), - ('artistid', mbutils.extractUuid(artistId)), - ('release', releaseTitle), - ('releaseid', mbutils.extractUuid(releaseId)), - ('duration', duration), - ('puid', puid), - ('limit', limit), - ('offset', offset), - ('query', query), - ] - - if not _paramsValid(self._params): - raise ValueError('invalid combination of parameters') - - def createParameters(self): - return _createParameters(self._params) - - -class UserFilter(IFilter): - """A filter for the user collection.""" - - def __init__(self, name=None): - """Constructor. - - @param name: a unicode string containing a MusicBrainz user name - """ - self._name = name - - def createParameters(self): - if self._name is not None: - return [ ('name', self._name.encode('utf-8')) ] - else: - return [ ] - - -class IIncludes(object): - """An interface implemented by include tag generators.""" - def createIncludeTags(self): - raise NotImplementedError() - - -class ArtistIncludes(IIncludes): - """A specification on how much data to return with an artist. - - Example: - - >>> from musicbrainz2.model import Release - >>> from musicbrainz2.webservice import ArtistIncludes - >>> inc = ArtistIncludes(artistRelations=True, releaseRelations=True, - ... releases=(Release.TYPE_ALBUM, Release.TYPE_OFFICIAL)) - >>> - - The MusicBrainz server only supports some combinations of release - types for the C{releases} and C{vaReleases} include tags. At the - moment, not more than two release types should be selected, while - one of them has to be C{Release.TYPE_OFFICIAL}, - C{Release.TYPE_PROMOTION} or C{Release.TYPE_BOOTLEG}. - - @note: Only one of C{releases} and C{vaReleases} may be given. - """ - def __init__(self, aliases=False, releases=(), vaReleases=(), - artistRelations=False, releaseRelations=False, - trackRelations=False, urlRelations=False, tags=False, - ratings=False, releaseGroups=False): - - assert not isinstance(releases, basestring) - assert not isinstance(vaReleases, basestring) - assert len(releases) == 0 or len(vaReleases) == 0 - - self._includes = { - 'aliases': aliases, - 'artist-rels': artistRelations, - 'release-groups': releaseGroups, - 'release-rels': releaseRelations, - 'track-rels': trackRelations, - 'url-rels': urlRelations, - 'tags': tags, - 'ratings': ratings, - } - - for elem in releases: - self._includes['sa-' + mbutils.extractFragment(elem)] = True - - for elem in vaReleases: - self._includes['va-' + mbutils.extractFragment(elem)] = True - - def createIncludeTags(self): - return _createIncludes(self._includes) - - -class ReleaseIncludes(IIncludes): - """A specification on how much data to return with a release.""" - def __init__(self, artist=False, counts=False, releaseEvents=False, - discs=False, tracks=False, - artistRelations=False, releaseRelations=False, - trackRelations=False, urlRelations=False, - labels=False, tags=False, ratings=False, isrcs=False, - releaseGroup=False): - self._includes = { - 'artist': artist, - 'counts': counts, - 'labels': labels, - 'release-groups': releaseGroup, - 'release-events': releaseEvents, - 'discs': discs, - 'tracks': tracks, - 'artist-rels': artistRelations, - 'release-rels': releaseRelations, - 'track-rels': trackRelations, - 'url-rels': urlRelations, - 'tags': tags, - 'ratings': ratings, - 'isrcs': isrcs, - } - - # Requesting labels without releaseEvents makes no sense, - # so we pull in releaseEvents, if necessary. - if labels and not releaseEvents: - self._includes['release-events'] = True - # Ditto for isrcs with no tracks - if isrcs and not tracks: - self._includes['tracks'] = True - - def createIncludeTags(self): - return _createIncludes(self._includes) - - -class ReleaseGroupIncludes(IIncludes): - """A specification on how much data to return with a release group.""" - - def __init__(self, artist=False, releases=False, tags=False): - """Constructor. - - @param artist: Whether to include the release group's main artist info. - @param releases: Whether to include the release group's releases. - """ - self._includes = { - 'artist': artist, - 'releases': releases, - } - - def createIncludeTags(self): - return _createIncludes(self._includes) - - -class TrackIncludes(IIncludes): - """A specification on how much data to return with a track.""" - def __init__(self, artist=False, releases=False, puids=False, - artistRelations=False, releaseRelations=False, - trackRelations=False, urlRelations=False, tags=False, - ratings=False, isrcs=False): - self._includes = { - 'artist': artist, - 'releases': releases, - 'puids': puids, - 'artist-rels': artistRelations, - 'release-rels': releaseRelations, - 'track-rels': trackRelations, - 'url-rels': urlRelations, - 'tags': tags, - 'ratings': ratings, - 'isrcs': isrcs, - } - - def createIncludeTags(self): - return _createIncludes(self._includes) - - -class LabelIncludes(IIncludes): - """A specification on how much data to return with a label.""" - def __init__(self, aliases=False, tags=False, ratings=False): - self._includes = { - 'aliases': aliases, - 'tags': tags, - 'ratings': ratings, - } - - def createIncludeTags(self): - return _createIncludes(self._includes) - - -class Query(object): - """A simple interface to the MusicBrainz web service. - - This is a facade which provides a simple interface to the MusicBrainz - web service. It hides all the details like fetching data from a server, - parsing the XML and creating an object tree. Using this class, you can - request data by ID or search the I{collection} of all resources - (artists, releases, or tracks) to retrieve those matching given - criteria. This document contains examples to get you started. - - - Working with Identifiers - ======================== - - MusicBrainz uses absolute URIs as identifiers. For example, the artist - 'Tori Amos' is identified using the following URI:: - http://musicbrainz.org/artist/c0b2500e-0cef-4130-869d-732b23ed9df5 - - In some situations it is obvious from the context what type of - resource an ID refers to. In these cases, abbreviated identifiers may - be used, which are just the I{UUID} part of the URI. Thus the ID above - may also be written like this:: - c0b2500e-0cef-4130-869d-732b23ed9df5 - - All methods in this class which require IDs accept both the absolute - URI and the abbreviated form (aka the relative URI). - - - Creating a Query Object - ======================= - - In most cases, creating a L{Query} object is as simple as this: - - >>> import musicbrainz2.webservice as ws - >>> q = ws.Query() - >>> - - The instantiated object uses the standard L{WebService} class to - access the MusicBrainz web service. If you want to use a different - server or you have to pass user name and password because one of - your queries requires authentication, you have to create the - L{WebService} object yourself and configure it appropriately. - This example uses the MusicBrainz test server and also sets - authentication data: - - >>> import musicbrainz2.webservice as ws - >>> service = ws.WebService(host='test.musicbrainz.org', - ... username='whatever', password='secret') - >>> q = ws.Query(service) - >>> - - - Querying for Individual Resources - ================================= - - If the MusicBrainz ID of a resource is known, then the L{getArtistById}, - L{getReleaseById}, or L{getTrackById} method can be used to retrieve - it. Example: - - >>> import musicbrainz2.webservice as ws - >>> q = ws.Query() - >>> artist = q.getArtistById('c0b2500e-0cef-4130-869d-732b23ed9df5') - >>> artist.name - u'Tori Amos' - >>> artist.sortName - u'Amos, Tori' - >>> print artist.type - http://musicbrainz.org/ns/mmd-1.0#Person - >>> - - This returned just the basic artist data, however. To get more detail - about a resource, the C{include} parameters may be used which expect - an L{ArtistIncludes}, L{ReleaseIncludes}, or L{TrackIncludes} object, - depending on the resource type. - - To get data about a release which also includes the main artist - and all tracks, for example, the following query can be used: - - >>> import musicbrainz2.webservice as ws - >>> q = ws.Query() - >>> releaseId = '33dbcf02-25b9-4a35-bdb7-729455f33ad7' - >>> include = ws.ReleaseIncludes(artist=True, tracks=True) - >>> release = q.getReleaseById(releaseId, include) - >>> release.title - u'Tales of a Librarian' - >>> release.artist.name - u'Tori Amos' - >>> release.tracks[0].title - u'Precious Things' - >>> - - Note that the query gets more expensive for the server the more - data you request, so please be nice. - - - Searching in Collections - ======================== - - For each resource type (artist, release, and track), there is one - collection which contains all resources of a type. You can search - these collections using the L{getArtists}, L{getReleases}, and - L{getTracks} methods. The collections are huge, so you have to - use filters (L{ArtistFilter}, L{ReleaseFilter}, or L{TrackFilter}) - to retrieve only resources matching given criteria. - - For example, If you want to search the release collection for - releases with a specified DiscID, you would use L{getReleases} - and a L{ReleaseFilter} object: - - >>> import musicbrainz2.webservice as ws - >>> q = ws.Query() - >>> filter = ws.ReleaseFilter(discId='8jJklE258v6GofIqDIrE.c5ejBE-') - >>> results = q.getReleases(filter=filter) - >>> results[0].score - 100 - >>> results[0].release.title - u'Under the Pink' - >>> - - The query returns a list of results (L{wsxml.ReleaseResult} objects - in this case), which are ordered by score, with a higher score - indicating a better match. Note that those results don't contain - all the data about a resource. If you need more detail, you can then - use the L{getArtistById}, L{getReleaseById}, or L{getTrackById} - methods to request the resource. - - All filters support the C{limit} argument to limit the number of - results returned. This defaults to 25, but the server won't send - more than 100 results to save bandwidth and processing power. Using - C{limit} and the C{offset} parameter, you can page through the - results. - - - Error Handling - ============== - - All methods in this class raise a L{WebServiceError} exception in case - of errors. Depending on the method, a subclass of L{WebServiceError} may - be raised which allows an application to handle errors more precisely. - The following example handles connection errors (invalid host name - etc.) separately and all other web service errors in a combined - catch clause: - - >>> try: - ... artist = q.getArtistById('c0b2500e-0cef-4130-869d-732b23ed9df5') - ... except ws.ConnectionError, e: - ... pass # implement your error handling here - ... except ws.WebServiceError, e: - ... pass # catches all other web service errors - ... - >>> - """ - - def __init__(self, ws=None, wsFactory=WebService, clientId=None): - """Constructor. - - The C{ws} parameter has to be a subclass of L{IWebService}. - If it isn't given, the C{wsFactory} parameter is used to - create an L{IWebService} subclass. - - If the constructor is called without arguments, an instance - of L{WebService} is used, preconfigured to use the MusicBrainz - server. This should be enough for most users. - - If you want to use queries which require authentication you - have to pass a L{WebService} instance where user name and - password have been set. - - The C{clientId} parameter is required for data submission. - The format is C{'application-version'}, where C{application} - is your application's name and C{version} is a version - number which may not include a '-' character. - - @param ws: a subclass instance of L{IWebService}, or None - @param wsFactory: a callable object which creates an object - @param clientId: a unicode string containing the application's ID - """ - if ws is None: - self._ws = wsFactory() - else: - self._ws = ws - - self._clientId = clientId - self._log = logging.getLogger(str(self.__class__)) - - - def getArtistById(self, id_, include=None): - """Returns an artist. - - If no artist with that ID can be found, C{include} contains - invalid tags or there's a server problem, an exception is - raised. - - @param id_: a string containing the artist's ID - @param include: an L{ArtistIncludes} object, or None - - @return: an L{Artist } object, or None - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResourceNotFoundError: artist doesn't exist - @raise ResponseError: server returned invalid data - """ - uuid = mbutils.extractUuid(id_, 'artist') - result = self._getFromWebService('artist', uuid, include) - artist = result.getArtist() - if artist is not None: - return artist - else: - raise ResponseError("server didn't return artist") - - - def getArtists(self, filter): - """Returns artists matching given criteria. - - @param filter: an L{ArtistFilter} object - - @return: a list of L{musicbrainz2.wsxml.ArtistResult} objects - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResponseError: server returned invalid data - """ - result = self._getFromWebService('artist', '', filter=filter) - return result.getArtistResults() - - def getLabelById(self, id_, include=None): - """Returns a L{model.Label} - - If no label with that ID can be found, or there is a server problem, - an exception is raised. - - @param id_: a string containing the label's ID. - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResourceNotFoundError: release doesn't exist - @raise ResponseError: server returned invalid data - """ - uuid = mbutils.extractUuid(id_, 'label') - result = self._getFromWebService('label', uuid, include) - label = result.getLabel() - if label is not None: - return label - else: - raise ResponseError("server didn't return a label") - - def getLabels(self, filter): - result = self._getFromWebService('label', '', filter=filter) - return result.getLabelResults() - - def getReleaseById(self, id_, include=None): - """Returns a release. - - If no release with that ID can be found, C{include} contains - invalid tags or there's a server problem, and exception is - raised. - - @param id_: a string containing the release's ID - @param include: a L{ReleaseIncludes} object, or None - - @return: a L{Release } object, or None - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResourceNotFoundError: release doesn't exist - @raise ResponseError: server returned invalid data - """ - uuid = mbutils.extractUuid(id_, 'release') - result = self._getFromWebService('release', uuid, include) - release = result.getRelease() - if release is not None: - return release - else: - raise ResponseError("server didn't return release") - - - def getReleases(self, filter): - """Returns releases matching given criteria. - - @param filter: a L{ReleaseFilter} object - - @return: a list of L{musicbrainz2.wsxml.ReleaseResult} objects - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResponseError: server returned invalid data - """ - result = self._getFromWebService('release', '', filter=filter) - return result.getReleaseResults() - - def getReleaseGroupById(self, id_, include=None): - """Returns a release group. - - If no release group with that ID can be found, C{include} - contains invalid tags, or there's a server problem, an - exception is raised. - - @param id_: a string containing the release group's ID - @param include: a L{ReleaseGroupIncludes} object, or None - - @return: a L{ReleaseGroup } object, or None - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResourceNotFoundError: release doesn't exist - @raise ResponseError: server returned invalid data - """ - uuid = mbutils.extractUuid(id_, 'release-group') - result = self._getFromWebService('release-group', uuid, include) - releaseGroup = result.getReleaseGroup() - if releaseGroup is not None: - return releaseGroup - else: - raise ResponseError("server didn't return releaseGroup") - - def getReleaseGroups(self, filter): - """Returns release groups matching the given criteria. - - @param filter: a L{ReleaseGroupFilter} object - - @return: a list of L{musicbrainz2.wsxml.ReleaseGroupResult} objects - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResponseError: server returned invalid data - """ - result = self._getFromWebService('release-group', '', filter=filter) - return result.getReleaseGroupResults() - - def getTrackById(self, id_, include=None): - """Returns a track. - - If no track with that ID can be found, C{include} contains - invalid tags or there's a server problem, an exception is - raised. - - @param id_: a string containing the track's ID - @param include: a L{TrackIncludes} object, or None - - @return: a L{Track } object, or None - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResourceNotFoundError: track doesn't exist - @raise ResponseError: server returned invalid data - """ - uuid = mbutils.extractUuid(id_, 'track') - result = self._getFromWebService('track', uuid, include) - track = result.getTrack() - if track is not None: - return track - else: - raise ResponseError("server didn't return track") - - - def getTracks(self, filter): - """Returns tracks matching given criteria. - - @param filter: a L{TrackFilter} object - - @return: a list of L{musicbrainz2.wsxml.TrackResult} objects - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResponseError: server returned invalid data - """ - result = self._getFromWebService('track', '', filter=filter) - return result.getTrackResults() - - - def getUserByName(self, name): - """Returns information about a MusicBrainz user. - - You can only request user data if you know the user name and - password for that account. If username and/or password are - incorrect, an L{AuthenticationError} is raised. - - See the example in L{Query} on how to supply user name and - password. - - @param name: a unicode string containing the user's name - - @return: a L{User } object - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise AuthenticationError: invalid user name and/or password - @raise ResourceNotFoundError: track doesn't exist - @raise ResponseError: server returned invalid data - """ - filter = UserFilter(name=name) - result = self._getFromWebService('user', '', None, filter) - - if len(result.getUserList()) > 0: - return result.getUserList()[0] - else: - raise ResponseError("response didn't contain user data") - - - def _getFromWebService(self, entity, id_, include=None, filter=None): - if filter is None: - filterParams = [ ] - else: - filterParams = filter.createParameters() - - if include is None: - includeParams = [ ] - else: - includeParams = include.createIncludeTags() - - stream = self._ws.get(entity, id_, includeParams, filterParams) - try: - parser = MbXmlParser() - return parser.parse(stream) - except ParseError, e: - raise ResponseError(str(e), e) - - - def submitPuids(self, tracks2puids): - """Submit track to PUID mappings. - - The C{tracks2puids} parameter has to be a dictionary, with the - keys being MusicBrainz track IDs (either as absolute URIs or - in their 36 character ASCII representation) and the values - being PUIDs (ASCII, 36 characters). - - Note that this method only works if a valid user name and - password have been set. See the example in L{Query} on how - to supply authentication data. - - @param tracks2puids: a dictionary mapping track IDs to PUIDs - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid track or PUIDs - @raise AuthenticationError: invalid user name and/or password - """ - assert self._clientId is not None, 'Please supply a client ID' - params = [ ] - params.append( ('client', self._clientId.encode('utf-8')) ) - - for (trackId, puid) in tracks2puids.iteritems(): - trackId = mbutils.extractUuid(trackId, 'track') - params.append( ('puid', trackId + ' ' + puid) ) - - encodedStr = urllib.urlencode(params, True) - - self._ws.post('track', '', encodedStr) - - def submitISRCs(self, tracks2isrcs): - """Submit track to ISRC mappings. - - The C{tracks2isrcs} parameter has to be a dictionary, with the - keys being MusicBrainz track IDs (either as absolute URIs or - in their 36 character ASCII representation) and the values - being ISRCs (ASCII, 12 characters). - - Note that this method only works if a valid user name and - password have been set. See the example in L{Query} on how - to supply authentication data. - - @param tracks2isrcs: a dictionary mapping track IDs to ISRCs - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid track or ISRCs - @raise AuthenticationError: invalid user name and/or password - """ - params = [ ] - - for (trackId, isrc) in tracks2isrcs.iteritems(): - trackId = mbutils.extractUuid(trackId, 'track') - params.append( ('isrc', trackId + ' ' + isrc) ) - - encodedStr = urllib.urlencode(params, True) - - self._ws.post('track', '', encodedStr) - - def addToUserCollection(self, releases): - """Add releases to a user's collection. - - The releases parameter must be a list. It can contain either L{Release} - objects or a string representing a MusicBrainz release ID (either as - absolute URIs or in their 36 character ASCII representation). - - Adding a release that is already in the collection has no effect. - - @param releases: a list of releases to add to the user collection - - @raise ConnectionError: couldn't connect to server - @raise AuthenticationError: invalid user name and/or password - """ - rels = [] - for release in releases: - if isinstance(release, Release): - rels.append(mbutils.extractUuid(release.id)) - else: - rels.append(mbutils.extractUuid(release)) - encodedStr = urllib.urlencode({'add': ",".join(rels)}, True) - self._ws.post('collection', '', encodedStr) - - def removeFromUserCollection(self, releases): - """Remove releases from a user's collection. - - The releases parameter must be a list. It can contain either L{Release} - objects or a string representing a MusicBrainz release ID (either as - absolute URIs or in their 36 character ASCII representation). - - Removing a release that is not in the collection has no effect. - - @param releases: a list of releases to remove from the user collection - - @raise ConnectionError: couldn't connect to server - @raise AuthenticationError: invalid user name and/or password - """ - rels = [] - for release in releases: - if isinstance(release, Release): - rels.append(mbutils.extractUuid(release.id)) - else: - rels.append(mbutils.extractUuid(release)) - encodedStr = urllib.urlencode({'remove': ",".join(rels)}, True) - self._ws.post('collection', '', encodedStr) - - def getUserCollection(self, offset=0, maxitems=100): - """Get the releases that are in a user's collection - - A maximum of 100 items will be returned for any one call - to this method. To fetch more than 100 items, use the offset - parameter. - - @param offset: the offset to start fetching results from - @param maxitems: the upper limit on items to return - - @return: a list of L{musicbrainz2.wsxml.ReleaseResult} objects - - @raise ConnectionError: couldn't connect to server - @raise AuthenticationError: invalid user name and/or password - """ - params = { 'offset': offset, 'maxitems': maxitems } - - stream = self._ws.get('collection', '', filter=params) - print stream - try: - parser = MbXmlParser() - result = parser.parse(stream) - except ParseError, e: - raise ResponseError(str(e), e) - - return result.getReleaseResults() - - def submitUserTags(self, entityUri, tags): - """Submit folksonomy tags for an entity. - - Note that all previously existing tags from the authenticated - user are replaced with the ones given to this method. Other - users' tags are not affected. - - @param entityUri: a string containing an absolute MB ID - @param tags: A list of either L{Tag } objects - or strings - - @raise ValueError: invalid entityUri - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID, entity or tags - @raise AuthenticationError: invalid user name and/or password - """ - entity = mbutils.extractEntityType(entityUri) - uuid = mbutils.extractUuid(entityUri, entity) - params = ( - ('type', 'xml'), - ('entity', entity), - ('id', uuid), - ('tags', ','.join([unicode(tag).encode('utf-8') for tag in tags])) - ) - - encodedStr = urllib.urlencode(params) - - self._ws.post('tag', '', encodedStr) - - - def getUserTags(self, entityUri): - """Returns a list of folksonomy tags a user has applied to an entity. - - The given parameter has to be a fully qualified MusicBrainz ID, as - returned by other library functions. - - Note that this method only works if a valid user name and - password have been set. Only the tags the authenticated user - applied to the entity will be returned. If username and/or - password are incorrect, an AuthenticationError is raised. - - This method will return a list of L{Tag } - objects. - - @param entityUri: a string containing an absolute MB ID - - @raise ValueError: invalid entityUri - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or entity - @raise AuthenticationError: invalid user name and/or password - """ - entity = mbutils.extractEntityType(entityUri) - uuid = mbutils.extractUuid(entityUri, entity) - params = { 'entity': entity, 'id': uuid } - - stream = self._ws.get('tag', '', filter=params) - try: - parser = MbXmlParser() - result = parser.parse(stream) - except ParseError, e: - raise ResponseError(str(e), e) - - return result.getTagList() - - def submitUserRating(self, entityUri, rating): - """Submit rating for an entity. - - Note that all previously existing rating from the authenticated - user are replaced with the one given to this method. Other - users' ratings are not affected. - - @param entityUri: a string containing an absolute MB ID - @param rating: A L{Rating } object - or integer - - @raise ValueError: invalid entityUri - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID, entity or tags - @raise AuthenticationError: invalid user name and/or password - """ - entity = mbutils.extractEntityType(entityUri) - uuid = mbutils.extractUuid(entityUri, entity) - params = ( - ('type', 'xml'), - ('entity', entity), - ('id', uuid), - ('rating', unicode(rating).encode('utf-8')) - ) - - encodedStr = urllib.urlencode(params) - - self._ws.post('rating', '', encodedStr) - - - def getUserRating(self, entityUri): - """Return the rating a user has applied to an entity. - - The given parameter has to be a fully qualified MusicBrainz - ID, as returned by other library functions. - - Note that this method only works if a valid user name and - password have been set. Only the rating the authenticated user - applied to the entity will be returned. If username and/or - password are incorrect, an AuthenticationError is raised. - - This method will return a L{Rating } - object. - - @param entityUri: a string containing an absolute MB ID - - @raise ValueError: invalid entityUri - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or entity - @raise AuthenticationError: invalid user name and/or password - """ - entity = mbutils.extractEntityType(entityUri) - uuid = mbutils.extractUuid(entityUri, entity) - params = { 'entity': entity, 'id': uuid } - - stream = self._ws.get('rating', '', filter=params) - try: - parser = MbXmlParser() - result = parser.parse(stream) - except ParseError, e: - raise ResponseError(str(e), e) - - return result.getRating() - - def submitCDStub(self, cdstub): - """Submit a CD Stub to the database. - - The number of tracks added to the CD Stub must match the TOC and DiscID - otherwise the submission wil fail. The submission will also fail if - the Disc ID is already in the MusicBrainz database. - - This method will only work if no user name and password are set. - - @param cdstub: a L{CDStub} object to submit - - @raise RequestError: Missmatching TOC/Track information or the - the CD Stub already exists or the Disc ID already exists - """ - assert self._clientId is not None, 'Please supply a client ID' - disc = cdstub._disc - params = [ ] - params.append( ('client', self._clientId.encode('utf-8')) ) - params.append( ('discid', disc.id) ) - params.append( ('title', cdstub.title) ) - params.append( ('artist', cdstub.artist) ) - if cdstub.barcode != "": - params.append( ('barcode', cdstub.barcode) ) - if cdstub.comment != "": - params.append( ('comment', cdstub.comment) ) - - trackind = 0 - for track,artist in cdstub.tracks: - params.append( ('track%d' % trackind, track) ) - if artist != "": - params.append( ('artist%d' % trackind, artist) ) - - trackind += 1 - - toc = "%d %d %d " % (disc.firstTrackNum, disc.lastTrackNum, disc.sectors) - toc = toc + ' '.join( map(lambda x: str(x[0]), disc.getTracks()) ) - - params.append( ('toc', toc) ) - - encodedStr = urllib.urlencode(params) - self._ws.post('release', '', encodedStr) - -def _createIncludes(tagMap): - selected = filter(lambda x: x[1] == True, tagMap.items()) - return map(lambda x: x[0], selected) - -def _createParameters(params): - """Remove (x, None) tuples and encode (x, str/unicode) to utf-8.""" - ret = [ ] - for p in params: - if isinstance(p[1], (str, unicode)): - ret.append( (p[0], p[1].encode('utf-8')) ) - elif p[1] is not None: - ret.append(p) - - return ret - -def _paramsValid(params): - """Check if the query parameter collides with other parameters.""" - tmp = [ ] - for name, value in params: - if value is not None and name not in ('offset', 'limit'): - tmp.append(name) - - if 'query' in tmp and len(tmp) > 1: - return False - else: - return True - -if __name__ == '__main__': - import doctest - doctest.testmod() - -# EOF diff --git a/musicbrainz2/wsxml.py b/musicbrainz2/wsxml.py deleted file mode 100644 index 7fd2a166..00000000 --- a/musicbrainz2/wsxml.py +++ /dev/null @@ -1,1675 +0,0 @@ -"""A parser for the Music Metadata XML Format (MMD). - -This module contains L{MbXmlParser}, which parses the U{Music Metadata XML -Format (MMD) } returned by the -MusicBrainz webservice. - -There are also DOM helper functions in this module used by the parser which -probably aren't useful to users. -""" -__revision__ = '$Id: wsxml.py 12028 2009-09-01 13:15:50Z matt $' - -import re -import logging -import urlparse -import xml.dom.minidom -import xml.sax.saxutils as saxutils -from xml.parsers.expat import ExpatError -from xml.dom import DOMException - -import musicbrainz2.utils as mbutils -import musicbrainz2.model as model -from musicbrainz2.model import NS_MMD_1, NS_REL_1, NS_EXT_1 - -__all__ = [ - 'DefaultFactory', 'Metadata', 'ParseError', - 'MbXmlParser', 'MbXmlWriter', - 'AbstractResult', - 'ArtistResult', 'ReleaseResult', 'TrackResult', 'LabelResult', - 'ReleaseGroupResult' -] - - -class DefaultFactory(object): - """A factory to instantiate classes from the domain model. - - This factory may be used to create objects from L{musicbrainz2.model}. - """ - def newArtist(self): return model.Artist() - def newRelease(self): return model.Release() - def newReleaseGroup(self): return model.ReleaseGroup() - def newTrack(self): return model.Track() - def newRelation(self): return model.Relation() - def newReleaseEvent(self): return model.ReleaseEvent() - def newDisc(self): return model.Disc() - def newArtistAlias(self): return model.ArtistAlias() - def newUser(self): return model.User() - def newLabel(self): return model.Label() - def newLabelAlias(self): return model.LabelAlias() - def newTag(self): return model.Tag() - def newRating(self): return model.Rating() - - -class ParseError(Exception): - """Exception to be thrown if a parse error occurs. - - The C{'msg'} attribute contains a printable error message, C{'reason'} - is the lower level exception that was raised. - """ - - def __init__(self, msg='Parse Error', reason=None): - Exception.__init__(self) - self.msg = msg - self.reason = reason - - def __str__(self): - return self.msg - - -class Metadata(object): - """Represents a parsed Music Metadata XML document. - - The Music Metadata XML format is very flexible and may contain a - diverse set of data (e.g. an artist, a release and a list of tracks), - but usually only a small subset is used (either an artist, a release - or a track, or a lists of objects from one class). - - @see: L{MbXmlParser} for reading, and L{MbXmlWriter} for writing - Metadata objects - """ - def __init__(self): - self._artist = None - self._release = None - self._track = None - self._label = None - self._releaseGroup = None - self._artistResults = [ ] - self._artistResultsOffset = None - self._artistResultsCount = None - self._releaseResults = [ ] - self._releaseResultsOffset = None - self._releaseResultsCount = None - self._releaseGroupResults = [ ] - self._releaseGroupResultsOffset = None - self._releaseGroupResultsCount = None - self._trackResults = [ ] - self._trackResultsOffset = None - self._trackResultsCount = None - self._labelResults = [ ] - self._labelResultsOffset = None - self._labelResultsCount = None - self._tagList = [ ] - self._rating = None - self._userList = [ ] - - def getArtist(self): - return self._artist - - def setArtist(self, artist): - self._artist = artist - - artist = property(getArtist, setArtist, doc='An Artist object.') - - def getLabel(self): - return self._label - - def setLabel(self, label): - self._label = label - - label = property(getLabel, setLabel, doc='A Label object.') - - def getRelease(self): - return self._release - - def setRelease(self, release): - self._release = release - - release = property(getRelease, setRelease, doc='A Release object.') - - def getReleaseGroup(self): - return self._releaseGroup - - def setReleaseGroup(self, releaseGroup): - self._releaseGroup = releaseGroup - - releaseGroup = property(getReleaseGroup, setReleaseGroup) - - def getTrack(self): - return self._track - - def setTrack(self, track): - self._track = track - - track = property(getTrack, setTrack, doc='A Track object.') - - def getArtistResults(self): - """Returns an artist result list. - - @return: a list of L{ArtistResult} objects. - """ - return self._artistResults - - artistResults = property(getArtistResults, - doc='A list of ArtistResult objects.') - - def getArtistResultsOffset(self): - """Returns the offset of the artist result list. - - The offset is used for paging through the result list. It - is zero-based. - - @return: an integer containing the offset, or None - - @see: L{getArtistResults}, L{getArtistResultsCount} - """ - return self._artistResultsOffset - - def setArtistResultsOffset(self, value): - """Sets the offset of the artist result list. - - @param value: an integer containing the offset, or None - - @see: L{getArtistResultsOffset} - """ - self._artistResultsOffset = value - - artistResultsOffset = property( - getArtistResultsOffset, setArtistResultsOffset, - doc='The offset of the artist results.') - - def getArtistResultsCount(self): - """Returns the total number of results available. - - This may or may not match with the number of elements that - L{getArtistResults} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setArtistResultsCount}, L{getArtistResultsOffset} - """ - return self._artistResultsCount - - def setArtistResultsCount(self, value): - """Sets the total number of available results. - - @param value: an integer containing the count, or None - - @see: L{getArtistResults}, L{setArtistResultsOffset} - """ - self._artistResultsCount = value - - artistResultsCount = property( - getArtistResultsCount, setArtistResultsCount, - doc='The total number of artists results.') - - def getLabelResults(self): - """Returns a label result list. - - @return: a list of L{LabelResult} objects. - """ - return self._labelResults - - labelResults = property(getLabelResults, - doc='A list of LabelResult objects') - - def getLabelResultsOffset(self): - """Returns the offset of the label result list. - - The offset is used for paging through the result list. It - is zero-based. - - @return: an integer containing the offset, or None - - @see: L{getLabelResults}, L{getLabelResultsCount} - """ - return self._labelResultsOffset - - def setLabelResultsOffset(self, value): - """Sets the offset of the label result list. - - @param value: an integer containing the offset, or None - - @see: L{getLabelResultsOffset} - """ - self._labelResultsOffset = value - - labelResultsOffset = property( - getLabelResultsOffset, setLabelResultsOffset, - doc='The offset of the label results.') - - def getLabelResultsCount(self): - """Returns the total number of results available. - - This may or may not match with the number of elements that - L{getLabelResults} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setLabelResultsCount}, L{getLabelResultsOffset} - """ - return self._labelResultsCount - - def setLabelResultsCount(self, value): - """Sets the total number of available results. - - @param value: an integer containing the count, or None - - @see: L{getLabelResults}, L{setLabelResultsOffset} - """ - self._labelResultsCount = value - - labelResultsCount = property( - getLabelResultsCount, setLabelResultsCount, - doc='The total number of label results.') - - def getReleaseResults(self): - """Returns a release result list. - - @return: a list of L{ReleaseResult} objects. - """ - return self._releaseResults - - releaseResults = property(getReleaseResults, - doc='A list of ReleaseResult objects.') - - def getReleaseResultsOffset(self): - """Returns the offset of the release result list. - - The offset is used for paging through the result list. It - is zero-based. - - @return: an integer containing the offset, or None - - @see: L{getReleaseResults}, L{getReleaseResultsCount} - """ - return self._releaseResultsOffset - - def setReleaseResultsOffset(self, value): - """Sets the offset of the release result list. - - @param value: an integer containing the offset, or None - - @see: L{getReleaseResultsOffset} - """ - self._releaseResultsOffset = value - - releaseResultsOffset = property( - getReleaseResultsOffset, setReleaseResultsOffset, - doc='The offset of the release results.') - - def getReleaseResultsCount(self): - """Returns the total number of results available. - - This may or may not match with the number of elements that - L{getReleaseResults} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setReleaseResultsCount}, L{getReleaseResultsOffset} - """ - return self._releaseResultsCount - - def setReleaseResultsCount(self, value): - """Sets the total number of available results. - - @param value: an integer containing the count, or None - - @see: L{getReleaseResults}, L{setReleaseResultsOffset} - """ - self._releaseResultsCount = value - - releaseResultsCount = property( - getReleaseResultsCount, setReleaseResultsCount, - doc='The total number of release results.') - - def getReleaseGroupResults(self): - """Returns a release group result list. - - @return: a list of L{ReleaseGroupResult} objects. - """ - return self._releaseGroupResults - - releaseGroupResults = property(getReleaseGroupResults, - doc = 'A list of ReleaseGroupResult objects.') - - def getReleaseGroupResultsOffset(self): - """Returns the offset of the release group result list. - - The offset is used for paging through the result list. It - is zero-based. - - @return: an integer containing the offset, or None. - - @see: L{getReleaseGroupResults}, L{getReleaseGroupResultsCount} - """ - return self._releaseGroupResultsOffset - - def setReleaseGroupResultsOffset(self, value): - """Sets the offset of the release group result list. - - @param value: an integer containing the offset, or None - - @see: L{getReleaseGroupResultsOffset} - """ - self._releaseGroupResultsOffset = value - - releaseGroupResultsOffset = property( - getReleaseGroupResultsOffset, setReleaseGroupResultsOffset, - doc='The offset of the release group results.') - - def getReleaseGroupResultsCount(self): - """Returns the total number of results available. - - This may or may not match with the number of elements that - L{getReleaseGroupResults} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setReleaseGroupResultsCount}, L{getReleaseGroupResultsOffset} - """ - return self._releaseGroupResultsCount - - def setReleaseGroupResultsCount(self, value): - """Sets the total number of available results. - - @param value: an integer containing the count, or None - - @see: L{getReleaseGroupResults}, L{setReleaseGroupResultsOffset} - """ - self._releaseGroupResultsCount = value - - releaseGroupResultsCount = property( - getReleaseGroupResultsCount, setReleaseGroupResultsCount, - doc='The total number of release group results.') - - def getTrackResults(self): - """Returns a track result list. - - @return: a list of L{TrackResult} objects. - """ - return self._trackResults - - trackResults = property(getTrackResults, - doc='A list of TrackResult objects.') - - def getTrackResultsOffset(self): - """Returns the offset of the track result list. - - The offset is used for paging through the result list. It - is zero-based. - - @return: an integer containing the offset, or None - - @see: L{getTrackResults}, L{getTrackResultsCount} - """ - return self._trackResultsOffset - - def setTrackResultsOffset(self, value): - """Sets the offset of the track result list. - - @param value: an integer containing the offset, or None - - @see: L{getTrackResultsOffset} - """ - self._trackResultsOffset = value - - trackResultsOffset = property( - getTrackResultsOffset, setTrackResultsOffset, - doc='The offset of the track results.') - - def getTrackResultsCount(self): - """Returns the total number of results available. - - This may or may not match with the number of elements that - L{getTrackResults} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setTrackResultsCount}, L{getTrackResultsOffset} - """ - return self._trackResultsCount - - def setTrackResultsCount(self, value): - """Sets the total number of available results. - - @param value: an integer containing the count, or None - - @see: L{getTrackResults}, L{setTrackResultsOffset} - """ - self._trackResultsCount = value - - trackResultsCount = property( - getTrackResultsCount, setTrackResultsCount, - doc='The total number of track results.') - - - def getTagList(self): - """Returns a list of tags. - - @return: a list of L{model.Tag} objects - """ - return self._tagList - - tagResults = property(getTagList, - doc='A list of Tag objects.') - - def getRating(self): - """Returns the rating. - - @return: rating object - """ - return self._rating - - def setRating(self, value): - """Sets the rating. - - @param value: a L{model.Rating} object - """ - self._rating = value - - rating = property(getRating, setRating, doc='A Rating object.') - - - # MusicBrainz extension to the schema - def getUserList(self): - """Returns a list of users. - - @return: a list of L{model.User} objects - - @note: This is a MusicBrainz extension. - """ - return self._userList - - userResults = property(getUserList, - doc='A list of User objects.') - - -class AbstractResult(object): - """The abstract representation of a result. - - A result is an instance of some kind (Artist, Release, ...) - associated with a score. - """ - - def __init__(self, score): - self._score = score - - def getScore(self): - """Returns the result score. - - The score indicates how good this result matches the search - parameters. The higher the value, the better the match. - - @return: an int between 0 and 100 (both inclusive), or None - """ - return self._score - - def setScore(self, score): - self._score = score - - score = property(getScore, setScore, doc='The relevance score.') - - -class ArtistResult(AbstractResult): - """Represents an artist result. - - An ArtistResult consists of a I{score} and an artist. The score is a - number between 0 and 100, where a higher number indicates a better - match. - """ - def __init__(self, artist, score): - super(ArtistResult, self).__init__(score) - self._artist = artist - - def getArtist(self): - """Returns an Artist object. - - @return: a L{musicbrainz2.model.Artist} object - """ - return self._artist - - def setArtist(self, artist): - self._artist = artist - - artist = property(getArtist, setArtist, doc='An Artist object.') - - -class ReleaseResult(AbstractResult): - """Represents a release result. - - A ReleaseResult consists of a I{score} and a release. The score is a - number between 0 and 100, where a higher number indicates a better - match. - """ - def __init__(self, release, score): - super(ReleaseResult, self).__init__(score) - self._release = release - - def getRelease(self): - """Returns a Release object. - - @return: a L{musicbrainz2.model.Release} object - """ - return self._release - - def setRelease(self, release): - self._release = release - - release = property(getRelease, setRelease, doc='A Release object.') - -class ReleaseGroupResult(AbstractResult): - """Represents a release group result. - - A ReleaseGroupResult consists of a I{score} and a release group. The - score is a number between 0 and 100, where a higher number indicates - a better match. - """ - def __init__(self, releaseGroup, score): - super(ReleaseGroupResult, self).__init__(score) - self._releaseGroup = releaseGroup - - def getReleaseGroup(self): - """Returns a ReleaseGroup object. - - @return: a L{musicbrainz2.model.ReleaseGroup} object - """ - return self._releaseGroup - - def setReleaseGroup(self, value): - self._releaseGroup = value - - releaseGroup = property(getReleaseGroup, setReleaseGroup, doc='A ReleaseGroup object.') - -class TrackResult(AbstractResult): - """Represents a track result. - - A TrackResult consists of a I{score} and a track. The score is a - number between 0 and 100, where a higher number indicates a better - match. - """ - def __init__(self, track, score): - super(TrackResult, self).__init__(score) - self._track = track - - def getTrack(self): - """Returns a Track object. - - @return: a L{musicbrainz2.model.Track} object - """ - return self._track - - def setTrack(self, track): - self._track = track - - track = property(getTrack, setTrack, doc='A Track object.') - - -class LabelResult(AbstractResult): - """Represents a label result. - - An LabelResult consists of a I{score} and a label. The score is a - number between 0 and 100, where a higher number indicates a better - match. - """ - def __init__(self, label, score): - super(LabelResult, self).__init__(score) - self._label = label - - def getLabel(self): - """Returns a Label object. - - @return: a L{musicbrainz2.model.Label} object - """ - return self._label - - def setLabel(self, label): - self._label = label - - label = property(getLabel, setLabel, doc='A Label object.') - - -class MbXmlParser(object): - """A parser for the Music Metadata XML format. - - This parser supports all basic features and extensions defined by - MusicBrainz, including unlimited document nesting. By default it - reads an XML document from a file-like object (stream) and returns - an object tree representing the document using classes from - L{musicbrainz2.model}. - - The implementation tries to be as permissive as possible. Invalid - contents are skipped, but documents have to be well-formed and using - the correct namespace. In case of unrecoverable errors, a L{ParseError} - exception is raised. - - @see: U{The Music Metadata XML Format - } - """ - - def __init__(self, factory=DefaultFactory()): - """Constructor. - - The C{factory} parameter has be an instance of L{DefaultFactory} - or a subclass of it. It is used by L{parse} to obtain objects - from L{musicbrainz2.model} to build resulting object tree. - If you supply your own factory, you have to make sure all - returned objects have the same interface as their counterparts - from L{musicbrainz2.model}. - - @param factory: an object factory - """ - self._log = logging.getLogger(str(self.__class__)) - self._factory = factory - - def parse(self, inStream): - """Parses the MusicBrainz web service XML. - - Returns a L{Metadata} object representing the parsed XML or - raises a L{ParseError} exception if the data was malformed. - The parser tries to be liberal and skips invalid content if - possible. - - Note that an L{IOError} may be raised if there is a problem - reading C{inStream}. - - @param inStream: a file-like object - @return: a L{Metadata} object (never None) - @raise ParseError: if the document is not valid - @raise IOError: if reading from the stream failed - """ - - try: - doc = xml.dom.minidom.parse(inStream) - - # Try to find the root element. If this isn't an mmd - # XML file or the namespace is wrong, this will fail. - elems = doc.getElementsByTagNameNS(NS_MMD_1, 'metadata') - - if len(elems) != 0: - md = self._createMetadata(elems[0]) - else: - msg = 'cannot find root element mmd:metadata' - self._log.debug('ParseError: ' + msg) - raise ParseError(msg) - - doc.unlink() - - return md - except ExpatError, e: - self._log.debug('ExpatError: ' + str(e)) - raise ParseError(msg=str(e), reason=e) - except DOMException, e: - self._log.debug('DOMException: ' + str(e)) - raise ParseError(msg=str(e), reason=e) - - - def _createMetadata(self, metadata): - md = Metadata() - - for node in _getChildElements(metadata): - if _matches(node, 'artist'): - md.artist = self._createArtist(node) - elif _matches(node, 'release'): - md.release = self._createRelease(node) - elif _matches(node, 'release-group'): - md.releaseGroup = self._createReleaseGroup(node) - elif _matches(node, 'track'): - md.track = self._createTrack(node) - elif _matches(node, 'label'): - md.label = self._createLabel(node) - elif _matches(node, 'artist-list'): - (offset, count) = self._getListAttrs(node) - md.artistResultsOffset = offset - md.artistResultsCount = count - self._addArtistResults(node, md.getArtistResults()) - elif _matches(node, 'release-list'): - (offset, count) = self._getListAttrs(node) - md.releaseResultsOffset = offset - md.releaseResultsCount = count - self._addReleaseResults(node, md.getReleaseResults()) - elif _matches(node, 'release-group-list'): - (offset, count) = self._getListAttrs(node) - md.releaseGroupResultsOffset = offset - md.releaseGroupResultsCount = count - self._addReleaseGroupResults(node, md.getReleaseGroupResults()) - elif _matches(node, 'track-list'): - (offset, count) = self._getListAttrs(node) - md.trackResultsOffset = offset - md.trackResultsCount = count - self._addTrackResults(node, md.getTrackResults()) - elif _matches(node, 'label-list'): - (offset, count) = self._getListAttrs(node) - md.labelResultsOffset = offset - md.labelResultsCount = count - self._addLabelResults(node, md.getLabelResults()) - elif _matches(node, 'tag-list'): - self._addTagsToList(node, md.getTagList()) - elif _matches(node, 'user-list', NS_EXT_1): - self._addUsersToList(node, md.getUserList()) - - return md - - - def _addArtistResults(self, listNode, resultList): - for c in _getChildElements(listNode): - artist = self._createArtist(c) - score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1) - if artist is not None: - resultList.append(ArtistResult(artist, score)) - - def _addReleaseResults(self, listNode, resultList): - for c in _getChildElements(listNode): - release = self._createRelease(c) - score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1) - if release is not None: - resultList.append(ReleaseResult(release, score)) - - def _addReleaseGroupResults(self, listNode, resultList): - for c in _getChildElements(listNode): - releaseGroup = self._createReleaseGroup(c) - score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1) - if releaseGroup is not None: - resultList.append(ReleaseGroupResult(releaseGroup, score)) - - def _addTrackResults(self, listNode, resultList): - for c in _getChildElements(listNode): - track = self._createTrack(c) - score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1) - if track is not None: - resultList.append(TrackResult(track, score)) - - def _addLabelResults(self, listNode, resultList): - for c in _getChildElements(listNode): - label = self._createLabel(c) - score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1) - if label is not None: - resultList.append(LabelResult(label, score)) - - def _addReleasesToList(self, listNode, resultList): - self._addToList(listNode, resultList, self._createRelease) - - def _addReleaseGroupsToList(self, listNode, resultList): - self._addToList(listNode, resultList, self._createReleaseGroup) - - def _addTracksToList(self, listNode, resultList): - self._addToList(listNode, resultList, self._createTrack) - - def _addUsersToList(self, listNode, resultList): - self._addToList(listNode, resultList, self._createUser) - - def _addTagsToList(self, listNode, resultList): - self._addToList(listNode, resultList, self._createTag) - - def _addTagsToEntity(self, listNode, entity): - for node in _getChildElements(listNode): - tag = self._createTag(node) - entity.addTag(tag) - - def _addRatingToEntity(self, attrNode, entity): - rating = self._createRating(attrNode) - entity.setRating(rating) - - def _addToList(self, listNode, resultList, creator): - for c in _getChildElements(listNode): - resultList.append(creator(c)) - - def _getListAttrs(self, listNode): - offset = _getIntAttr(listNode, 'offset') - count = _getIntAttr(listNode, 'count') - return (offset, count) - - - def _createArtist(self, artistNode): - artist = self._factory.newArtist() - artist.setId(_getIdAttr(artistNode, 'id', 'artist')) - artist.setType(_getUriAttr(artistNode, 'type')) - - for node in _getChildElements(artistNode): - if _matches(node, 'name'): - artist.setName(_getText(node)) - elif _matches(node, 'sort-name'): - artist.setSortName(_getText(node)) - elif _matches(node, 'disambiguation'): - artist.setDisambiguation(_getText(node)) - elif _matches(node, 'life-span'): - artist.setBeginDate(_getDateAttr(node, 'begin')) - artist.setEndDate(_getDateAttr(node, 'end')) - elif _matches(node, 'alias-list'): - self._addArtistAliases(node, artist) - elif _matches(node, 'release-list'): - (offset, count) = self._getListAttrs(node) - artist.setReleasesOffset(offset) - artist.setReleasesCount(count) - self._addReleasesToList(node, artist.getReleases()) - elif _matches(node, 'release-group-list'): - (offset, count) = self._getListAttrs(node) - artist.setReleaseGroupsOffset(offset) - artist.setReleaseGroupsCount(count) - self._addReleaseGroupsToList(node, artist.getReleaseGroups()) - elif _matches(node, 'relation-list'): - self._addRelationsToEntity(node, artist) - elif _matches(node, 'tag-list'): - self._addTagsToEntity(node, artist) - elif _matches(node, 'rating'): - self._addRatingToEntity(node, artist) - - return artist - - def _createLabel(self, labelNode): - label = self._factory.newLabel() - label.setId(_getIdAttr(labelNode, 'id', 'label')) - label.setType(_getUriAttr(labelNode, 'type')) - - for node in _getChildElements(labelNode): - if _matches(node, 'name'): - label.setName(_getText(node)) - if _matches(node, 'sort-name'): - label.setSortName(_getText(node)) - elif _matches(node, 'disambiguation'): - label.setDisambiguation(_getText(node)) - elif _matches(node, 'label-code'): - label.setCode(_getText(node)) - elif _matches(node, 'country'): - country = _getText(node, '^[A-Z]{2}$') - label.setCountry(country) - elif _matches(node, 'life-span'): - label.setBeginDate(_getDateAttr(node, 'begin')) - label.setEndDate(_getDateAttr(node, 'end')) - elif _matches(node, 'alias-list'): - self._addLabelAliases(node, label) - elif _matches(node, 'tag-list'): - self._addTagsToEntity(node, label) - elif _matches(node, 'rating'): - self._addRatingToEntity(node, label) - - return label - - def _createRelease(self, releaseNode): - release = self._factory.newRelease() - release.setId(_getIdAttr(releaseNode, 'id', 'release')) - for t in _getUriListAttr(releaseNode, 'type'): - release.addType(t) - - for node in _getChildElements(releaseNode): - if _matches(node, 'title'): - release.setTitle(_getText(node)) - elif _matches(node, 'text-representation'): - lang = _getAttr(node, 'language', '^[A-Z]{3}$') - release.setTextLanguage(lang) - script = _getAttr(node, 'script', '^[A-Z][a-z]{3}$') - release.setTextScript(script) - elif _matches(node, 'asin'): - release.setAsin(_getText(node)) - elif _matches(node, 'artist'): - release.setArtist(self._createArtist(node)) - elif _matches(node, 'release-event-list'): - self._addReleaseEvents(node, release) - elif _matches(node, 'release-group'): - release.setReleaseGroup(self._createReleaseGroup(node)) - elif _matches(node, 'disc-list'): - self._addDiscs(node, release) - elif _matches(node, 'track-list'): - (offset, count) = self._getListAttrs(node) - release.setTracksOffset(offset) - release.setTracksCount(count) - self._addTracksToList(node, release.getTracks()) - elif _matches(node, 'relation-list'): - self._addRelationsToEntity(node, release) - elif _matches(node, 'tag-list'): - self._addTagsToEntity(node, release) - elif _matches(node, 'rating'): - self._addRatingToEntity(node, release) - - return release - - def _createReleaseGroup(self, node): - rg = self._factory.newReleaseGroup() - rg.setId(_getIdAttr(node, 'id', 'release-group')) - rg.setType(_getUriAttr(node, 'type')) - - for child in _getChildElements(node): - if _matches(child, 'title'): - rg.setTitle(_getText(child)) - elif _matches(child, 'artist'): - rg.setArtist(self._createArtist(child)) - elif _matches(child, 'release-list'): - (offset, count) = self._getListAttrs(child) - rg.setReleasesOffset(offset) - rg.setReleasesCount(count) - self._addReleasesToList(child, rg.getReleases()) - - return rg - - def _addReleaseEvents(self, releaseListNode, release): - for node in _getChildElements(releaseListNode): - if _matches(node, 'event'): - country = _getAttr(node, 'country', '^[A-Z]{2}$') - date = _getDateAttr(node, 'date') - catalogNumber = _getAttr(node, 'catalog-number') - barcode = _getAttr(node, 'barcode') - format = _getUriAttr(node, 'format') - - # The date attribute is mandatory. If it isn't present, - # we don't add anything from this release event. - if date is not None: - event = self._factory.newReleaseEvent() - event.setCountry(country) - event.setDate(date) - event.setCatalogNumber(catalogNumber) - event.setBarcode(barcode) - event.setFormat(format) - - for subNode in _getChildElements(node): - if _matches(subNode, 'label'): - event.setLabel(self._createLabel(subNode)) - - release.addReleaseEvent(event) - - - def _addDiscs(self, discIdListNode, release): - for node in _getChildElements(discIdListNode): - if _matches(node, 'disc') and node.hasAttribute('id'): - d = self._factory.newDisc() - d.setId(node.getAttribute('id')) - d.setSectors(_getIntAttr(node, 'sectors', 0)) - release.addDisc(d) - - - def _addArtistAliases(self, aliasListNode, artist): - for node in _getChildElements(aliasListNode): - if _matches(node, 'alias'): - alias = self._factory.newArtistAlias() - self._initializeAlias(alias, node) - artist.addAlias(alias) - - - def _addLabelAliases(self, aliasListNode, label): - for node in _getChildElements(aliasListNode): - if _matches(node, 'alias'): - alias = self._factory.newLabelAlias() - self._initializeAlias(alias, node) - label.addAlias(alias) - - - def _initializeAlias(self, alias, node): - alias.setValue(_getText(node)) - alias.setType(_getUriAttr(node, 'type')) - alias.setScript(_getAttr(node, 'script', - '^[A-Z][a-z]{3}$')) - - - def _createTrack(self, trackNode): - track = self._factory.newTrack() - track.setId(_getIdAttr(trackNode, 'id', 'track')) - - for node in _getChildElements(trackNode): - if _matches(node, 'title'): - track.setTitle(_getText(node)) - elif _matches(node, 'artist'): - track.setArtist(self._createArtist(node)) - elif _matches(node, 'duration'): - track.setDuration(_getPositiveIntText(node)) - elif _matches(node, 'release-list'): - self._addReleasesToList(node, track.getReleases()) - elif _matches(node, 'puid-list'): - self._addPuids(node, track) - elif _matches(node, 'isrc-list'): - self._addISRCs(node, track) - elif _matches(node, 'relation-list'): - self._addRelationsToEntity(node, track) - elif _matches(node, 'tag-list'): - self._addTagsToEntity(node, track) - elif _matches(node, 'rating'): - self._addRatingToEntity(node, track) - - return track - - # MusicBrainz extension - def _createUser(self, userNode): - user = self._factory.newUser() - for t in _getUriListAttr(userNode, 'type', NS_EXT_1): - user.addType(t) - - for node in _getChildElements(userNode): - if _matches(node, 'name'): - user.setName(_getText(node)) - elif _matches(node, 'nag', NS_EXT_1): - user.setShowNag(_getBooleanAttr(node, 'show')) - - return user - - def _createRating(self, ratingNode): - rating = self._factory.newRating() - rating.value = _getText(ratingNode) - rating.count = _getIntAttr(ratingNode, 'votes-count') - return rating - - def _createTag(self, tagNode): - tag = self._factory.newTag() - tag.value = _getText(tagNode) - tag.count = _getIntAttr(tagNode, 'count') - return tag - - - def _addPuids(self, puidListNode, track): - for node in _getChildElements(puidListNode): - if _matches(node, 'puid') and node.hasAttribute('id'): - track.addPuid(node.getAttribute('id')) - - def _addISRCs(self, isrcListNode, track): - for node in _getChildElements(isrcListNode): - if _matches(node, 'isrc') and node.hasAttribute('id'): - track.addISRC(node.getAttribute('id')) - - def _addRelationsToEntity(self, relationListNode, entity): - targetType = _getUriAttr(relationListNode, 'target-type', NS_REL_1) - - if targetType is None: - return - - for node in _getChildElements(relationListNode): - if _matches(node, 'relation'): - rel = self._createRelation(node, targetType) - if rel is not None: - entity.addRelation(rel) - - - def _createRelation(self, relationNode, targetType): - relation = self._factory.newRelation() - - relation.setType(_getUriAttr(relationNode, 'type', NS_REL_1)) - relation.setTargetType(targetType) - resType = _getResourceType(targetType) - relation.setTargetId(_getIdAttr(relationNode, 'target', resType)) - - if relation.getType() is None \ - or relation.getTargetType() is None \ - or relation.getTargetId() is None: - return None - - relation.setDirection(_getDirectionAttr(relationNode, 'direction')) - relation.setBeginDate(_getDateAttr(relationNode, 'begin')) - relation.setEndDate(_getDateAttr(relationNode, 'end')) - - for a in _getUriListAttr(relationNode, 'attributes', NS_REL_1): - relation.addAttribute(a) - - target = None - children = _getChildElements(relationNode) - if len(children) > 0: - node = children[0] - if _matches(node, 'artist'): - target = self._createArtist(node) - elif _matches(node, 'release'): - target = self._createRelease(node) - elif _matches(node, 'track'): - target = self._createTrack(node) - - relation.setTarget(target) - - return relation - - -# -# XML output -# - -class _XmlWriter(object): - def __init__(self, outStream, indentAmount=' ', newline="\n"): - self._out = outStream - self._indentAmount = indentAmount - self._stack = [ ] - self._newline = newline - - def prolog(self, encoding='UTF-8', version='1.0'): - pi = '' % (version, encoding) - self._out.write(pi + self._newline) - - def start(self, name, attrs={ }): - indent = self._getIndention() - self._stack.append(name) - self._out.write(indent + self._makeTag(name, attrs) + self._newline) - - def end(self): - name = self._stack.pop() - indent = self._getIndention() - self._out.write('%s\n' % (indent, name)) - - def elem(self, name, value, attrs={ }): - # delete attributes with an unset value - for (k, v) in attrs.items(): - if v is None or v == '': - del attrs[k] - - if value is None or value == '': - if len(attrs) == 0: - return - self._out.write(self._getIndention()) - self._out.write(self._makeTag(name, attrs, True) + '\n') - else: - escValue = saxutils.escape(value or '') - self._out.write(self._getIndention()) - self._out.write(self._makeTag(name, attrs)) - self._out.write(escValue) - self._out.write('\n' % name) - - def _getIndention(self): - return self._indentAmount * len(self._stack) - - def _makeTag(self, name, attrs={ }, close=False): - ret = '<' + name - - for (k, v) in attrs.iteritems(): - if v is not None: - v = saxutils.quoteattr(str(v)) - ret += ' %s=%s' % (k, v) - - if close: - return ret + '/>' - else: - return ret + '>' - - - -class MbXmlWriter(object): - """Write XML in the Music Metadata XML format.""" - - def __init__(self, indentAmount=' ', newline="\n"): - """Constructor. - - @param indentAmount: the amount of whitespace to use per level - """ - self._indentAmount = indentAmount - self._newline = newline - - - def write(self, outStream, metadata): - """Writes the XML representation of a Metadata object to a file. - - @param outStream: an open file-like object - @param metadata: a L{Metadata} object - """ - xml = _XmlWriter(outStream, self._indentAmount, self._newline) - - xml.prolog() - xml.start('metadata', { - 'xmlns': NS_MMD_1, - 'xmlns:ext': NS_EXT_1, - }) - - self._writeArtist(xml, metadata.getArtist()) - self._writeRelease(xml, metadata.getRelease()) - self._writeReleaseGroup(xml, metadata.getReleaseGroup()) - self._writeTrack(xml, metadata.getTrack()) - self._writeLabel(xml, metadata.getLabel()) - - if len(metadata.getArtistResults()) > 0: - xml.start('artist-list', { - 'offset': metadata.artistResultsOffset, - 'count': metadata.artistResultsCount, - }) - for result in metadata.getArtistResults(): - self._writeArtist(xml, result.getArtist(), - result.getScore()) - xml.end() - - if len(metadata.getReleaseResults()) > 0: - xml.start('release-list', { - 'offset': metadata.releaseResultsOffset, - 'count': metadata.releaseResultsCount, - }) - for result in metadata.getReleaseResults(): - self._writeRelease(xml, result.getRelease(), - result.getScore()) - xml.end() - - if len(metadata.getReleaseGroupResults()) > 0: - xml.start('release-group-list', { - 'offset': metadata.releaseGroupResultsOffset, - 'count': metadata.releaseGroupResultsCount - }) - for result in metadata.getReleaseGroupResults(): - self._writeReleaseGroup(xml, result.getReleaseGroup(), - result.getScore()) - xml.end() - - if len(metadata.getTrackResults()) > 0: - xml.start('track-list', { - 'offset': metadata.trackResultsOffset, - 'count': metadata.trackResultsCount, - }) - for result in metadata.getTrackResults(): - self._writeTrack(xml, result.getTrack(), - result.getScore()) - xml.end() - - if len(metadata.getLabelResults()) > 0: - xml.start('label-list', { - 'offset': metadata.labelResultsOffset, - 'count': metadata.labelResultsCount, - }) - for result in metadata.getLabelResults(): - self._writeLabel(xml, result.getLabel(), - result.getScore()) - xml.end() - - xml.end() - - - def _writeArtist(self, xml, artist, score=None): - if artist is None: - return - - xml.start('artist', { - 'id': mbutils.extractUuid(artist.getId()), - 'type': mbutils.extractFragment(artist.getType()), - 'ext:score': score, - }) - - xml.elem('name', artist.getName()) - xml.elem('sort-name', artist.getSortName()) - xml.elem('disambiguation', artist.getDisambiguation()) - xml.elem('life-span', None, { - 'begin': artist.getBeginDate(), - 'end': artist.getEndDate(), - }) - - if len(artist.getAliases()) > 0: - xml.start('alias-list') - for alias in artist.getAliases(): - xml.elem('alias', alias.getValue(), { - 'type': alias.getType(), - 'script': alias.getScript(), - }) - xml.end() - - if len(artist.getReleases()) > 0: - xml.start('release-list') - for release in artist.getReleases(): - self._writeRelease(xml, release) - xml.end() - - if len(artist.getReleaseGroups()) > 0: - xml.start('release-group-list') - for releaseGroup in artist.getReleaseGroups(): - self._writeReleaseGroup(xml, releaseGroup) - xml.end() - - self._writeRelationList(xml, artist) - # TODO: extensions - - xml.end() - - - def _writeRelease(self, xml, release, score=None): - if release is None: - return - - types = [mbutils.extractFragment(t) for t in release.getTypes()] - typesStr = None - if len(types) > 0: - typesStr = ' '.join(types) - - xml.start('release', { - 'id': mbutils.extractUuid(release.getId()), - 'type': typesStr, - 'ext:score': score, - }) - - xml.elem('title', release.getTitle()) - xml.elem('text-representation', None, { - 'language': release.getTextLanguage(), - 'script': release.getTextScript() - }) - xml.elem('asin', release.getAsin()) - - self._writeArtist(xml, release.getArtist()) - self._writeReleaseGroup(xml, release.getReleaseGroup()) - - if len(release.getReleaseEvents()) > 0: - xml.start('release-event-list') - for event in release.getReleaseEvents(): - self._writeReleaseEvent(xml, event) - xml.end() - - if len(release.getDiscs()) > 0: - xml.start('disc-list') - for disc in release.getDiscs(): - xml.elem('disc', None, { 'id': disc.getId() }) - xml.end() - - if len(release.getTracks()) > 0: - # TODO: count attribute - xml.start('track-list', { - 'offset': release.getTracksOffset() - }) - for track in release.getTracks(): - self._writeTrack(xml, track) - xml.end() - - self._writeRelationList(xml, release) - # TODO: extensions - - xml.end() - - def _writeReleaseGroup(self, xml, rg, score = None): - if rg is None: - return - - xml.start('release-group', { - 'id': mbutils.extractUuid(rg.getId()), - 'type': mbutils.extractFragment(rg.getType()), - 'ext:score': score, - }) - - xml.elem('title', rg.getTitle()) - self._writeArtist(xml, rg.getArtist()) - - if len(rg.getReleases()) > 0: - xml.start('release-list') - for rel in rg.getReleases(): - self._writeRelease(xml, rel) - xml.end() - - xml.end() - - def _writeReleaseEvent(self, xml, event): - xml.start('event', { - 'country': event.getCountry(), - 'date': event.getDate(), - 'catalog-number': event.getCatalogNumber(), - 'barcode': event.getBarcode(), - 'format': event.getFormat() - }) - - self._writeLabel(xml, event.getLabel()) - - xml.end() - - - def _writeTrack(self, xml, track, score=None): - if track is None: - return - - xml.start('track', { - 'id': mbutils.extractUuid(track.getId()), - 'ext:score': score, - }) - - xml.elem('title', track.getTitle()) - xml.elem('duration', str(track.getDuration())) - self._writeArtist(xml, track.getArtist()) - - if len(track.getReleases()) > 0: - # TODO: offset + count - xml.start('release-list') - for release in track.getReleases(): - self._writeRelease(xml, release) - xml.end() - - if len(track.getPuids()) > 0: - xml.start('puid-list') - for puid in track.getPuids(): - xml.elem('puid', None, { 'id': puid }) - xml.end() - - self._writeRelationList(xml, track) - # TODO: extensions - - xml.end() - - - def _writeLabel(self, xml, label, score=None): - if label is None: - return - - xml.start('label', { - 'id': mbutils.extractUuid(label.getId()), - 'type': mbutils.extractFragment(label.getType()), - 'ext:score': score, - }) - - xml.elem('name', label.getName()) - xml.elem('sort-name', label.getSortName()) - xml.elem('disambiguation', label.getDisambiguation()) - xml.elem('life-span', None, { - 'begin': label.getBeginDate(), - 'end': label.getEndDate(), - }) - - if len(label.getAliases()) > 0: - xml.start('alias-list') - for alias in label.getAliases(): - xml.elem('alias', alias.getValue(), { - 'type': alias.getType(), - 'script': alias.getScript(), - }) - xml.end() - - # TODO: releases, artists - - self._writeRelationList(xml, label) - # TODO: extensions - - xml.end() - - - def _writeRelationList(self, xml, entity): - for tt in entity.getRelationTargetTypes(): - xml.start('relation-list', { - 'target-type': mbutils.extractFragment(tt), - }) - for rel in entity.getRelations(targetType=tt): - self._writeRelation(xml, rel, tt) - xml.end() - - - def _writeRelation(self, xml, rel, targetType): - relAttrs = ' '.join([mbutils.extractFragment(a) - for a in rel.getAttributes()]) - - if relAttrs == '': - relAttrs = None - - attrs = { - 'type': mbutils.extractFragment(rel.getType()), - 'target': rel.getTargetId(), - 'direction': rel.getDirection(), - 'begin': rel.getBeginDate(), - 'end': rel.getBeginDate(), - 'attributes': relAttrs, - } - - if rel.getTarget() is None: - xml.elem('relation', None, attrs) - else: - xml.start('relation', attrs) - if targetType == NS_REL_1 + 'Artist': - self._writeArtist(xml, rel.getTarget()) - elif targetType == NS_REL_1 + 'Release': - self._writeRelease(xml, rel.getTarget()) - elif targetType == NS_REL_1 + 'Track': - self._writeTrack(xml, rel.getTarget()) - xml.end() - - -# -# DOM Utilities -# - -def _matches(node, name, namespace=NS_MMD_1): - """Checks if an xml.dom.Node and a given name and namespace match.""" - - if node.localName == name and node.namespaceURI == namespace: - return True - else: - return False - - -def _getChildElements(parentNode): - """Returns all direct child elements of the given xml.dom.Node.""" - - children = [ ] - for node in parentNode.childNodes: - if node.nodeType == node.ELEMENT_NODE: - children.append(node) - - return children - - -def _getText(element, regex=None, default=None): - """Returns the text content of the given xml.dom.Element. - - This function simply fetches all contained text nodes, so the element - should not contain child elements. - """ - res = '' - for node in element.childNodes: - if node.nodeType == node.TEXT_NODE: - res += node.data - - if regex is None or re.match(regex, res): - return res - else: - return default - - -def _getPositiveIntText(element): - """Returns the text content of the given xml.dom.Element as an int.""" - - res = _getText(element) - - if res is None: - return None - - try: - return int(res) - except ValueError: - return None - - -def _getAttr(element, attrName, regex=None, default=None, ns=None): - """Returns an attribute of the given element. - - If there is no attribute with that name or the attribute doesn't - match the regular expression, default is returned. - """ - if element.hasAttributeNS(ns, attrName): - content = element.getAttributeNS(ns, attrName) - - if regex is None or re.match(regex, content): - return content - else: - return default - else: - return default - - -def _getDateAttr(element, attrName): - """Gets an incomplete date from an attribute.""" - return _getAttr(element, attrName, '^\d+(-\d\d)?(-\d\d)?$') - - -def _getIdAttr(element, attrName, typeName): - """Gets an ID from an attribute and turns it into an absolute URI.""" - value = _getAttr(element, attrName) - - return _makeAbsoluteUri('http://musicbrainz.org/' + typeName + '/', value) - - - -def _getIntAttr(element, attrName, min=0, max=None, ns=None): - """Gets an int from an attribute, or None.""" - try: - val = int(_getAttr(element, attrName, ns=ns)) - - if max is None: - max = val - - if min <= val <= max: - return val - else: - return None - except ValueError: - return None # raised if conversion to int fails - except TypeError: - return None # raised if no such attribute exists - - -def _getUriListAttr(element, attrName, prefix=NS_MMD_1): - """Gets a list of URIs from an attribute.""" - if not element.hasAttribute(attrName): - return [ ] - - f = lambda x: x != '' - uris = filter(f, re.split('\s+', element.getAttribute(attrName))) - - m = lambda x: _makeAbsoluteUri(prefix, x) - uris = map(m, uris) - - return uris - - -def _getUriAttr(element, attrName, prefix=NS_MMD_1): - """Gets a URI from an attribute. - - This also works for space-separated URI lists. In this case, the - first URI is returned. - """ - uris = _getUriListAttr(element, attrName, prefix) - if len(uris) > 0: - return uris[0] - else: - return None - - -def _getBooleanAttr(element, attrName): - """Gets a boolean value from an attribute.""" - value = _getAttr(element, attrName) - if value == 'true': - return True - elif value == 'false': - return False - else: - return None - - -def _getDirectionAttr(element, attrName): - """Gets the Relation reading direction from an attribute.""" - regex = '^\s*(' + '|'.join(( - model.Relation.DIR_FORWARD, - model.Relation.DIR_BACKWARD)) + ')\s*$' - return _getAttr(element, 'direction', regex, model.Relation.DIR_NONE) - - -def _makeAbsoluteUri(prefix, uriStr): - """Creates an absolute URI adding prefix, if necessary.""" - if uriStr is None: - return None - - (scheme, netloc, path, params, query, frag) = urlparse.urlparse(uriStr) - - if scheme == '' and netloc == '': - return prefix + uriStr - else: - return uriStr - - -def _getResourceType(uri): - """Gets the resource type from a URI. - - The resource type is the basename of the URI's path. - """ - m = re.match('^' + NS_REL_1 + '(.*)$', uri) - - if m: - return m.group(1).lower() - else: - return None - -# EOF diff --git a/pyItunes/Library.py b/pyItunes/Library.py deleted file mode 100644 index 460a1519..00000000 --- a/pyItunes/Library.py +++ /dev/null @@ -1,41 +0,0 @@ -from pyItunes.Song import Song -import time -class Library: - def __init__(self,dictionary): - self.songs = self.parseDictionary(dictionary) - - def parseDictionary(self,dictionary): - songs = [] - format = "%Y-%m-%dT%H:%M:%SZ" - for song,attributes in dictionary.iteritems(): - s = Song() - s.name = attributes.get('Name') - s.artist = attributes.get('Artist') - s.album_artist = attributes.get('Album Aritst') - s.composer = attributes.get('Composer') - s.album = attributes.get('Album') - s.genre = attributes.get('Genre') - s.kind = attributes.get('Kind') - if attributes.get('Size'): - s.size = int(attributes.get('Size')) - s.total_time = attributes.get('Total Time') - s.track_number = attributes.get('Track Number') - if attributes.get('Year'): - s.year = int(attributes.get('Year')) - if attributes.get('Date Modified'): - s.date_modified = time.strptime(attributes.get('Date Modified'),format) - if attributes.get('Date Added'): - s.date_added = time.strptime(attributes.get('Date Added'),format) - if attributes.get('Bit Rate'): - s.bit_rate = int(attributes.get('Bit Rate')) - if attributes.get('Sample Rate'): - s.sample_rate = int(attributes.get('Sample Rate')) - s.comments = attributes.get("Comments ") - if attributes.get('Rating'): - s.rating = int(attributes.get('Rating')) - if attributes.get('Play Count'): - s.play_count = int(attributes.get('Play Count')) - if attributes.get('Location'): - s.location = attributes.get('Location') - songs.append(s) - return songs \ No newline at end of file diff --git a/pyItunes/Library.pyc b/pyItunes/Library.pyc deleted file mode 100644 index b8a8ca118591c3d3b67e3bba6ec4bab7d0f396fc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1990 zcma)7U2hvj6uqleB@NN?4UF`K9dxZxupK6HyS2h1QW!BG78>ous39KiC;p z7$r~Tg`dYW|AIdN=ic3Mj0_0dJDz*y+^@4cv%me(+j;Zu_p=$*pBDaK;j*7$Nbpm1 zLo`=(sYtOM&`m&>0VNGeT9h>D7orbD7niUDnvB};Gk@YUQkc0WhDn}XkKGn#Z_Tuf z%jGH?Q(oh;pJ33$Nst2(If0T%gTl!u!19a-f1R+?5r$i2!>H(k;&~(iCX#zBZf*~( zA}*~yH8W@O4EY^*X;GD}C|sn~^DDQ5NwT7iV37>=5VKuxT)cPwHu8HoXue%2Q^gck*C>`3J{_rmNKA^iM-L=S}74X`0+ad#Y zAf2@-YIvmKk)}tQ9%*@`<&jmKwnw%+XUiko9@+Lt$0Hp^@+YJqOMn@QnLn;r z0hz~K34GO$kLkNx-lUK4SdQ%h#^J@YVft*CoDaWxIXrtgoLtJ59mlD5yp@kjXRNbb zp3bL>bZ}f6XPrFwCwW@rR+qA!WL0KwbXIDK@{P%o>I5)9Ys;JKJa_SYaBfm9A?hGdH~H}uXFxy=kaoIk}tBVgSYt%Yk4-}^T)BLaT*Qk)P^v~H85OjC$(i>lUOuy ztVzt8L^Z%=&DJ%MRng>nrO6Fz!eI?+kRt=y$id@So5(>M3GG;$D7)8Y32nqfD4fi? zvJit6OCUy+){0dp86z<(Cbw&fL#hPiSTQS#Rd!vsC?@&Q5LMDPZJY+$@)3?X8V+!0fPr2*J&;4ggErO@Eki;+2u6Fl%-`aovjiamYe?KSU{&T?pANg_w@=v zF^><~M;_+ZtlH2X8!q{BzZtX;i=I!g==%hVL*Nl`06Yes0EfT`I0Bvm$G}Ix$G|h- zIq(Ab1o#yA4EP*45jXR=@A-@vqoLqZ2pjH3 z%{L(F7~9k)y!LbMg~q}krvY0w!$Kp!3}b?%&4AER~)&jYA8`VCbCP#}q*-tgb diff --git a/pyItunes/XMLLibraryParser.py b/pyItunes/XMLLibraryParser.py deleted file mode 100644 index 7e4b239a..00000000 --- a/pyItunes/XMLLibraryParser.py +++ /dev/null @@ -1,42 +0,0 @@ -import re -class XMLLibraryParser: - def __init__(self,xmlLibrary): - f = open(xmlLibrary) - s = f.read() - lines = s.split("\n") - self.dictionary = self.parser(lines) - - def getValue(self,restOfLine): - value = re.sub("<.*?>","",restOfLine) - u = unicode(value,"utf-8") - cleanValue = u.encode("ascii","xmlcharrefreplace") - return cleanValue - - def keyAndRestOfLine(self,line): - rawkey = re.search('(.*?)',line).group(0) - key = re.sub("","",rawkey) - restOfLine = re.sub(".*?","",line).strip() - return key,restOfLine - - def parser(self,lines): - dicts = 0 - songs = {} - inSong = False - for line in lines: - if re.search('',line): - dicts += 1 - if re.search('',line): - dicts -= 1 - inSong = False - songs[songkey] = temp - if dicts == 2 and re.search('(.*?)',line): - rawkey = re.search('(.*?)',line).group(0) - songkey = re.sub("","",rawkey) - inSong = True - temp = {} - if dicts == 3 and re.search('(.*?)',line): - key,restOfLine = self.keyAndRestOfLine(line) - temp[key] = self.getValue(restOfLine) - if len(songs) > 0 and dicts < 2: - return songs - return songs \ No newline at end of file diff --git a/pyItunes/XMLLibraryParser.pyc b/pyItunes/XMLLibraryParser.pyc deleted file mode 100644 index 79cd2bce121fb05d208bdfb5e47a253e5ded02d5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2010 zcmb7FTW=dh6h7l??1mItS#1@lka%&c(ugN25Bij*Cbo6t8!D=FFVSeDj^-zyAJU_|3taC#fty9sJ(M>psJf;cMij z$R322WF5)c@~6m&$g@4ewduHvefe8_cKwGCpFY{y$)7h#bM!;fINPio8!SG=>uzDN za0z>%vpqOADzq7dn$tLYU5TdS4h+&328QDABfRb|1~0NOa$@ATC5x8ikVTH$vS>@L zUb)nf1u}%P!<~?0xEEwjSG$gR4fPbBfvVb;Dl|69JR`cg$i44Ds`W&E2ryRdUE^b zy={0xL{8^^_W2z>LE_Rp_a8xrW2Og5)7V*K>mo_5-V_3fG}ddf^XI-#>oQNP%qobs zq^PJx45x@S*7@&eI{-&x{GJqZtEkO&IW25b{!Hb0q1(4!?FNXngl+`jF1-i3pP0-k zbi@sW0*P8nesKwiDQMstQElsfXu3o)K%IkS;2@^TxLIq3Dyw7i(jIM(0qu>+7R4Mo zafE2Hb(4x{or(=AIK|GeG2nF(dIizhq)87HtNo^$*AboT)cGc_|Aj7@LDH#Nll%d% zdNufsE@SOHfHDZ|9+X+Mx-X`*1a{Mm%w_=g0<@%!a`HPA0oVu<9%yv}N#OJP3j@R+ z|8~P1cLjGHuwEz~s|_*07+`-1tI~*BZUC%d!}Ze*cgXDkcuz6!%Y+6r@5+moRMV9; zHU#VYXH>k}&`H)b85BlevNc((=}q?uE|TT}dYTaZ57hs?GtR18h!47ict(Cj$qnRq zsEw#Ei=j3mtPVo1C~~j`%?T;I=ty;Gl|K4W7n8P3&8utj3xzS;&u$u17Nw2gva z5h*BGhVFfwJ3}{|9Xw!#D1*LE3TGqUYjl;vH4e%za_PrrZlmi|UBRGKDr`CCg=;xS zOi%qVjnMR5Rqi__U|vokN1NP3Z{8l(+99}wlw?1QKH+dqaz=DF!mIx;VtV@YrDrai z4ZK6lH_WFX=m^pf(#TxFI567umVLHN4s9H3&xzy1Dw`LWMtV3NHyoUi-j$6?|J%nS j*yw+k)=JL|dVGQx-c-Pwo6hT5GoOT9x0MC-w{HFc;4x?( diff --git a/pyItunes/__init__.py b/pyItunes/__init__.py deleted file mode 100644 index bc7acfad..00000000 --- a/pyItunes/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from pyItunes.XMLLibraryParser import XMLLibraryParser -from pyItunes.Library import Library -from pyItunes.Song import Song \ No newline at end of file diff --git a/pyItunes/__init__.pyc b/pyItunes/__init__.pyc deleted file mode 100644 index 5090244ddebc2c0ca24516a87d3f66a2785eab20..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 305 zcmcckiI>YgBq-s=4F<|$LkeTmT&+iY;yBcN^?@}K;{*L+$O*%zz6_|+d?V; diff --git a/searcher.py b/searcher.py deleted file mode 100644 index dc859435..00000000 --- a/searcher.py +++ /dev/null @@ -1,212 +0,0 @@ -import urllib -from webServer import database -from headphones import config_file -from configobj import ConfigObj -import string -import feedparser -import sqlite3 -import re -import logger - - -config = ConfigObj(config_file) -General = config['General'] -NZBMatrix = config['NZBMatrix'] -SABnzbd = config['SABnzbd'] -Newznab = config['Newznab'] -NZBsorg = config['NZBsorg'] -usenet_retention = General['usenet_retention'] -include_lossless = General['include_lossless'] -nzbmatrix = NZBMatrix['nzbmatrix'] -nzbmatrix_username = NZBMatrix['nzbmatrix_username'] -nzbmatrix_apikey = NZBMatrix['nzbmatrix_apikey'] -newznab = Newznab['newznab'] -newznab_host = Newznab['newznab_host'] -newznab_apikey = Newznab['newznab_apikey'] -nzbsorg = NZBsorg['nzbsorg'] -nzbsorg_uid = NZBsorg['nzbsorg_uid'] -nzbsorg_hash = NZBsorg['nzbsorg_hash'] -sab_host = SABnzbd['sab_host'] -sab_username = SABnzbd['sab_username'] -sab_password = SABnzbd['sab_password'] -sab_apikey = SABnzbd['sab_apikey'] -sab_category = SABnzbd['sab_category'] - - - -def searchNZB(albumid=None): - - conn=sqlite3.connect(database) - c=conn.cursor() - - if albumid: - c.execute('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE Status="Wanted" AND AlbumID="%s"' % albumid) - else: - c.execute('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE Status="Wanted"') - - results = c.fetchall() - - for albums in results: - - reldate = albums[3] - year = reldate[:4] - clname = string.replace(albums[0], ' & ', ' ') - clalbum = string.replace(albums[1], ' & ', ' ') - term1 = re.sub('[\.\-]', ' ', '%s %s %s' % (clname, clalbum, year)).encode('utf-8') - term = string.replace(term1, '"', '') - - logger.log(u"Searching for "+term+" since it was marked as wanted") - - resultlist = [] - - if nzbmatrix == '1': - - if include_lossless == '1': - categories = "23,22" - maxsize = 2000000000 - else: - categories = "22" - maxsize = 250000000 - - - params = { "page": "download", - "username": nzbmatrix_username, - "apikey": nzbmatrix_apikey, - "subcat": categories, - "age": usenet_retention, - "english": 1, - "ssl": 1, - "scenename": 1, - "term": term - } - - searchURL = "http://rss.nzbmatrix.com/rss.php?" + urllib.urlencode(params) - logger.log(u"Parsing results from "+searchURL) - d = feedparser.parse(searchURL) - - for item in d.entries: - try: - url = item.link - title = item.title - size = int(item.links[1]['length']) - if size < maxsize: - resultlist.append((title, size, url)) - logger.log(u"Found " + title +" : " + url + " (Size: " + size + ")") - else: - logger.log(title + u" is larger than the maxsize for this category, skipping. (Size: " + size+")", logger.WARNING) - - - except: - logger.log(u"No results found") - - if newznab == '1': - - if include_lossless == '1': - categories = "3040,3010" - maxsize = 2000000000 - else: - categories = "3010" - maxsize = 250000000 - - params = { "t": "search", - "apikey": newznab_apikey, - "cat": categories, - "maxage": usenet_retention, - "q": term - } - - searchURL = newznab_host + '/api?' + urllib.urlencode(params) - logger.log(u"Parsing results from "+searchURL) - - d = feedparser.parse(searchURL) - - for item in d.entries: - try: - url = item.link - title = item.title - size = int(item.links[1]['length']) - if size < maxsize: - resultlist.append((title, size, url)) - logger.log(u"Found " + title +" : " + url + " (Size: " + size + ")") - else: - logger.log(title + u" is larger than the maxsize for this category, skipping. (Size: " + size+")", logger.WARNING) - - except: - logger.log(u"No results found") - - if nzbsorg == '1': - - if include_lossless == '1': - categories = "5,3010" - maxsize = 2000000000 - else: - categories = "5" - maxsize = 250000000 - - params = { "action": "search", - "dl": 1, - "i": nzbsorg_uid, - "h": nzbsorg_hash, - "age": usenet_retention, - "q": term - } - - searchURL = 'https://secure.nzbs.org/rss.php?' + urllib.urlencode(params) - - logger.log(u"Parsing results from "+searchURL) - d = feedparser.parse(searchURL) - - for item in d.entries: - try: - url = item.link - title = item.title - size = int(item.links[1]['length']) - if size < maxsize: - resultlist.append((title, size, url)) - logger.log(u"Found " + title +" : " + url + " (Size: " + size + ")") - else: - logger.log(title + u" is larger than the maxsize for this category, skipping. (Size: " + size +")", logger.WARNING) - - - except: - logger.log(u"No results found") - - if len(resultlist): - bestqual = sorted(resultlist, key=lambda title: title[1], reverse=True)[0] - - logger.log(u"Downloading: " + bestqual[0]) - downloadurl = bestqual[2] - - linkparams = {} - - linkparams["mode"] = "addurl" - - if sab_apikey != '': - linkparams["apikey"] = sab_apikey - if sab_username != '': - linkparams["ma_username"] = sab_username - if sab_password != '': - linkparams["ma_password"] = sab_password - if sab_category != '': - linkparams["cat"] = sab_category - - linkparams["name"] = downloadurl - - saburl = 'http://' + sab_host + '/sabnzbd/api?' + urllib.urlencode(linkparams) - logger.log(u"Sending link to SABNZBD: " + saburl) - - try: - urllib.urlopen(saburl) - - except: - logger.log(u"Unable to send link. Are you sure the host address is correct?", logger.ERROR) - - c.execute('UPDATE albums SET status = "Snatched" WHERE AlbumID="%s"' % albums[2]) - c.execute('INSERT INTO snatched VALUES( ?, ?, ?, ?, CURRENT_DATE, ?)', (albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched")) - conn.commit() - - else: - pass - - c.close() - \ No newline at end of file diff --git a/templates.py b/templates.py deleted file mode 100644 index b7bd364c..00000000 --- a/templates.py +++ /dev/null @@ -1,43 +0,0 @@ -from headphones import web_root - -_header = ''' - - - Headphones - - - - - -
''' - -_logobar = ''' - -
- ''' - -_nav = '''
''' - -_footer = ''' -
- - ''' \ No newline at end of file diff --git a/threadtools.py b/threadtools.py deleted file mode 100644 index c25f7515..00000000 --- a/threadtools.py +++ /dev/null @@ -1,41 +0,0 @@ -from cherrypy.process.plugins import SimplePlugin -from apscheduler.scheduler import Scheduler - -import os -import time -import threading -import Queue - -class threadtool(SimplePlugin): - - sched = Scheduler() - thread = None - - def __init__(self, bus): - SimplePlugin.__init__(self, bus) - - def start(self): - self.running = True - if not self.thread: - self.thread = threading.Thread(target=self.run) - self.thread.start() - self.sched.start() - start.priority = 80 - - def stop(self): - self.running = False - if self.thread: - self.thread.join() - self.thread = None - self.sched.shutdown() - stop.priority = 10 - - def run(self): - import updater - import searcher - import mover - from webServer import database - if os.path.exists(database): - self.sched.add_cron_job(updater.dbUpdate, hour=4, minute=0, second=0) - self.sched.add_interval_job(searcher.searchNZB, hours=12) - #self.sched.add_interval_job(mover.moveFiles, minutes=10) diff --git a/updater.py b/updater.py deleted file mode 100644 index 18217da5..00000000 --- a/updater.py +++ /dev/null @@ -1,76 +0,0 @@ -from webServer import database -import musicbrainz2.webservice as ws -import musicbrainz2.model as m -import musicbrainz2.utils as u -from mb import getReleaseGroup -import sqlite3 -import time - -import logger - -def dbUpdate(): - - conn=sqlite3.connect(database) - c=conn.cursor() - c.execute('SELECT ArtistID, ArtistName from artists WHERE Status="Active"') - - activeartists = c.fetchall() - - i = 0 - - while i < len(activeartists): - - artistid = activeartists[i][0] - artistname = activeartists[i][1] - logger.log(u"Updating album information for artist: " + artistname) - - c.execute('SELECT AlbumID from albums WHERE ArtistID="%s"' % artistid) - albumlist = c.fetchall() - - inc = ws.ArtistIncludes(releases=(m.Release.TYPE_OFFICIAL, m.Release.TYPE_ALBUM), releaseGroups=True) - artist = ws.Query().getArtistById(artistid, inc) - - for rg in artist.getReleaseGroups(): - - rgid = u.extractUuid(rg.id) - releaseid = getReleaseGroup(rgid) - inc = ws.ReleaseIncludes(artist=True, releaseEvents= True, tracks= True, releaseGroup=True) - results = ws.Query().getReleaseById(releaseid, inc) - - if any(releaseid in x for x in albumlist): - - logger.log(results.title + " already exists in the database. Updating ASIN, Release Date, Tracks") - - c.execute('UPDATE albums SET AlbumASIN="%s", ReleaseDate="%s" WHERE AlbumID="%s"' % (results.asin, results.getEarliestReleaseDate(), u.extractUuid(results.id))) - - for track in results.tracks: - c.execute('UPDATE tracks SET TrackDuration="%s" WHERE AlbumID="%s" AND TrackID="%s"' % (track.duration, u.extractUuid(results.id), u.extractUuid(track.id))) - conn.commit() - - else: - - logger.log(u"New album found! Adding "+results.title+"to the database...") - c.execute('INSERT INTO albums VALUES( ?, ?, ?, ?, ?, CURRENT_DATE, ?, ?)', (artistid, results.artist.name, results.title, results.asin, results.getEarliestReleaseDate(), u.extractUuid(results.id), 'Skipped')) - conn.commit() - c.execute('SELECT ReleaseDate, DateAdded from albums WHERE AlbumID="%s"' % u.extractUuid(results.id)) - - latestrelease = c.fetchall() - - if latestrelease[0][0] > latestrelease[0][1]: - - c.execute('UPDATE albums SET Status = "Wanted" WHERE AlbumID="%s"' % u.extractUuid(results.id)) - - else: - pass - - for track in results.tracks: - - c.execute('INSERT INTO tracks VALUES( ?, ?, ?, ?, ?, ?, ?, ?)', (artistid, results.artist.name, results.title, results.asin, u.extractUuid(results.id), track.title, track.duration, u.extractUuid(track.id))) - conn.commit() - time.sleep(1) - i += 1 - - conn.commit() - c.close() - conn.close() - diff --git a/webServer.py b/webServer.py deleted file mode 100644 index ce7b3e6d..00000000 --- a/webServer.py +++ /dev/null @@ -1,541 +0,0 @@ -import templates -import config -import cherrypy -import musicbrainz2.webservice as ws -import musicbrainz2.model as m -import musicbrainz2.utils as u -import os -import string -import time -import datetime -import sqlite3 -import sys -import configobj -from headphones import FULL_PATH, config_file -from mb import getReleaseGroup -import logger - -database = os.path.join(FULL_PATH, 'headphones.db') - -class Headphones: - - def index(self): - raise cherrypy.HTTPRedirect("home") - index.exposed=True - - def home(self): - page = [templates._header] - page.append(templates._logobar) - page.append(templates._nav) - - conn=sqlite3.connect(database) - c=conn.cursor() - c.execute('SELECT ArtistName, ArtistID, Status from artists order by ArtistSortName collate nocase') - results = c.fetchall() - if len(results): - i = 0 - page.append('''
- - - - - - ''') - while i < len(results): - c.execute('''SELECT AlbumTitle, ReleaseDate, DateAdded, AlbumID from albums WHERE ArtistID='%s' order by ReleaseDate DESC''' % results[i][1]) - latestalbum = c.fetchall() - today = datetime.date.today() - if len(latestalbum) > 0: - if latestalbum[0][1] > datetime.date.isoformat(today): - newalbumName = '%s' % (latestalbum[0][3], latestalbum[0][0]) - releaseDate = '(%s)' % latestalbum[0][1] - else: - newalbumName = '%s' % (latestalbum[0][3], latestalbum[0][0]) - releaseDate = "" - if len(latestalbum) == 0: - newalbumName = 'None' - releaseDate = "" - if results[i][2] == 'Paused': - newStatus = '''%s(resume)''' % (results[i][2], results[i][1]) - else: - newStatus = '''%s(pause)''' % (results[i][2], results[i][1]) - page.append(''' - - ''' % (results[i][1], results[i][0], results[i][1], results[i][1], newStatus, newalbumName, releaseDate)) - i = i+1 - c.close() - page.append('''
Artist NameStatusUpcoming Albums
%s - (link) [delete]%s%s %s
''') - page.append(templates._footer) - - else: - page.append("""
Add some artists to the database!
""") - return page - home.exposed = True - - - def artistPage(self, ArtistID): - page = [templates._header] - page.append(templates._logobar) - page.append(templates._nav) - conn=sqlite3.connect(database) - c=conn.cursor() - c.execute('''SELECT ArtistName from artists WHERE ArtistID="%s"''' % ArtistID) - artistname = c.fetchall() - c.execute('''SELECT AlbumTitle, ReleaseDate, AlbumID, Status, ArtistName, AlbumASIN from albums WHERE ArtistID="%s" order by ReleaseDate DESC''' % ArtistID) - results = c.fetchall() - c.close() - i = 0 - page.append('''
-

%s

- - - - - - - ''' % (artistname[0])) - while i < len(results): - if results[i][3] == 'Skipped': - newStatus = '''%s [want]''' % (results[i][3], results[i][2], ArtistID) - elif results[i][3] == 'Wanted': - newStatus = '''%s[skip]''' % (results[i][3], results[i][2], ArtistID) - elif results[i][3] == 'Downloaded': - newStatus = '''%s[retry]''' % (results[i][3], results[i][2], ArtistID) - elif results[i][3] == 'Snatched': - newStatus = '''%s[retry]''' % (results[i][3], results[i][2], ArtistID) - else: - newStatus = '%s' % (results[i][3]) - page.append(''' - - - ''' % (results[i][5], results[i][2], results[i][0], results[i][2], results[i][1], newStatus)) - i = i+1 - page.append('''
Album NameRelease DateStatus
%s - (link)%s%s
''') - page.append(templates._footer) - return page - artistPage.exposed = True - - - def albumPage(self, AlbumID): - page = [templates._header] - page.append(templates._logobar) - page.append(templates._nav) - conn=sqlite3.connect(database) - c=conn.cursor() - c.execute('''SELECT ArtistID, ArtistName, AlbumTitle, TrackTitle, TrackDuration, TrackID, AlbumASIN from tracks WHERE AlbumID="%s"''' % AlbumID) - results = c.fetchall() - if results[0][6]: - albumart = '''


''' % results[0][6] - else: - albumart = '' - c.close() - i = 0 - page.append('''''') - - - page.append(templates._footer) - return page - - albumPage.exposed = True - - - def findArtist(self, name): - - page = [templates._header] - if len(name) == 0 or name == 'Add an artist': - raise cherrypy.HTTPRedirect("home") - else: - artistResults = ws.Query().getArtists(ws.ArtistFilter(string.replace(name, '&', '%38'), limit=8)) - if len(artistResults) == 0: - logger.log(u"No results found for " + name) - page.append('''No results!Go back''') - return page - elif len(artistResults) > 1: - page.append('''Search returned multiple artists. Click the artist you want to add:

''') - for result in artistResults: - artist = result.artist - detail = artist.getDisambiguation() - if detail: - disambiguation = '(%s)' % detail - else: - disambiguation = '' - page.append('''%s %s (more info)
''' % (u.extractUuid(artist.id), artist.name, disambiguation, u.extractUuid(artist.id))) - return page - else: - for result in artistResults: - artist = result.artist - logger.log(u"Found one artist matching your search term: " + artist.name +" ("+ artist.id+")") - raise cherrypy.HTTPRedirect("addArtist?artistid=%s" % u.extractUuid(artist.id)) - - findArtist.exposed = True - - def artistInfo(self, artistid): - page = [templates._header] - inc = ws.ArtistIncludes(releases=(m.Release.TYPE_OFFICIAL, m.Release.TYPE_ALBUM), releaseGroups=True) - artist = ws.Query().getArtistById(artistid, inc) - page.append('''Artist Name: %s
''' % artist.name) - page.append('''Unique ID: %s

Albums:
''' % u.extractUuid(artist.id)) - for rg in artist.getReleaseGroups(): - page.append('''%s
''' % rg.title) - return page - - artistInfo.exposed = True - - def addArtist(self, artistid): - inc = ws.ArtistIncludes(releases=(m.Release.TYPE_OFFICIAL, m.Release.TYPE_ALBUM), releaseGroups=True) - artist = ws.Query().getArtistById(artistid, inc) - conn=sqlite3.connect(database) - c=conn.cursor() - c.execute('SELECT ArtistID from artists') - artistlist = c.fetchall() - if any(artistid in x for x in artistlist): - page = [templates._header] - page.append('''%s has already been added. Go back.''' % artist.name) - logger.log(artist.name + u" is already in the database!", logger.WARNING) - c.close() - return page - - else: - logger.log(u"Adding " + artist.name + " to the database.") - c.execute('INSERT INTO artists VALUES( ?, ?, ?, CURRENT_DATE, ?)', (artistid, artist.name, artist.sortName, 'Active')) - for rg in artist.getReleaseGroups(): - rgid = u.extractUuid(rg.id) - - releaseid = getReleaseGroup(rgid) - - inc = ws.ReleaseIncludes(artist=True, releaseEvents= True, tracks= True, releaseGroup=True) - results = ws.Query().getReleaseById(releaseid, inc) - - logger.log(u"Now adding album: " + results.title+ " to the database") - c.execute('INSERT INTO albums VALUES( ?, ?, ?, ?, ?, CURRENT_DATE, ?, ?)', (artistid, results.artist.name, results.title, results.asin, results.getEarliestReleaseDate(), u.extractUuid(results.id), 'Skipped')) - c.execute('SELECT ReleaseDate, DateAdded from albums WHERE AlbumID="%s"' % u.extractUuid(results.id)) - latestrelease = c.fetchall() - - if latestrelease[0][0] > latestrelease[0][1]: - logger.log(results.title + u" is an upcoming album. Setting its status to 'Wanted'...") - c.execute('UPDATE albums SET Status = "Wanted" WHERE AlbumID="%s"' % u.extractUuid(results.id)) - else: - pass - - for track in results.tracks: - c.execute('INSERT INTO tracks VALUES( ?, ?, ?, ?, ?, ?, ?, ?)', (artistid, results.artist.name, results.title, results.asin, u.extractUuid(results.id), track.title, track.duration, u.extractUuid(track.id))) - time.sleep(1) - - conn.commit() - c.close() - raise cherrypy.HTTPRedirect("home") - - addArtist.exposed = True - - def pauseArtist(self, ArtistID): - - conn=sqlite3.connect(database) - c=conn.cursor() - logger.log(u"Pausing artist: " + ArtistID) - c.execute('UPDATE artists SET status = "Paused" WHERE ArtistId="%s"' % ArtistID) - conn.commit() - c.close() - raise cherrypy.HTTPRedirect("home") - - pauseArtist.exposed = True - - def resumeArtist(self, ArtistID): - conn=sqlite3.connect(database) - c=conn.cursor() - logger.log(u"Resuming artist: " + ArtistID) - c.execute('UPDATE artists SET status = "Active" WHERE ArtistId="%s"' % ArtistID) - conn.commit() - c.close() - raise cherrypy.HTTPRedirect("home") - - resumeArtist.exposed = True - - def deleteArtist(self, ArtistID): - conn=sqlite3.connect(database) - c=conn.cursor() - logger.log(u"Deleting all traces of artist: " + ArtistID) - c.execute('''DELETE from artists WHERE ArtistID="%s"''' % ArtistID) - c.execute('''DELETE from albums WHERE ArtistID="%s"''' % ArtistID) - c.execute('''DELETE from tracks WHERE ArtistID="%s"''' % ArtistID) - conn.commit() - c.close() - raise cherrypy.HTTPRedirect("home") - - deleteArtist.exposed = True - - def queueAlbum(self, AlbumID, ArtistID): - conn=sqlite3.connect(database) - c=conn.cursor() - logger.log(u"Marking album: " + AlbumID + "as wanted...") - c.execute('UPDATE albums SET status = "Wanted" WHERE AlbumID="%s"' % AlbumID) - conn.commit() - c.close() - import searcher - searcher.searchNZB(AlbumID) - raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % ArtistID) - - queueAlbum.exposed = True - - def unqueueAlbum(self, AlbumID, ArtistID): - conn=sqlite3.connect(database) - c=conn.cursor() - logger.log(u"Marking album: " + AlbumID + "as skipped...") - c.execute('UPDATE albums SET status = "Skipped" WHERE AlbumID="%s"' % AlbumID) - conn.commit() - c.close() - raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % ArtistID) - - unqueueAlbum.exposed = True - - def upcoming(self): - page = [templates._header] - page.append(templates._logobar) - page.append(templates._nav) - today = datetime.date.today() - todaysql = datetime.date.isoformat(today) - conn=sqlite3.connect(database) - c=conn.cursor() - c.execute('''SELECT AlbumTitle, ReleaseDate, DateAdded, AlbumASIN, AlbumID, ArtistName, ArtistID from albums WHERE ReleaseDate > date('now') order by ReleaseDate DESC''') - albums = c.fetchall() - page.append('''
- - - - - - ''') - if len(albums) == 0: - page.append("""
Upcoming Albums

No albums are coming out soon :(
- (try adding some more artists!)
""") - - i = 0 - while i < len(albums): - - if albums[i][3]: - albumart = '''


''' % (albums[i][3], albums[i][3]) - else: - albumart = 'No Album Art... yet.' - - page.append(''' - - - ''' % (albumart, albums[i][6], albums[i][5], albums[i][4], albums[i][0], albums[i][1])) - i += 1 - page.append('''
%s%s%s (%s)
''') - if len(albums): - page.append(templates._footer) - - return page - upcoming.exposed = True - - def manage(self): - config = configobj.ConfigObj(config_file) - try: - path = config['General']['path_to_xml'] - except: - path = 'Absolute path to iTunes XML or Top-Level Music Directory' - try: - path2 = config['General']['path_to_itunes'] - except: - path2 = 'Enter a directory to scan' - page = [templates._header] - page.append(templates._logobar) - page.append(templates._nav) - page.append(''' -

Scan Music Library


- Where do you keep your music?

- You can put in any directory, and it will scan for audio files in that folder - (including all subdirectories)

For example: '/Users/name/Music' -

- It may take a while depending on how many files you have. You can navigate away from the page
- as soon as you click 'Submit' -

- -
- -


-

Import or Sync Your iTunes Library/Music Folder


- This is here for legacy purposes (try the Music Scanner above!)

- If you'd rather import an iTunes .xml file, you can enter the full path here.

-
- -


- ''' % (path2, path)) - page.append(templates._footer) - return page - manage.exposed = True - - def importItunes(self, path): - config = configobj.ConfigObj(config_file) - config['General']['path_to_xml'] = path - config.write() - import itunesimport - itunesimport.itunesImport(path) - raise cherrypy.HTTPRedirect("home") - importItunes.exposed = True - - def musicScan(self, path): - config = configobj.ConfigObj(config_file) - config['General']['path_to_itunes'] = path - config.write() - import itunesimport - itunesimport.scanMusic(path) - raise cherrypy.HTTPRedirect("home") - musicScan.exposed = True - - def forceUpdate(self): - import updater - updater.dbUpdate() - raise cherrypy.HTTPRedirect("home") - forceUpdate.exposed = True - - def forceSearch(self): - import searcher - searcher.searchNZB() - raise cherrypy.HTTPRedirect("home") - forceSearch.exposed = True - - - def history(self): - page = [templates._header] - page.append(templates._logobar) - page.append(templates._nav) - conn=sqlite3.connect(database) - c=conn.cursor() - c.execute('''SELECT AlbumID, Title TEXT, Size INTEGER, URL TEXT, DateAdded TEXT, Status TEXT from snatched order by DateAdded DESC''') - snatched = c.fetchall() - page.append('''
- - - - - - ''') - if len(snatched) == 0: - page.append("""
History clear all

""") - - i = 0 - while i < len(snatched): - mb = snatched[i][2] / 1048576 - size = '%.2fM' % mb - page.append(''' - - - - - ''' % (snatched[i][5], snatched[i][1], size, snatched[i][4])) - i += 1 - page.append('''
%s%s%s%s
''') - if len(snatched): - page.append(templates._footer) - return page - history.exposed = True - - def clearhistory(self): - conn=sqlite3.connect(database) - c=conn.cursor() - logger.log(u"Clearing history") - c.execute('''DELETE from snatched''') - conn.commit() - c.close() - raise cherrypy.HTTPRedirect("history") - clearhistory.exposed = True - - def config(self): - page = [templates._header] - page.append(templates._logobar) - page.append(templates._nav) - page.append(config.form) - #page.append(templates._footer) - return page - - config.exposed = True - - - def configUpdate(self, http_host='127.0.0.1', http_username=None, http_port=8181, http_password=None, launch_browser=0, - sab_host=None, sab_username=None, sab_apikey=None, sab_password=None, sab_category=None, music_download_dir=None, - usenet_retention=None, nzbmatrix=0, nzbmatrix_username=None, nzbmatrix_apikey=None, newznab=0, newznab_host=None, newznab_apikey=None, - nzbsorg=0, nzbsorg_uid=None, nzbsorg_hash=None, include_lossless=0,flac_to_mp3=0, move_to_itunes=0, path_to_itunes=None, rename_mp3s=0, cleanup=0, add_album_art=0): - - configs = configobj.ConfigObj(config_file) - SABnzbd = configs['SABnzbd'] - General = configs['General'] - NZBMatrix = configs['NZBMatrix'] - Newznab = configs['Newznab'] - NZBsorg = configs['NZBsorg'] - General['http_host'] = http_host - General['http_port'] = http_port - General['http_username'] = http_username - General['http_password'] = http_password - General['launch_browser'] = launch_browser - SABnzbd['sab_host'] = sab_host - SABnzbd['sab_username'] = sab_username - SABnzbd['sab_password'] = sab_password - SABnzbd['sab_apikey'] = sab_apikey - SABnzbd['sab_category'] = sab_category - General['music_download_dir'] = music_download_dir - General['usenet_retention'] = usenet_retention - NZBMatrix['nzbmatrix'] = nzbmatrix - NZBMatrix['nzbmatrix_username'] = nzbmatrix_username - NZBMatrix['nzbmatrix_apikey'] = nzbmatrix_apikey - Newznab['newznab'] = newznab - Newznab['newznab_host'] = newznab_host - Newznab['newznab_apikey'] = newznab_apikey - NZBsorg['nzbsorg'] = nzbsorg - NZBsorg['nzbsorg_uid'] = nzbsorg_uid - NZBsorg['nzbsorg_hash'] = nzbsorg_hash - General['include_lossless'] = include_lossless - General['flac_to_mp3'] = flac_to_mp3 - General['move_to_itunes'] = move_to_itunes - General['path_to_itunes'] = path_to_itunes - General['rename_mp3s'] = rename_mp3s - General['cleanup'] = cleanup - General['add_album_art'] = add_album_art - - configs.write() - reload(config) - raise cherrypy.HTTPRedirect("config") - - - configUpdate.exposed = True - - def shutdown(self): - sys.exit(0) - - shutdown.exposed = True - - def restart(self): - logger.log(u"Restarting Headphones.") - restart = True - #answer = raw_input("Do you want to restart this program ? ") - #if answer.strip() in "y Y yes Yes YES".split(): - #restart = True - if restart: - python = sys.executable - os.execl(python, python, * sys.argv) - - restart.exposed = True \ No newline at end of file