diff --git a/.gitignore b/.gitignore index 939db295..f1b2e7cb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,17 @@ +# Compiled source # +################### *.pyc -*.log \ No newline at end of file + +# Logs and databases # +###################### +*.log +*.db +*.ini +logs/* + +# OS generated files # +###################### +.DS_Store? +ehthumbs.db +Icon? +Thumbs.db \ No newline at end of file diff --git a/apscheduler/__init__.py b/apscheduler/__init__.py deleted file mode 100644 index 6b502147..00000000 --- a/apscheduler/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -version_info = (2, 0, 0, 'rc', 2) -version = '.'.join(str(n) for n in version_info[:3]) -release = version + ''.join(str(n) for n in version_info[3:]) diff --git a/apscheduler/events.py b/apscheduler/events.py deleted file mode 100644 index 80bde8e6..00000000 --- a/apscheduler/events.py +++ /dev/null @@ -1,64 +0,0 @@ -__all__ = ('EVENT_SCHEDULER_START', 'EVENT_SCHEDULER_SHUTDOWN', - 'EVENT_JOBSTORE_ADDED', 'EVENT_JOBSTORE_REMOVED', - 'EVENT_JOBSTORE_JOB_ADDED', 'EVENT_JOBSTORE_JOB_REMOVED', - 'EVENT_JOB_EXECUTED', 'EVENT_JOB_ERROR', 'EVENT_JOB_MISSED', - 'EVENT_ALL', 'SchedulerEvent', 'JobStoreEvent', 'JobEvent') - - -EVENT_SCHEDULER_START = 1 # The scheduler was started -EVENT_SCHEDULER_SHUTDOWN = 2 # The scheduler was shut down -EVENT_JOBSTORE_ADDED = 4 # A job store was added to the scheduler -EVENT_JOBSTORE_REMOVED = 8 # A job store was removed from the scheduler -EVENT_JOBSTORE_JOB_ADDED = 16 # A job was added to a job store -EVENT_JOBSTORE_JOB_REMOVED = 32 # A job was removed from a job store -EVENT_JOB_EXECUTED = 64 # A job was executed successfully -EVENT_JOB_ERROR = 128 # A job raised an exception during execution -EVENT_JOB_MISSED = 256 # A job's execution was missed -EVENT_ALL = (EVENT_SCHEDULER_START | EVENT_SCHEDULER_SHUTDOWN | - EVENT_JOBSTORE_ADDED | EVENT_JOBSTORE_REMOVED | - EVENT_JOBSTORE_JOB_ADDED | EVENT_JOBSTORE_JOB_REMOVED | - EVENT_JOB_EXECUTED | EVENT_JOB_ERROR | 
EVENT_JOB_MISSED) - - -class SchedulerEvent(object): - """ - An event that concerns the scheduler itself. - - :var code: the type code of this event - """ - def __init__(self, code): - self.code = code - - -class JobStoreEvent(SchedulerEvent): - """ - An event that concerns job stores. - - :var alias: the alias of the job store involved - :var job: the new job if a job was added - """ - def __init__(self, code, alias, job=None): - SchedulerEvent.__init__(self, code) - self.alias = alias - if job: - self.job = job - - -class JobEvent(SchedulerEvent): - """ - An event that concerns the execution of individual jobs. - - :var job: the job instance in question - :var scheduled_run_time: the time when the job was scheduled to be run - :var retval: the return value of the successfully executed job - :var exception: the exception raised by the job - :var traceback: the traceback object associated with the exception - """ - def __init__(self, code, job, scheduled_run_time, retval=None, - exception=None, traceback=None): - SchedulerEvent.__init__(self, code) - self.job = job - self.scheduled_run_time = scheduled_run_time - self.retval = retval - self.exception = exception - self.traceback = traceback diff --git a/apscheduler/job.py b/apscheduler/job.py deleted file mode 100644 index 868e7234..00000000 --- a/apscheduler/job.py +++ /dev/null @@ -1,134 +0,0 @@ -""" -Jobs represent scheduled tasks. -""" - -from threading import Lock -from datetime import timedelta - -from apscheduler.util import to_unicode, ref_to_obj, get_callable_name,\ - obj_to_ref - - -class MaxInstancesReachedError(Exception): - pass - - -class Job(object): - """ - Encapsulates the actual Job along with its metadata. Job instances - are created by the scheduler when adding jobs, and it should not be - directly instantiated. 
- - :param trigger: trigger that determines the execution times - :param func: callable to call when the trigger is triggered - :param args: list of positional arguments to call func with - :param kwargs: dict of keyword arguments to call func with - :param name: name of the job (optional) - :param misfire_grace_time: seconds after the designated run time that - the job is still allowed to be run - :param coalesce: run once instead of many times if the scheduler determines - that the job should be run more than once in succession - :param max_runs: maximum number of times this job is allowed to be - triggered - :param max_instances: maximum number of concurrently running - instances allowed for this job - """ - id = None - next_run_time = None - - def __init__(self, trigger, func, args, kwargs, misfire_grace_time, - coalesce, name=None, max_runs=None, max_instances=1): - if not trigger: - raise ValueError('The trigger must not be None') - if not hasattr(func, '__call__'): - raise TypeError('func must be callable') - if not hasattr(args, '__getitem__'): - raise TypeError('args must be a list-like object') - if not hasattr(kwargs, '__getitem__'): - raise TypeError('kwargs must be a dict-like object') - if misfire_grace_time <= 0: - raise ValueError('misfire_grace_time must be a positive value') - if max_runs is not None and max_runs <= 0: - raise ValueError('max_runs must be a positive value') - if max_instances <= 0: - raise ValueError('max_instances must be a positive value') - - self._lock = Lock() - - self.trigger = trigger - self.func = func - self.args = args - self.kwargs = kwargs - self.name = to_unicode(name or get_callable_name(func)) - self.misfire_grace_time = misfire_grace_time - self.coalesce = coalesce - self.max_runs = max_runs - self.max_instances = max_instances - self.runs = 0 - self.instances = 0 - - def compute_next_run_time(self, now): - if self.runs == self.max_runs: - self.next_run_time = None - else: - self.next_run_time = 
self.trigger.get_next_fire_time(now) - - return self.next_run_time - - def get_run_times(self, now): - """ - Computes the scheduled run times between ``next_run_time`` and ``now``. - """ - run_times = [] - run_time = self.next_run_time - increment = timedelta(microseconds=1) - while ((not self.max_runs or self.runs < self.max_runs) and - run_time and run_time <= now): - run_times.append(run_time) - run_time = self.trigger.get_next_fire_time(run_time + increment) - - return run_times - - def add_instance(self): - self._lock.acquire() - try: - if self.instances == self.max_instances: - raise MaxInstancesReachedError - self.instances += 1 - finally: - self._lock.release() - - def remove_instance(self): - self._lock.acquire() - try: - assert self.instances > 0, 'Already at 0 instances' - self.instances -= 1 - finally: - self._lock.release() - - def __getstate__(self): - # Prevents the unwanted pickling of transient or unpicklable variables - state = self.__dict__.copy() - state.pop('instances', None) - state.pop('func', None) - state.pop('_lock', None) - state['func_ref'] = obj_to_ref(self.func) - return state - - def __setstate__(self, state): - state['instances'] = 0 - state['func'] = ref_to_obj(state.pop('func_ref')) - state['_lock'] = Lock() - self.__dict__ = state - - def __eq__(self, other): - if isinstance(other, Job): - return self.id is not None and other.id == self.id or self is other - return NotImplemented - - def __repr__(self): - return '' % (self.name, repr(self.trigger)) - - def __str__(self): - return '%s (trigger: %s, next run at: %s)' % (self.name, - str(self.trigger), str(self.next_run_time)) diff --git a/apscheduler/jobstores/__init__.py b/apscheduler/jobstores/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/apscheduler/jobstores/base.py b/apscheduler/jobstores/base.py deleted file mode 100644 index f0a16ddb..00000000 --- a/apscheduler/jobstores/base.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Abstract base class that 
provides the interface needed by all job stores. -Job store methods are also documented here. -""" - - -class JobStore(object): - def add_job(self, job): - """Adds the given job from this store.""" - raise NotImplementedError - - def update_job(self, job): - """Persists the running state of the given job.""" - raise NotImplementedError - - def remove_job(self, job): - """Removes the given jobs from this store.""" - raise NotImplementedError - - def load_jobs(self): - """Loads jobs from this store into memory.""" - raise NotImplementedError - - def close(self): - """Frees any resources still bound to this job store.""" diff --git a/apscheduler/jobstores/mongodb_store.py b/apscheduler/jobstores/mongodb_store.py deleted file mode 100644 index 3f522c25..00000000 --- a/apscheduler/jobstores/mongodb_store.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -Stores jobs in a MongoDB database. -""" -import logging - -from apscheduler.jobstores.base import JobStore -from apscheduler.job import Job - -try: - import cPickle as pickle -except ImportError: # pragma: nocover - import pickle - -try: - from bson.binary import Binary - from pymongo.connection import Connection -except ImportError: # pragma: nocover - raise ImportError('MongoDBJobStore requires PyMongo installed') - -logger = logging.getLogger(__name__) - - -class MongoDBJobStore(JobStore): - def __init__(self, database='apscheduler', collection='jobs', - connection=None, pickle_protocol=pickle.HIGHEST_PROTOCOL, - **connect_args): - self.jobs = [] - self.pickle_protocol = pickle_protocol - - if not database: - raise ValueError('The "database" parameter must not be empty') - if not collection: - raise ValueError('The "collection" parameter must not be empty') - - if connection: - self.connection = connection - else: - self.connection = Connection(**connect_args) - - self.collection = self.connection[database][collection] - - def add_job(self, job): - job_dict = job.__getstate__() - job_dict['trigger'] = 
Binary(pickle.dumps(job.trigger, - self.pickle_protocol)) - job_dict['args'] = Binary(pickle.dumps(job.args, - self.pickle_protocol)) - job_dict['kwargs'] = Binary(pickle.dumps(job.kwargs, - self.pickle_protocol)) - job.id = self.collection.insert(job_dict) - self.jobs.append(job) - - def remove_job(self, job): - self.collection.remove(job.id) - self.jobs.remove(job) - - def load_jobs(self): - jobs = [] - for job_dict in self.collection.find(): - try: - job = Job.__new__(Job) - job_dict['id'] = job_dict.pop('_id') - job_dict['trigger'] = pickle.loads(job_dict['trigger']) - job_dict['args'] = pickle.loads(job_dict['args']) - job_dict['kwargs'] = pickle.loads(job_dict['kwargs']) - job.__setstate__(job_dict) - jobs.append(job) - except Exception: - job_name = job_dict.get('name', '(unknown)') - logger.exception('Unable to restore job "%s"', job_name) - self.jobs = jobs - - def update_job(self, job): - spec = {'_id': job.id} - document = {'$set': {'next_run_time': job.next_run_time}, - '$inc': {'runs': 1}} - self.collection.update(spec, document) - - def close(self): - self.connection.disconnect() - - def __repr__(self): - connection = self.collection.database.connection - return '<%s (connection=%s)>' % (self.__class__.__name__, connection) diff --git a/apscheduler/jobstores/ram_store.py b/apscheduler/jobstores/ram_store.py deleted file mode 100644 index 85091fe8..00000000 --- a/apscheduler/jobstores/ram_store.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Stores jobs in an array in RAM. Provides no persistence support. 
-""" - -from apscheduler.jobstores.base import JobStore - - -class RAMJobStore(JobStore): - def __init__(self): - self.jobs = [] - - def add_job(self, job): - self.jobs.append(job) - - def update_job(self, job): - pass - - def remove_job(self, job): - self.jobs.remove(job) - - def load_jobs(self): - pass - - def __repr__(self): - return '<%s>' % (self.__class__.__name__) diff --git a/apscheduler/jobstores/shelve_store.py b/apscheduler/jobstores/shelve_store.py deleted file mode 100644 index 87c95f8f..00000000 --- a/apscheduler/jobstores/shelve_store.py +++ /dev/null @@ -1,65 +0,0 @@ -""" -Stores jobs in a file governed by the :mod:`shelve` module. -""" - -import shelve -import pickle -import random -import logging - -from apscheduler.jobstores.base import JobStore -from apscheduler.job import Job -from apscheduler.util import itervalues - -logger = logging.getLogger(__name__) - - -class ShelveJobStore(JobStore): - MAX_ID = 1000000 - - def __init__(self, path, pickle_protocol=pickle.HIGHEST_PROTOCOL): - self.jobs = [] - self.path = path - self.pickle_protocol = pickle_protocol - self.store = shelve.open(path, 'c', self.pickle_protocol) - - def _generate_id(self): - id = None - while not id: - id = str(random.randint(1, self.MAX_ID)) - if not id in self.store: - return id - - def add_job(self, job): - job.id = self._generate_id() - self.jobs.append(job) - self.store[job.id] = job.__getstate__() - - def update_job(self, job): - job_dict = self.store[job.id] - job_dict['next_run_time'] = job.next_run_time - job_dict['runs'] = job.runs - self.store[job.id] = job_dict - - def remove_job(self, job): - del self.store[job.id] - self.jobs.remove(job) - - def load_jobs(self): - jobs = [] - for job_dict in itervalues(self.store): - try: - job = Job.__new__(Job) - job.__setstate__(job_dict) - jobs.append(job) - except Exception: - job_name = job_dict.get('name', '(unknown)') - logger.exception('Unable to restore job "%s"', job_name) - - self.jobs = jobs - - def close(self): - 
self.store.close() - - def __repr__(self): - return '<%s (path=%s)>' % (self.__class__.__name__, self.path) diff --git a/apscheduler/jobstores/sqlalchemy_store.py b/apscheduler/jobstores/sqlalchemy_store.py deleted file mode 100644 index 8ece7e24..00000000 --- a/apscheduler/jobstores/sqlalchemy_store.py +++ /dev/null @@ -1,87 +0,0 @@ -""" -Stores jobs in a database table using SQLAlchemy. -""" -import pickle -import logging - -from apscheduler.jobstores.base import JobStore -from apscheduler.job import Job - -try: - from sqlalchemy import * -except ImportError: # pragma: nocover - raise ImportError('SQLAlchemyJobStore requires SQLAlchemy installed') - -logger = logging.getLogger(__name__) - - -class SQLAlchemyJobStore(JobStore): - def __init__(self, url=None, engine=None, tablename='apscheduler_jobs', - metadata=None, pickle_protocol=pickle.HIGHEST_PROTOCOL): - self.jobs = [] - self.pickle_protocol = pickle_protocol - - if engine: - self.engine = engine - elif url: - self.engine = create_engine(url) - else: - raise ValueError('Need either "engine" or "url" defined') - - self.jobs_t = Table(tablename, metadata or MetaData(), - Column('id', Integer, - Sequence(tablename + '_id_seq', optional=True), - primary_key=True), - Column('trigger', PickleType(pickle_protocol, mutable=False), - nullable=False), - Column('func_ref', String(1024), nullable=False), - Column('args', PickleType(pickle_protocol, mutable=False), - nullable=False), - Column('kwargs', PickleType(pickle_protocol, mutable=False), - nullable=False), - Column('name', Unicode(1024), unique=True), - Column('misfire_grace_time', Integer, nullable=False), - Column('coalesce', Boolean, nullable=False), - Column('max_runs', Integer), - Column('max_instances', Integer), - Column('next_run_time', DateTime, nullable=False), - Column('runs', BigInteger)) - - self.jobs_t.create(self.engine, True) - - def add_job(self, job): - job_dict = job.__getstate__() - result = 
self.engine.execute(self.jobs_t.insert().values(**job_dict)) - job.id = result.inserted_primary_key[0] - self.jobs.append(job) - - def remove_job(self, job): - delete = self.jobs_t.delete().where(self.jobs_t.c.id == job.id) - self.engine.execute(delete) - self.jobs.remove(job) - - def load_jobs(self): - jobs = [] - for row in self.engine.execute(select([self.jobs_t])): - try: - job = Job.__new__(Job) - job_dict = dict(row.items()) - job.__setstate__(job_dict) - jobs.append(job) - except Exception: - job_name = job_dict.get('name', '(unknown)') - logger.exception('Unable to restore job "%s"', job_name) - self.jobs = jobs - - def update_job(self, job): - job_dict = job.__getstate__() - update = self.jobs_t.update().where(self.jobs_t.c.id == job.id).\ - values(next_run_time=job_dict['next_run_time'], - runs=job_dict['runs']) - self.engine.execute(update) - - def close(self): - self.engine.dispose() - - def __repr__(self): - return '<%s (url=%s)>' % (self.__class__.__name__, self.engine.url) diff --git a/apscheduler/scheduler.py b/apscheduler/scheduler.py deleted file mode 100644 index ee08ad8b..00000000 --- a/apscheduler/scheduler.py +++ /dev/null @@ -1,559 +0,0 @@ -""" -This module is the main part of the library. It houses the Scheduler class -and related exceptions. -""" - -from threading import Thread, Event, Lock -from datetime import datetime, timedelta -from logging import getLogger -import os -import sys - -from apscheduler.util import * -from apscheduler.triggers import SimpleTrigger, IntervalTrigger, CronTrigger -from apscheduler.jobstores.ram_store import RAMJobStore -from apscheduler.job import Job, MaxInstancesReachedError -from apscheduler.events import * -from apscheduler.threadpool import ThreadPool - -logger = getLogger(__name__) - - -class SchedulerAlreadyRunningError(Exception): - """ - Raised when attempting to start or configure the scheduler when it's - already running. 
- """ - - def __str__(self): - return 'Scheduler is already running' - - -class Scheduler(object): - """ - This class is responsible for scheduling jobs and triggering - their execution. - """ - - _stopped = False - _thread = None - - def __init__(self, gconfig={}, **options): - self._wakeup = Event() - self._jobstores = {} - self._jobstores_lock = Lock() - self._listeners = [] - self._listeners_lock = Lock() - self._pending_jobs = [] - self.configure(gconfig, **options) - - def configure(self, gconfig={}, **options): - """ - Reconfigures the scheduler with the given options. Can only be done - when the scheduler isn't running. - """ - if self.running: - raise SchedulerAlreadyRunningError - - # Set general options - config = combine_opts(gconfig, 'apscheduler.', options) - self.misfire_grace_time = int(config.pop('misfire_grace_time', 1)) - self.coalesce = asbool(config.pop('coalesce', True)) - self.daemonic = asbool(config.pop('daemonic', True)) - - # Configure the thread pool - if 'threadpool' in config: - self._threadpool = maybe_ref(config['threadpool']) - else: - threadpool_opts = combine_opts(config, 'threadpool.') - self._threadpool = ThreadPool(**threadpool_opts) - - # Configure job stores - jobstore_opts = combine_opts(config, 'jobstore.') - jobstores = {} - for key, value in jobstore_opts.items(): - store_name, option = key.split('.', 1) - opts_dict = jobstores.setdefault(store_name, {}) - opts_dict[option] = value - - for alias, opts in jobstores.items(): - classname = opts.pop('class') - cls = maybe_ref(classname) - jobstore = cls(**opts) - self.add_jobstore(jobstore, alias, True) - - def start(self): - """ - Starts the scheduler in a new thread. 
- """ - if self.running: - raise SchedulerAlreadyRunningError - - # Create a RAMJobStore as the default if there is no default job store - if not 'default' in self._jobstores: - self.add_jobstore(RAMJobStore(), 'default', True) - - # Schedule all pending jobs - for job, jobstore in self._pending_jobs: - self._real_add_job(job, jobstore, False) - del self._pending_jobs[:] - - self._stopped = False - self._thread = Thread(target=self._main_loop, name='APScheduler') - self._thread.setDaemon(self.daemonic) - self._thread.start() - - def shutdown(self, wait=True, shutdown_threadpool=True): - """ - Shuts down the scheduler and terminates the thread. - Does not interrupt any currently running jobs. - - :param wait: ``True`` to wait until all currently executing jobs have - finished (if ``shutdown_threadpool`` is also ``True``) - :param shutdown_threadpool: ``True`` to shut down the thread pool - """ - if not self.running: - return - - self._stopped = True - self._wakeup.set() - - # Shut down the thread pool - if shutdown_threadpool: - self._threadpool.shutdown(wait) - - # Wait until the scheduler thread terminates - self._thread.join() - - @property - def running(self): - return not self._stopped and self._thread and self._thread.isAlive() - - def add_jobstore(self, jobstore, alias, quiet=False): - """ - Adds a job store to this scheduler. 
- - :param jobstore: job store to be added - :param alias: alias for the job store - :param quiet: True to suppress scheduler thread wakeup - :type jobstore: instance of - :class:`~apscheduler.jobstores.base.JobStore` - :type alias: str - """ - self._jobstores_lock.acquire() - try: - if alias in self._jobstores: - raise KeyError('Alias "%s" is already in use' % alias) - self._jobstores[alias] = jobstore - jobstore.load_jobs() - finally: - self._jobstores_lock.release() - - # Notify listeners that a new job store has been added - self._notify_listeners(JobStoreEvent(EVENT_JOBSTORE_ADDED, alias)) - - # Notify the scheduler so it can scan the new job store for jobs - if not quiet: - self._wakeup.set() - - def remove_jobstore(self, alias): - """ - Removes the job store by the given alias from this scheduler. - - :type alias: str - """ - self._jobstores_lock.acquire() - try: - try: - del self._jobstores[alias] - except KeyError: - raise KeyError('No such job store: %s' % alias) - finally: - self._jobstores_lock.release() - - # Notify listeners that a job store has been removed - self._notify_listeners(JobStoreEvent(EVENT_JOBSTORE_REMOVED, alias)) - - def add_listener(self, callback, mask=EVENT_ALL): - """ - Adds a listener for scheduler events. When a matching event occurs, - ``callback`` is executed with the event object as its sole argument. - If the ``mask`` parameter is not provided, the callback will receive - events of all types. - - :param callback: any callable that takes one argument - :param mask: bitmask that indicates which events should be listened to - """ - self._listeners_lock.acquire() - try: - self._listeners.append((callback, mask)) - finally: - self._listeners_lock.release() - - def remove_listener(self, callback): - """ - Removes a previously added event listener. 
- """ - self._listeners_lock.acquire() - try: - for i, (cb, _) in enumerate(self._listeners): - if callback == cb: - del self._listeners[i] - finally: - self._listeners_lock.release() - - def _notify_listeners(self, event): - self._listeners_lock.acquire() - try: - listeners = tuple(self._listeners) - finally: - self._listeners_lock.release() - - for cb, mask in listeners: - if event.code & mask: - try: - cb(event) - except: - logger.exception('Error notifying listener') - - def _real_add_job(self, job, jobstore, wakeup): - job.compute_next_run_time(datetime.now()) - if not job.next_run_time: - raise ValueError('Not adding job since it would never be run') - - self._jobstores_lock.acquire() - try: - try: - store = self._jobstores[jobstore] - except KeyError: - raise KeyError('No such job store: %s' % jobstore) - store.add_job(job) - finally: - self._jobstores_lock.release() - - # Notify listeners that a new job has been added - event = JobStoreEvent(EVENT_JOBSTORE_JOB_ADDED, jobstore, job) - self._notify_listeners(event) - - logger.info('Added job "%s" to job store "%s"', job, jobstore) - - # Notify the scheduler about the new job - if wakeup: - self._wakeup.set() - - def add_job(self, trigger, func, args, kwargs, jobstore='default', - **options): - """ - Adds the given job to the job list and notifies the scheduler thread. 
- - :param trigger: alias of the job store to store the job in - :param func: callable to run at the given time - :param args: list of positional arguments to call func with - :param kwargs: dict of keyword arguments to call func with - :param jobstore: alias of the job store to store the job in - :rtype: :class:`~apscheduler.job.Job` - """ - job = Job(trigger, func, args or [], kwargs or {}, - options.pop('misfire_grace_time', self.misfire_grace_time), - options.pop('coalesce', self.coalesce), **options) - if not self.running: - self._pending_jobs.append((job, jobstore)) - logger.info('Adding job tentatively -- it will be properly ' - 'scheduled when the scheduler starts') - else: - self._real_add_job(job, jobstore, True) - return job - - def _remove_job(self, job, alias, jobstore): - jobstore.remove_job(job) - - # Notify listeners that a job has been removed - event = JobStoreEvent(EVENT_JOBSTORE_JOB_REMOVED, alias, job) - self._notify_listeners(event) - - logger.info('Removed job "%s"', job) - - def add_date_job(self, func, date, args=None, kwargs=None, **options): - """ - Schedules a job to be completed on a specific date and time. - - :param func: callable to run at the given time - :param date: the date/time to run the job at - :param name: name of the job - :param jobstore: stored the job in the named (or given) job store - :param misfire_grace_time: seconds after the designated run time that - the job is still allowed to be run - :type date: :class:`datetime.date` - :rtype: :class:`~apscheduler.job.Job` - """ - trigger = SimpleTrigger(date) - return self.add_job(trigger, func, args, kwargs, **options) - - def add_interval_job(self, func, weeks=0, days=0, hours=0, minutes=0, - seconds=0, start_date=None, args=None, kwargs=None, - **options): - """ - Schedules a job to be completed on specified intervals. 
- - :param func: callable to run - :param weeks: number of weeks to wait - :param days: number of days to wait - :param hours: number of hours to wait - :param minutes: number of minutes to wait - :param seconds: number of seconds to wait - :param start_date: when to first execute the job and start the - counter (default is after the given interval) - :param args: list of positional arguments to call func with - :param kwargs: dict of keyword arguments to call func with - :param name: name of the job - :param jobstore: alias of the job store to add the job to - :param misfire_grace_time: seconds after the designated run time that - the job is still allowed to be run - :rtype: :class:`~apscheduler.job.Job` - """ - interval = timedelta(weeks=weeks, days=days, hours=hours, - minutes=minutes, seconds=seconds) - trigger = IntervalTrigger(interval, start_date) - return self.add_job(trigger, func, args, kwargs, **options) - - def add_cron_job(self, func, year='*', month='*', day='*', week='*', - day_of_week='*', hour='*', minute='*', second='*', - start_date=None, args=None, kwargs=None, **options): - """ - Schedules a job to be completed on times that match the given - expressions. 
- - :param func: callable to run - :param year: year to run on - :param month: month to run on (0 = January) - :param day: day of month to run on - :param week: week of the year to run on - :param day_of_week: weekday to run on (0 = Monday) - :param hour: hour to run on - :param second: second to run on - :param args: list of positional arguments to call func with - :param kwargs: dict of keyword arguments to call func with - :param name: name of the job - :param jobstore: alias of the job store to add the job to - :param misfire_grace_time: seconds after the designated run time that - the job is still allowed to be run - :return: the scheduled job - :rtype: :class:`~apscheduler.job.Job` - """ - trigger = CronTrigger(year=year, month=month, day=day, week=week, - day_of_week=day_of_week, hour=hour, - minute=minute, second=second, - start_date=start_date) - return self.add_job(trigger, func, args, kwargs, **options) - - def cron_schedule(self, **options): - """ - Decorator version of :meth:`add_cron_job`. - This decorator does not wrap its host function. - Unscheduling decorated functions is possible by passing the ``job`` - attribute of the scheduled function to :meth:`unschedule_job`. - """ - def inner(func): - func.job = self.add_cron_job(func, **options) - return func - return inner - - def interval_schedule(self, **options): - """ - Decorator version of :meth:`add_interval_job`. - This decorator does not wrap its host function. - Unscheduling decorated functions is possible by passing the ``job`` - attribute of the scheduled function to :meth:`unschedule_job`. - """ - def inner(func): - func.job = self.add_interval_job(func, **options) - return func - return inner - - def get_jobs(self): - """ - Returns a list of all scheduled jobs. 
- - :return: list of :class:`~apscheduler.job.Job` objects - """ - self._jobstores_lock.acquire() - try: - jobs = [] - for jobstore in itervalues(self._jobstores): - jobs.extend(jobstore.jobs) - return jobs - finally: - self._jobstores_lock.release() - - def unschedule_job(self, job): - """ - Removes a job, preventing it from being run any more. - """ - self._jobstores_lock.acquire() - try: - for alias, jobstore in iteritems(self._jobstores): - if job in list(jobstore.jobs): - self._remove_job(job, alias, jobstore) - return - finally: - self._jobstores_lock.release() - - raise KeyError('Job "%s" is not scheduled in any job store' % job) - - def unschedule_func(self, func): - """ - Removes all jobs that would execute the given function. - """ - found = False - self._jobstores_lock.acquire() - try: - for alias, jobstore in iteritems(self._jobstores): - for job in list(jobstore.jobs): - if job.func == func: - self._remove_job(job, alias, jobstore) - found = True - finally: - self._jobstores_lock.release() - - if not found: - raise KeyError('The given function is not scheduled in this ' - 'scheduler') - - def print_jobs(self, out=None): - """ - Prints out a textual listing of all jobs currently scheduled on this - scheduler. - - :param out: a file-like object to print to (defaults to **sys.stdout** - if nothing is given) - """ - out = out or sys.stdout - job_strs = [] - self._jobstores_lock.acquire() - try: - for alias, jobstore in iteritems(self._jobstores): - job_strs.append('Jobstore %s:' % alias) - if jobstore.jobs: - for job in jobstore.jobs: - job_strs.append(' %s' % job) - else: - job_strs.append(' No scheduled jobs') - finally: - self._jobstores_lock.release() - - out.write(os.linesep.join(job_strs)) - - def _run_job(self, job, run_times): - """ - Acts as a harness that runs the actual job code in a thread. 
- """ - for run_time in run_times: - # See if the job missed its run time window, and handle possible - # misfires accordingly - difference = datetime.now() - run_time - grace_time = timedelta(seconds=job.misfire_grace_time) - if difference > grace_time: - # Notify listeners about a missed run - event = JobEvent(EVENT_JOB_MISSED, job, run_time) - self._notify_listeners(event) - logger.warning('Run time of job "%s" was missed by %s', - job, difference) - else: - try: - job.add_instance() - except MaxInstancesReachedError: - event = JobEvent(EVENT_JOB_MISSED, job, run_time) - self._notify_listeners(event) - logger.warning('Execution of job "%s" skipped: ' - 'maximum number of running instances ' - 'reached (%d)', job, job.max_instances) - break - - logger.info('Running job "%s" (scheduled at %s)', job, - run_time) - - try: - retval = job.func(*job.args, **job.kwargs) - except: - # Notify listeners about the exception - exc, tb = sys.exc_info()[1:] - event = JobEvent(EVENT_JOB_ERROR, job, run_time, - exception=exc, traceback=tb) - self._notify_listeners(event) - - logger.exception('Job "%s" raised an exception', job) - else: - # Notify listeners about successful execution - event = JobEvent(EVENT_JOB_EXECUTED, job, run_time, - retval=retval) - self._notify_listeners(event) - - logger.info('Job "%s" executed successfully', job) - - job.remove_instance() - - # If coalescing is enabled, don't attempt any further runs - if job.coalesce: - break - - def _process_jobs(self, now): - """ - Iterates through jobs in every jobstore, starts pending jobs - and figures out the next wakeup time. 
- """ - next_wakeup_time = None - self._jobstores_lock.acquire() - try: - for alias, jobstore in iteritems(self._jobstores): - for job in tuple(jobstore.jobs): - run_times = job.get_run_times(now) - if run_times: - self._threadpool.submit(self._run_job, job, run_times) - - # Increase the job's run count - if job.coalesce: - job.runs += 1 - else: - job.runs += len(run_times) - - # Update the job, but don't keep finished jobs around - if job.compute_next_run_time(now + timedelta(microseconds=1)): - jobstore.update_job(job) - else: - self._remove_job(job, alias, jobstore) - - if not next_wakeup_time: - next_wakeup_time = job.next_run_time - elif job.next_run_time: - next_wakeup_time = min(next_wakeup_time, - job.next_run_time) - return next_wakeup_time - finally: - self._jobstores_lock.release() - - def _main_loop(self): - """Executes jobs on schedule.""" - - logger.info('Scheduler started') - self._notify_listeners(SchedulerEvent(EVENT_SCHEDULER_START)) - - self._wakeup.clear() - while not self._stopped: - logger.debug('Looking for jobs to run') - now = datetime.now() - next_wakeup_time = self._process_jobs(now) - - # Sleep until the next job is scheduled to be run, - # a new job is added or the scheduler is stopped - if next_wakeup_time is not None: - wait_seconds = time_difference(next_wakeup_time, now) - logger.debug('Next wakeup is due at %s (in %f seconds)', - next_wakeup_time, wait_seconds) - self._wakeup.wait(wait_seconds) - else: - logger.debug('No jobs; waiting until a job is added') - self._wakeup.wait() - self._wakeup.clear() - - logger.info('Scheduler has been shut down') - self._notify_listeners(SchedulerEvent(EVENT_SCHEDULER_SHUTDOWN)) diff --git a/apscheduler/threadpool.py b/apscheduler/threadpool.py deleted file mode 100644 index 8ec47da0..00000000 --- a/apscheduler/threadpool.py +++ /dev/null @@ -1,133 +0,0 @@ -""" -Generic thread pool class. Modeled after Java's ThreadPoolExecutor. 
-Please note that this ThreadPool does *not* fully implement the PEP 3148 -ThreadPool! -""" - -from threading import Thread, Lock, currentThread -from weakref import ref -import logging -import atexit - -try: - from queue import Queue, Empty -except ImportError: - from Queue import Queue, Empty - -logger = logging.getLogger(__name__) -_threadpools = set() - - -# Worker threads are daemonic in order to let the interpreter exit without -# an explicit shutdown of the thread pool. The following trick is necessary -# to allow worker threads to finish cleanly. -def _shutdown_all(): - for pool_ref in tuple(_threadpools): - pool = pool_ref() - if pool: - pool.shutdown() - -atexit.register(_shutdown_all) - - -class ThreadPool(object): - def __init__(self, core_threads=0, max_threads=20, keepalive=1): - """ - :param core_threads: maximum number of persistent threads in the pool - :param max_threads: maximum number of total threads in the pool - :param thread_class: callable that creates a Thread object - :param keepalive: seconds to keep non-core worker threads waiting - for new tasks - """ - self.core_threads = core_threads - self.max_threads = max(max_threads, core_threads, 1) - self.keepalive = keepalive - self._queue = Queue() - self._threads_lock = Lock() - self._threads = set() - self._shutdown = False - - _threadpools.add(ref(self)) - logger.info('Started thread pool with %d core threads and %s maximum ' - 'threads', core_threads, max_threads or 'unlimited') - - def _adjust_threadcount(self): - self._threads_lock.acquire() - try: - if self.num_threads < self.max_threads: - self._add_thread(self.num_threads < self.core_threads) - finally: - self._threads_lock.release() - - def _add_thread(self, core): - t = Thread(target=self._run_jobs, args=(core,)) - t.setDaemon(True) - t.start() - self._threads.add(t) - - def _run_jobs(self, core): - logger.debug('Started worker thread') - block = True - timeout = None - if not core: - block = self.keepalive > 0 - timeout = 
self.keepalive - - while True: - try: - func, args, kwargs = self._queue.get(block, timeout) - except Empty: - break - - if self._shutdown: - break - - try: - func(*args, **kwargs) - except: - logger.exception('Error in worker thread') - - self._threads_lock.acquire() - self._threads.remove(currentThread()) - self._threads_lock.release() - - logger.debug('Exiting worker thread') - - @property - def num_threads(self): - return len(self._threads) - - def submit(self, func, *args, **kwargs): - if self._shutdown: - raise RuntimeError('Cannot schedule new tasks after shutdown') - - self._queue.put((func, args, kwargs)) - self._adjust_threadcount() - - def shutdown(self, wait=True): - if self._shutdown: - return - - logging.info('Shutting down thread pool') - self._shutdown = True - _threadpools.remove(ref(self)) - - self._threads_lock.acquire() - for _ in range(self.num_threads): - self._queue.put((None, None, None)) - self._threads_lock.release() - - if wait: - self._threads_lock.acquire() - threads = tuple(self._threads) - self._threads_lock.release() - for thread in threads: - thread.join() - - def __repr__(self): - if self.max_threads: - threadcount = '%d/%d' % (self.num_threads, self.max_threads) - else: - threadcount = '%d' % self.num_threads - - return '' % (id(self), threadcount) diff --git a/apscheduler/triggers/__init__.py b/apscheduler/triggers/__init__.py deleted file mode 100644 index 74a97884..00000000 --- a/apscheduler/triggers/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from apscheduler.triggers.cron import CronTrigger -from apscheduler.triggers.interval import IntervalTrigger -from apscheduler.triggers.simple import SimpleTrigger diff --git a/apscheduler/triggers/cron/__init__.py b/apscheduler/triggers/cron/__init__.py deleted file mode 100644 index 3f8d9a8f..00000000 --- a/apscheduler/triggers/cron/__init__.py +++ /dev/null @@ -1,135 +0,0 @@ -from datetime import date, datetime - -from apscheduler.triggers.cron.fields import * -from apscheduler.util 
import datetime_ceil, convert_to_datetime - - -class CronTrigger(object): - FIELD_NAMES = ('year', 'month', 'day', 'week', 'day_of_week', 'hour', - 'minute', 'second') - FIELDS_MAP = {'year': BaseField, - 'month': BaseField, - 'week': WeekField, - 'day': DayOfMonthField, - 'day_of_week': DayOfWeekField, - 'hour': BaseField, - 'minute': BaseField, - 'second': BaseField} - - def __init__(self, **values): - self.start_date = values.pop('start_date', None) - if self.start_date: - self.start_date = convert_to_datetime(self.start_date) - - self.fields = [] - for field_name in self.FIELD_NAMES: - if field_name in values: - exprs = values.pop(field_name) - is_default = False - elif not values: - exprs = DEFAULT_VALUES[field_name] - is_default = True - else: - exprs = '*' - is_default = True - - field_class = self.FIELDS_MAP[field_name] - field = field_class(field_name, exprs, is_default) - self.fields.append(field) - - def _increment_field_value(self, dateval, fieldnum): - """ - Increments the designated field and resets all less significant fields - to their minimum values. 
- - :type dateval: datetime - :type fieldnum: int - :type amount: int - :rtype: tuple - :return: a tuple containing the new date, and the number of the field - that was actually incremented - """ - i = 0 - values = {} - while i < len(self.fields): - field = self.fields[i] - if not field.REAL: - if i == fieldnum: - fieldnum -= 1 - i -= 1 - else: - i += 1 - continue - - if i < fieldnum: - values[field.name] = field.get_value(dateval) - i += 1 - elif i > fieldnum: - values[field.name] = field.get_min(dateval) - i += 1 - else: - value = field.get_value(dateval) - maxval = field.get_max(dateval) - if value == maxval: - fieldnum -= 1 - i -= 1 - else: - values[field.name] = value + 1 - i += 1 - - return datetime(**values), fieldnum - - def _set_field_value(self, dateval, fieldnum, new_value): - values = {} - for i, field in enumerate(self.fields): - if field.REAL: - if i < fieldnum: - values[field.name] = field.get_value(dateval) - elif i > fieldnum: - values[field.name] = field.get_min(dateval) - else: - values[field.name] = new_value - - return datetime(**values) - - def get_next_fire_time(self, start_date): - if self.start_date: - start_date = max(start_date, self.start_date) - next_date = datetime_ceil(start_date) - fieldnum = 0 - while 0 <= fieldnum < len(self.fields): - field = self.fields[fieldnum] - curr_value = field.get_value(next_date) - next_value = field.get_next_value(next_date) - - if next_value is None: - # No valid value was found - next_date, fieldnum = self._increment_field_value(next_date, - fieldnum - 1) - elif next_value > curr_value: - # A valid, but higher than the starting value, was found - if field.REAL: - next_date = self._set_field_value(next_date, fieldnum, - next_value) - fieldnum += 1 - else: - next_date, fieldnum = self._increment_field_value(next_date, - fieldnum) - else: - # A valid value was found, no changes necessary - fieldnum += 1 - - if fieldnum >= 0: - return next_date - - def __str__(self): - options = ["%s='%s'" % (f.name, 
str(f)) for f in self.fields - if not f.is_default] - return 'cron[%s]' % (', '.join(options)) - - def __repr__(self): - options = ["%s='%s'" % (f.name, str(f)) for f in self.fields - if not f.is_default] - if self.start_date: - options.append("start_date='%s'" % self.start_date.isoformat(' ')) - return '<%s (%s)>' % (self.__class__.__name__, ', '.join(options)) diff --git a/apscheduler/triggers/cron/expressions.py b/apscheduler/triggers/cron/expressions.py deleted file mode 100644 index 018c7a30..00000000 --- a/apscheduler/triggers/cron/expressions.py +++ /dev/null @@ -1,178 +0,0 @@ -""" -This module contains the expressions applicable for CronTrigger's fields. -""" - -from calendar import monthrange -import re - -from apscheduler.util import asint - -__all__ = ('AllExpression', 'RangeExpression', 'WeekdayRangeExpression', - 'WeekdayPositionExpression') - - -WEEKDAYS = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] - - -class AllExpression(object): - value_re = re.compile(r'\*(?:/(?P\d+))?$') - - def __init__(self, step=None): - self.step = asint(step) - if self.step == 0: - raise ValueError('Increment must be higher than 0') - - def get_next_value(self, date, field): - start = field.get_value(date) - minval = field.get_min(date) - maxval = field.get_max(date) - start = max(start, minval) - - if not self.step: - next = start - else: - distance_to_next = (self.step - (start - minval)) % self.step - next = start + distance_to_next - - if next <= maxval: - return next - - def __str__(self): - if self.step: - return '*/%d' % self.step - return '*' - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, self.step) - - -class RangeExpression(AllExpression): - value_re = re.compile( - r'(?P\d+)(?:-(?P\d+))?(?:/(?P\d+))?$') - - def __init__(self, first, last=None, step=None): - AllExpression.__init__(self, step) - first = asint(first) - last = asint(last) - if last is None and step is None: - last = first - if last is not None and first > last: - raise 
ValueError('The minimum value in a range must not be ' - 'higher than the maximum') - self.first = first - self.last = last - - def get_next_value(self, date, field): - start = field.get_value(date) - minval = field.get_min(date) - maxval = field.get_max(date) - - # Apply range limits - minval = max(minval, self.first) - if self.last is not None: - maxval = min(maxval, self.last) - start = max(start, minval) - - if not self.step: - next = start - else: - distance_to_next = (self.step - (start - minval)) % self.step - next = start + distance_to_next - - if next <= maxval: - return next - - def __str__(self): - if self.last != self.first and self.last is not None: - range = '%d-%d' % (self.first, self.last) - else: - range = str(self.first) - - if self.step: - return '%s/%d' % (range, self.step) - return range - - def __repr__(self): - args = [str(self.first)] - if self.last != self.first and self.last is not None or self.step: - args.append(str(self.last)) - if self.step: - args.append(str(self.step)) - return "%s(%s)" % (self.__class__.__name__, ', '.join(args)) - - -class WeekdayRangeExpression(RangeExpression): - value_re = re.compile(r'(?P[a-z]+)(?:-(?P[a-z]+))?', - re.IGNORECASE) - - def __init__(self, first, last=None): - try: - first_num = WEEKDAYS.index(first.lower()) - except ValueError: - raise ValueError('Invalid weekday name "%s"' % first) - - if last: - try: - last_num = WEEKDAYS.index(last.lower()) - except ValueError: - raise ValueError('Invalid weekday name "%s"' % last) - else: - last_num = None - - RangeExpression.__init__(self, first_num, last_num) - - def __str__(self): - if self.last != self.first and self.last is not None: - return '%s-%s' % (WEEKDAYS[self.first], WEEKDAYS[self.last]) - return WEEKDAYS[self.first] - - def __repr__(self): - args = ["'%s'" % WEEKDAYS[self.first]] - if self.last != self.first and self.last is not None: - args.append("'%s'" % WEEKDAYS[self.last]) - return "%s(%s)" % (self.__class__.__name__, ', '.join(args)) - - 
-class WeekdayPositionExpression(AllExpression): - options = ['1st', '2nd', '3rd', '4th', '5th', 'last'] - value_re = re.compile(r'(?P%s) +(?P(?:\d+|\w+))' - % '|'.join(options), re.IGNORECASE) - - def __init__(self, option_name, weekday_name): - try: - self.option_num = self.options.index(option_name.lower()) - except ValueError: - raise ValueError('Invalid weekday position "%s"' % option_name) - - try: - self.weekday = WEEKDAYS.index(weekday_name.lower()) - except ValueError: - raise ValueError('Invalid weekday name "%s"' % weekday_name) - - def get_next_value(self, date, field): - # Figure out the weekday of the month's first day and the number - # of days in that month - first_day_wday, last_day = monthrange(date.year, date.month) - - # Calculate which day of the month is the first of the target weekdays - first_hit_day = self.weekday - first_day_wday + 1 - if first_hit_day <= 0: - first_hit_day += 7 - - # Calculate what day of the month the target weekday would be - if self.option_num < 5: - target_day = first_hit_day + self.option_num * 7 - else: - target_day = first_hit_day + ((last_day - first_hit_day) / 7) * 7 - - if target_day <= last_day and target_day >= date.day: - return target_day - - def __str__(self): - return '%s %s' % (self.options[self.option_num], - WEEKDAYS[self.weekday]) - - def __repr__(self): - return "%s('%s', '%s')" % (self.__class__.__name__, - self.options[self.option_num], - WEEKDAYS[self.weekday]) diff --git a/apscheduler/triggers/cron/fields.py b/apscheduler/triggers/cron/fields.py deleted file mode 100644 index ef970cc9..00000000 --- a/apscheduler/triggers/cron/fields.py +++ /dev/null @@ -1,99 +0,0 @@ -""" -Fields represent CronTrigger options which map to :class:`~datetime.datetime` -fields. 
-""" - -from calendar import monthrange - -from apscheduler.triggers.cron.expressions import * - -__all__ = ('MIN_VALUES', 'MAX_VALUES', 'DEFAULT_VALUES', 'BaseField', - 'WeekField', 'DayOfMonthField', 'DayOfWeekField') - - -MIN_VALUES = {'year': 1970, 'month': 1, 'day': 1, 'week': 1, - 'day_of_week': 0, 'hour': 0, 'minute': 0, 'second': 0} -MAX_VALUES = {'year': 2 ** 63, 'month': 12, 'day:': 31, 'week': 53, - 'day_of_week': 6, 'hour': 23, 'minute': 59, 'second': 59} -DEFAULT_VALUES = {'year': '*', 'month': 1, 'day': 1, 'week': '*', - 'day_of_week': '*', 'hour': 0, 'minute': 0, 'second': 0} - - -class BaseField(object): - REAL = True - COMPILERS = [AllExpression, RangeExpression] - - def __init__(self, name, exprs, is_default=False): - self.name = name - self.is_default = is_default - self.compile_expressions(exprs) - - def get_min(self, dateval): - return MIN_VALUES[self.name] - - def get_max(self, dateval): - return MAX_VALUES[self.name] - - def get_value(self, dateval): - return getattr(dateval, self.name) - - def get_next_value(self, dateval): - smallest = None - for expr in self.expressions: - value = expr.get_next_value(dateval, self) - if smallest is None or (value is not None and value < smallest): - smallest = value - - return smallest - - def compile_expressions(self, exprs): - self.expressions = [] - - # Split a comma-separated expression list, if any - exprs = str(exprs).strip() - if ',' in exprs: - for expr in exprs.split(','): - self.compile_expression(expr) - else: - self.compile_expression(exprs) - - def compile_expression(self, expr): - for compiler in self.COMPILERS: - match = compiler.value_re.match(expr) - if match: - compiled_expr = compiler(**match.groupdict()) - self.expressions.append(compiled_expr) - return - - raise ValueError('Unrecognized expression "%s" for field "%s"' % - (expr, self.name)) - - def __str__(self): - expr_strings = (str(e) for e in self.expressions) - return ','.join(expr_strings) - - def __repr__(self): - return 
"%s('%s', '%s')" % (self.__class__.__name__, self.name, - str(self)) - - -class WeekField(BaseField): - REAL = False - - def get_value(self, dateval): - return dateval.isocalendar()[1] - - -class DayOfMonthField(BaseField): - COMPILERS = BaseField.COMPILERS + [WeekdayPositionExpression] - - def get_max(self, dateval): - return monthrange(dateval.year, dateval.month)[1] - - -class DayOfWeekField(BaseField): - REAL = False - COMPILERS = BaseField.COMPILERS + [WeekdayRangeExpression] - - def get_value(self, dateval): - return dateval.weekday() diff --git a/apscheduler/triggers/interval.py b/apscheduler/triggers/interval.py deleted file mode 100644 index dd16d777..00000000 --- a/apscheduler/triggers/interval.py +++ /dev/null @@ -1,39 +0,0 @@ -from datetime import datetime, timedelta -from math import ceil - -from apscheduler.util import convert_to_datetime, timedelta_seconds - - -class IntervalTrigger(object): - def __init__(self, interval, start_date=None): - if not isinstance(interval, timedelta): - raise TypeError('interval must be a timedelta') - if start_date: - start_date = convert_to_datetime(start_date) - - self.interval = interval - self.interval_length = timedelta_seconds(self.interval) - if self.interval_length == 0: - self.interval = timedelta(seconds=1) - self.interval_length = 1 - - if start_date is None: - self.start_date = datetime.now() + self.interval - else: - self.start_date = convert_to_datetime(start_date) - - def get_next_fire_time(self, start_date): - if start_date < self.start_date: - return self.start_date - - timediff_seconds = timedelta_seconds(start_date - self.start_date) - next_interval_num = int(ceil(timediff_seconds / self.interval_length)) - return self.start_date + self.interval * next_interval_num - - def __str__(self): - return 'interval[%s]' % str(self.interval) - - def __repr__(self): - return "<%s (interval=%s, start_date=%s)>" % ( - self.__class__.__name__, repr(self.interval), - repr(self.start_date)) diff --git 
a/apscheduler/triggers/simple.py b/apscheduler/triggers/simple.py deleted file mode 100644 index ea61b3f1..00000000 --- a/apscheduler/triggers/simple.py +++ /dev/null @@ -1,17 +0,0 @@ -from apscheduler.util import convert_to_datetime - - -class SimpleTrigger(object): - def __init__(self, run_date): - self.run_date = convert_to_datetime(run_date) - - def get_next_fire_time(self, start_date): - if self.run_date >= start_date: - return self.run_date - - def __str__(self): - return 'date[%s]' % str(self.run_date) - - def __repr__(self): - return '<%s (run_date=%s)>' % ( - self.__class__.__name__, repr(self.run_date)) diff --git a/apscheduler/util.py b/apscheduler/util.py deleted file mode 100644 index af28ae49..00000000 --- a/apscheduler/util.py +++ /dev/null @@ -1,204 +0,0 @@ -""" -This module contains several handy functions primarily meant for internal use. -""" - -from datetime import date, datetime, timedelta -from time import mktime -import re -import sys - -__all__ = ('asint', 'asbool', 'convert_to_datetime', 'timedelta_seconds', - 'time_difference', 'datetime_ceil', 'combine_opts', - 'get_callable_name', 'obj_to_ref', 'ref_to_obj', 'maybe_ref', - 'to_unicode', 'iteritems', 'itervalues', 'xrange') - - -def asint(text): - """ - Safely converts a string to an integer, returning None if the string - is None. - - :type text: str - :rtype: int - """ - if text is not None: - return int(text) - - -def asbool(obj): - """ - Interprets an object as a boolean value. 
- - :rtype: bool - """ - if isinstance(obj, str): - obj = obj.strip().lower() - if obj in ('true', 'yes', 'on', 'y', 't', '1'): - return True - if obj in ('false', 'no', 'off', 'n', 'f', '0'): - return False - raise ValueError('Unable to interpret value "%s" as boolean' % obj) - return bool(obj) - - -_DATE_REGEX = re.compile( - r'(?P\d{4})-(?P\d{1,2})-(?P\d{1,2})' - r'(?: (?P\d{1,2}):(?P\d{1,2}):(?P\d{1,2})' - r'(?:\.(?P\d{1,6}))?)?') - - -def convert_to_datetime(input): - """ - Converts the given object to a datetime object, if possible. - If an actual datetime object is passed, it is returned unmodified. - If the input is a string, it is parsed as a datetime. - - Date strings are accepted in three different forms: date only (Y-m-d), - date with time (Y-m-d H:M:S) or with date+time with microseconds - (Y-m-d H:M:S.micro). - - :rtype: datetime - """ - if isinstance(input, datetime): - return input - elif isinstance(input, date): - return datetime.fromordinal(input.toordinal()) - elif isinstance(input, str): - m = _DATE_REGEX.match(input) - if not m: - raise ValueError('Invalid date string') - values = [(k, int(v or 0)) for k, v in m.groupdict().items()] - values = dict(values) - return datetime(**values) - raise TypeError('Unsupported input type: %s' % type(input)) - - -def timedelta_seconds(delta): - """ - Converts the given timedelta to seconds. - - :type delta: timedelta - :rtype: float - """ - return delta.days * 24 * 60 * 60 + delta.seconds + \ - delta.microseconds / 1000000.0 - - -def time_difference(date1, date2): - """ - Returns the time difference in seconds between the given two - datetime objects. The difference is calculated as: date1 - date2. 
- - :param date1: the later datetime - :type date1: datetime - :param date2: the earlier datetime - :type date2: datetime - :rtype: float - """ - later = mktime(date1.timetuple()) + date1.microsecond / 1000000.0 - earlier = mktime(date2.timetuple()) + date2.microsecond / 1000000.0 - return later - earlier - - -def datetime_ceil(dateval): - """ - Rounds the given datetime object upwards. - - :type dateval: datetime - """ - if dateval.microsecond > 0: - return dateval + timedelta(seconds=1, - microseconds=-dateval.microsecond) - return dateval - - -def combine_opts(global_config, prefix, local_config={}): - """ - Returns a subdictionary from keys and values of ``global_config`` where - the key starts with the given prefix, combined with options from - local_config. The keys in the subdictionary have the prefix removed. - - :type global_config: dict - :type prefix: str - :type local_config: dict - :rtype: dict - """ - prefixlen = len(prefix) - subconf = {} - for key, value in global_config.items(): - if key.startswith(prefix): - key = key[prefixlen:] - subconf[key] = value - subconf.update(local_config) - return subconf - - -def get_callable_name(func): - """ - Returns the best available display name for the given function/callable. - """ - name = func.__module__ - if hasattr(func, '__self__') and func.__self__: - name += '.' + func.__self__.__name__ - elif hasattr(func, 'im_self') and func.im_self: # py2.4, 2.5 - name += '.' + func.im_self.__name__ - if hasattr(func, '__name__'): - name += '.' + func.__name__ - return name - - -def obj_to_ref(obj): - """ - Returns the path to the given object. - """ - ref = '%s:%s' % (obj.__module__, obj.__name__) - try: - obj2 = ref_to_obj(ref) - except AttributeError: - pass - else: - if obj2 == obj: - return ref - - raise ValueError('Only module level objects are supported') - - -def ref_to_obj(ref): - """ - Returns the object pointed to by ``ref``. 
- """ - modulename, rest = ref.split(':', 1) - obj = __import__(modulename) - for name in modulename.split('.')[1:] + rest.split('.'): - obj = getattr(obj, name) - return obj - - -def maybe_ref(ref): - """ - Returns the object that the given reference points to, if it is indeed - a reference. If it is not a reference, the object is returned as-is. - """ - if not isinstance(ref, str): - return ref - return ref_to_obj(ref) - - -def to_unicode(string, encoding='ascii'): - """ - Safely converts a string to a unicode representation on any - Python version. - """ - if hasattr(string, 'decode'): - return string.decode(encoding, 'ignore') - return string - - -if sys.version_info < (3, 0): # pragma: nocover - iteritems = lambda d: d.iteritems() - itervalues = lambda d: d.itervalues() - xrange = xrange -else: # pragma: nocover - iteritems = lambda d: d.items() - itervalues = lambda d: d.values() - xrange = range diff --git a/config.py b/config.py deleted file mode 100644 index 3b95eb78..00000000 --- a/config.py +++ /dev/null @@ -1,290 +0,0 @@ -import os -from configobj import ConfigObj -from headphones import config_file - -config = ConfigObj(config_file) - -General = config['General'] -http_host = General['http_host'] -http_port = General['http_port'] -http_username = General['http_username'] -http_password = General['http_password'] -try: - http_root = General['http_root'] -except KeyError: - General['http_root'] = '' - config.write() -launch_browser = General['launch_browser'] -usenet_retention = General['usenet_retention'] -include_lossless = General['include_lossless'] -flac_to_mp3 = General['flac_to_mp3'] -move_to_itunes = General['move_to_itunes'] -path_to_itunes = General['path_to_itunes'] -rename_mp3s = General['rename_mp3s'] -cleanup = General['cleanup'] -add_album_art = General['add_album_art'] -music_download_dir = General['music_download_dir'] -NZBMatrix = config['NZBMatrix'] -nzbmatrix = NZBMatrix['nzbmatrix'] -nzbmatrix_username = 
NZBMatrix['nzbmatrix_username'] -nzbmatrix_apikey = NZBMatrix['nzbmatrix_apikey'] -Newznab = config['Newznab'] -newznab = Newznab['newznab'] -newznab_host = Newznab['newznab_host'] -newznab_apikey = Newznab['newznab_apikey'] -NZBsorg = config['NZBsorg'] -nzbsorg = NZBsorg['nzbsorg'] -nzbsorg_uid = NZBsorg['nzbsorg_uid'] -nzbsorg_hash = NZBsorg['nzbsorg_hash'] -SABnzbd = config['SABnzbd'] -sab_username = SABnzbd['sab_username'] -sab_password = SABnzbd['sab_password'] -sab_apikey = SABnzbd['sab_apikey'] -sab_category = SABnzbd['sab_category'] -sab_host = SABnzbd['sab_host'] - -def var_to_chk(variable): - if variable == '1': - return 'Checked' - else: - return '' - -form = ''' -
-
- -
-
-
-
-

Web Interface

- - - - - - - - - - - - - - - - -
-

- HTTP Host:

-
- i.e. localhost or 0.0.0.0 -

-
-

- HTTP Username:

- -

-
-

- HTTP Port:

- -

-
-

- HTTP Password:

- -

-
-

Launch Browser on Startup:

-
- -

Download Settings

- - - - - - - - - - - - - - - - - - - - - - - -
-

SABnzbd Host:


- - usually localhost:8080 -
-

SABnzbd Username:

-
-
- -

SABnzbd API:

-
-
- -

SABnzbd Password:

-
-
- -

SABnzbd Category:

-
-
- -

Music Download Directory:


- - Absolute or relative path to the dir where SAB downloads your music
- i.e. Downloads/music or /Users/name/Downloads/music
-
-
- -

Usenet Retention:

-
- -

Search Providers

- - - - - - - - - - - - - - - - - - - - - - - - - -
-

NZBMatrix:

-
-

- NZBMatrix Username:
- -

-
-

- NZBMatrix API:
- -

-
-
- -

Newznab:

-
-
- -

- Newznab Host:
-
- i.e. http://nzb.su -

-
-
- -

- Newznab API:
- -

-
-
- -

NZBs.org:

-
-
- -

- NZBs.org UID:
- -

-
-
- -

- NZBs.org Hash:
- -

-
- -

Quality & Post Processing

- - - - - - - - - - - - - - - - -
-

Album Quality:

- Include lossless
- Convert lossless to mp3 -
-

-

iTunes:

- Move downloads to iTunes -

-
-
- -

Path to Music folder:
-
- i.e. /Users/name/Music/iTunes or /Volumes/share/music -

-
- Renaming & Metadata: -

- Rename & add metadata -
- Delete leftover files -

-
-
-

Album Art:

- Add album art -
- -


- (For now, all changes require a restart to take effect)

-
-
-
''' % (http_host, http_username, http_port, http_password, var_to_chk(launch_browser), sab_host, sab_username, sab_apikey, sab_password, sab_category, music_download_dir, usenet_retention, var_to_chk(nzbmatrix), nzbmatrix_username, nzbmatrix_apikey, var_to_chk(newznab), newznab_host, newznab_apikey, var_to_chk(nzbsorg), nzbsorg_uid, nzbsorg_hash, var_to_chk(include_lossless), var_to_chk(flac_to_mp3), var_to_chk(move_to_itunes), path_to_itunes, var_to_chk(rename_mp3s), var_to_chk(cleanup), var_to_chk(add_album_art)) - diff --git a/configcreate.py b/configcreate.py deleted file mode 100644 index df8cc035..00000000 --- a/configcreate.py +++ /dev/null @@ -1,41 +0,0 @@ -from configobj import ConfigObj - -def configCreate(path): - config = ConfigObj() - config.filename = path - config['General'] = {} - config['General']['http_host'] = '0.0.0.0' - config['General']['http_port'] = 8181 - config['General']['http_username'] = '' - config['General']['http_password'] = '' - config['General']['http_root'] = '' - config['General']['launch_browser'] = 1 - config['General']['include_lossless'] = 0 - config['General']['flac_to_mp3'] = 0 - config['General']['move_to_itunes'] = 0 - config['General']['path_to_itunes'] = '' - config['General']['rename_mp3s'] = 0 - config['General']['cleanup'] = 0 - config['General']['add_album_art'] = 0 - config['General']['music_download_dir'] = '' - config['General']['usenet_retention'] = 500 - config['SABnzbd'] = {} - config['SABnzbd']['sab_host'] = '' - config['SABnzbd']['sab_username'] = '' - config['SABnzbd']['sab_password'] = '' - config['SABnzbd']['sab_apikey'] = '' - config['SABnzbd']['sab_category'] = '' - config['NZBMatrix'] = {} - config['NZBMatrix']['nzbmatrix'] = 0 - config['NZBMatrix']['nzbmatrix_username'] = '' - config['NZBMatrix']['nzbmatrix_apikey'] = '' - config['Newznab'] = {} - config['Newznab']['newznab'] = 0 - config['Newznab']['newznab_host'] = '' - config['Newznab']['newznab_apikey'] = '' - config['NZBsorg'] = {} - 
config['NZBsorg']['nzbsorg'] = 0 - config['NZBsorg']['nzbsorg_uid'] = '' - config['NZBsorg']['nzbsorg_hash'] = '' - - config.write() \ No newline at end of file diff --git a/configobj.py b/configobj.py deleted file mode 100644 index c1f6e6df..00000000 --- a/configobj.py +++ /dev/null @@ -1,2468 +0,0 @@ -# configobj.py -# A config file reader/writer that supports nested sections in config files. -# Copyright (C) 2005-2010 Michael Foord, Nicola Larosa -# E-mail: fuzzyman AT voidspace DOT org DOT uk -# nico AT tekNico DOT net - -# ConfigObj 4 -# http://www.voidspace.org.uk/python/configobj.html - -# Released subject to the BSD License -# Please see http://www.voidspace.org.uk/python/license.shtml - -# Scripts maintained at http://www.voidspace.org.uk/python/index.shtml -# For information about bugfixes, updates and support, please join the -# ConfigObj mailing list: -# http://lists.sourceforge.net/lists/listinfo/configobj-develop -# Comments, suggestions and bug reports welcome. - -from __future__ import generators - -import os -import re -import sys - -from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF16_BE, BOM_UTF16_LE - - -# imported lazily to avoid startup performance hit if it isn't used -compiler = None - -# A dictionary mapping BOM to -# the encoding to decode with, and what to set the -# encoding attribute to. -BOMS = { - BOM_UTF8: ('utf_8', None), - BOM_UTF16_BE: ('utf16_be', 'utf_16'), - BOM_UTF16_LE: ('utf16_le', 'utf_16'), - BOM_UTF16: ('utf_16', 'utf_16'), - } -# All legal variants of the BOM codecs. -# TODO: the list of aliases is not meant to be exhaustive, is there a -# better way ? 
-BOM_LIST = { - 'utf_16': 'utf_16', - 'u16': 'utf_16', - 'utf16': 'utf_16', - 'utf-16': 'utf_16', - 'utf16_be': 'utf16_be', - 'utf_16_be': 'utf16_be', - 'utf-16be': 'utf16_be', - 'utf16_le': 'utf16_le', - 'utf_16_le': 'utf16_le', - 'utf-16le': 'utf16_le', - 'utf_8': 'utf_8', - 'u8': 'utf_8', - 'utf': 'utf_8', - 'utf8': 'utf_8', - 'utf-8': 'utf_8', - } - -# Map of encodings to the BOM to write. -BOM_SET = { - 'utf_8': BOM_UTF8, - 'utf_16': BOM_UTF16, - 'utf16_be': BOM_UTF16_BE, - 'utf16_le': BOM_UTF16_LE, - None: BOM_UTF8 - } - - -def match_utf8(encoding): - return BOM_LIST.get(encoding.lower()) == 'utf_8' - - -# Quote strings used for writing values -squot = "'%s'" -dquot = '"%s"' -noquot = "%s" -wspace_plus = ' \r\n\v\t\'"' -tsquot = '"""%s"""' -tdquot = "'''%s'''" - -# Sentinel for use in getattr calls to replace hasattr -MISSING = object() - -__version__ = '4.7.2' - -try: - any -except NameError: - def any(iterable): - for entry in iterable: - if entry: - return True - return False - - -__all__ = ( - '__version__', - 'DEFAULT_INDENT_TYPE', - 'DEFAULT_INTERPOLATION', - 'ConfigObjError', - 'NestingError', - 'ParseError', - 'DuplicateError', - 'ConfigspecError', - 'ConfigObj', - 'SimpleVal', - 'InterpolationError', - 'InterpolationLoopError', - 'MissingInterpolationOption', - 'RepeatSectionError', - 'ReloadError', - 'UnreprError', - 'UnknownType', - 'flatten_errors', - 'get_extra_values' -) - -DEFAULT_INTERPOLATION = 'configparser' -DEFAULT_INDENT_TYPE = ' ' -MAX_INTERPOL_DEPTH = 10 - -OPTION_DEFAULTS = { - 'interpolation': True, - 'raise_errors': False, - 'list_values': True, - 'create_empty': False, - 'file_error': False, - 'configspec': None, - 'stringify': True, - # option may be set to one of ('', ' ', '\t') - 'indent_type': None, - 'encoding': None, - 'default_encoding': None, - 'unrepr': False, - 'write_empty_values': False, -} - - - -def getObj(s): - global compiler - if compiler is None: - import compiler - s = "a=" + s - p = compiler.parse(s) - return 
p.getChildren()[1].getChildren()[0].getChildren()[1] - - -class UnknownType(Exception): - pass - - -class Builder(object): - - def build(self, o): - m = getattr(self, 'build_' + o.__class__.__name__, None) - if m is None: - raise UnknownType(o.__class__.__name__) - return m(o) - - def build_List(self, o): - return map(self.build, o.getChildren()) - - def build_Const(self, o): - return o.value - - def build_Dict(self, o): - d = {} - i = iter(map(self.build, o.getChildren())) - for el in i: - d[el] = i.next() - return d - - def build_Tuple(self, o): - return tuple(self.build_List(o)) - - def build_Name(self, o): - if o.name == 'None': - return None - if o.name == 'True': - return True - if o.name == 'False': - return False - - # An undefined Name - raise UnknownType('Undefined Name') - - def build_Add(self, o): - real, imag = map(self.build_Const, o.getChildren()) - try: - real = float(real) - except TypeError: - raise UnknownType('Add') - if not isinstance(imag, complex) or imag.real != 0.0: - raise UnknownType('Add') - return real+imag - - def build_Getattr(self, o): - parent = self.build(o.expr) - return getattr(parent, o.attrname) - - def build_UnarySub(self, o): - return -self.build_Const(o.getChildren()[0]) - - def build_UnaryAdd(self, o): - return self.build_Const(o.getChildren()[0]) - - -_builder = Builder() - - -def unrepr(s): - if not s: - return s - return _builder.build(getObj(s)) - - - -class ConfigObjError(SyntaxError): - """ - This is the base class for all errors that ConfigObj raises. - It is a subclass of SyntaxError. - """ - def __init__(self, message='', line_number=None, line=''): - self.line = line - self.line_number = line_number - SyntaxError.__init__(self, message) - - -class NestingError(ConfigObjError): - """ - This error indicates a level of nesting that doesn't match. - """ - - -class ParseError(ConfigObjError): - """ - This error indicates that a line is badly written. 
- It is neither a valid ``key = value`` line, - nor a valid section marker line. - """ - - -class ReloadError(IOError): - """ - A 'reload' operation failed. - This exception is a subclass of ``IOError``. - """ - def __init__(self): - IOError.__init__(self, 'reload failed, filename is not set.') - - -class DuplicateError(ConfigObjError): - """ - The keyword or section specified already exists. - """ - - -class ConfigspecError(ConfigObjError): - """ - An error occured whilst parsing a configspec. - """ - - -class InterpolationError(ConfigObjError): - """Base class for the two interpolation errors.""" - - -class InterpolationLoopError(InterpolationError): - """Maximum interpolation depth exceeded in string interpolation.""" - - def __init__(self, option): - InterpolationError.__init__( - self, - 'interpolation loop detected in value "%s".' % option) - - -class RepeatSectionError(ConfigObjError): - """ - This error indicates additional sections in a section with a - ``__many__`` (repeated) section. - """ - - -class MissingInterpolationOption(InterpolationError): - """A value specified for interpolation was missing.""" - def __init__(self, option): - msg = 'missing option "%s" in interpolation.' % option - InterpolationError.__init__(self, msg) - - -class UnreprError(ConfigObjError): - """An error parsing in unrepr mode.""" - - - -class InterpolationEngine(object): - """ - A helper class to help perform string interpolation. - - This class is an abstract base class; its descendants perform - the actual work. - """ - - # compiled regexp to use in self.interpolate() - _KEYCRE = re.compile(r"%\(([^)]*)\)s") - _cookie = '%' - - def __init__(self, section): - # the Section instance that "owns" this engine - self.section = section - - - def interpolate(self, key, value): - # short-cut - if not self._cookie in value: - return value - - def recursive_interpolate(key, value, section, backtrail): - """The function that does the actual work. 
- - ``value``: the string we're trying to interpolate. - ``section``: the section in which that string was found - ``backtrail``: a dict to keep track of where we've been, - to detect and prevent infinite recursion loops - - This is similar to a depth-first-search algorithm. - """ - # Have we been here already? - if (key, section.name) in backtrail: - # Yes - infinite loop detected - raise InterpolationLoopError(key) - # Place a marker on our backtrail so we won't come back here again - backtrail[(key, section.name)] = 1 - - # Now start the actual work - match = self._KEYCRE.search(value) - while match: - # The actual parsing of the match is implementation-dependent, - # so delegate to our helper function - k, v, s = self._parse_match(match) - if k is None: - # That's the signal that no further interpolation is needed - replacement = v - else: - # Further interpolation may be needed to obtain final value - replacement = recursive_interpolate(k, v, s, backtrail) - # Replace the matched string with its final value - start, end = match.span() - value = ''.join((value[:start], replacement, value[end:])) - new_search_start = start + len(replacement) - # Pick up the next interpolation key, if any, for next time - # through the while loop - match = self._KEYCRE.search(value, new_search_start) - - # Now safe to come back here again; remove marker from backtrail - del backtrail[(key, section.name)] - - return value - - # Back in interpolate(), all we have to do is kick off the recursive - # function with appropriate starting values - value = recursive_interpolate(key, value, self.section, {}) - return value - - - def _fetch(self, key): - """Helper function to fetch values from owning section. - - Returns a 2-tuple: the value, and the section where it was found. - """ - # switch off interpolation before we try and fetch anything ! 
- save_interp = self.section.main.interpolation - self.section.main.interpolation = False - - # Start at section that "owns" this InterpolationEngine - current_section = self.section - while True: - # try the current section first - val = current_section.get(key) - if val is not None and not isinstance(val, Section): - break - # try "DEFAULT" next - val = current_section.get('DEFAULT', {}).get(key) - if val is not None and not isinstance(val, Section): - break - # move up to parent and try again - # top-level's parent is itself - if current_section.parent is current_section: - # reached top level, time to give up - break - current_section = current_section.parent - - # restore interpolation to previous value before returning - self.section.main.interpolation = save_interp - if val is None: - raise MissingInterpolationOption(key) - return val, current_section - - - def _parse_match(self, match): - """Implementation-dependent helper function. - - Will be passed a match object corresponding to the interpolation - key we just found (e.g., "%(foo)s" or "$foo"). Should look up that - key in the appropriate config file section (using the ``_fetch()`` - helper function) and return a 3-tuple: (key, value, section) - - ``key`` is the name of the key we're looking for - ``value`` is the value found for that key - ``section`` is a reference to the section where it was found - - ``key`` and ``section`` should be None if no further - interpolation should be performed on the resulting value - (e.g., if we interpolated "$$" and returned "$"). 
- """ - raise NotImplementedError() - - - -class ConfigParserInterpolation(InterpolationEngine): - """Behaves like ConfigParser.""" - _cookie = '%' - _KEYCRE = re.compile(r"%\(([^)]*)\)s") - - def _parse_match(self, match): - key = match.group(1) - value, section = self._fetch(key) - return key, value, section - - - -class TemplateInterpolation(InterpolationEngine): - """Behaves like string.Template.""" - _cookie = '$' - _delimiter = '$' - _KEYCRE = re.compile(r""" - \$(?: - (?P\$) | # Two $ signs - (?P[_a-z][_a-z0-9]*) | # $name format - {(?P[^}]*)} # ${name} format - ) - """, re.IGNORECASE | re.VERBOSE) - - def _parse_match(self, match): - # Valid name (in or out of braces): fetch value from section - key = match.group('named') or match.group('braced') - if key is not None: - value, section = self._fetch(key) - return key, value, section - # Escaped delimiter (e.g., $$): return single delimiter - if match.group('escaped') is not None: - # Return None for key and section to indicate it's time to stop - return None, self._delimiter, None - # Anything else: ignore completely, just return it unchanged - return None, match.group(), None - - -interpolation_engines = { - 'configparser': ConfigParserInterpolation, - 'template': TemplateInterpolation, -} - - -def __newobj__(cls, *args): - # Hack for pickle - return cls.__new__(cls, *args) - -class Section(dict): - """ - A dictionary-like object that represents a section in a config file. - - It does string interpolation if the 'interpolation' attribute - of the 'main' object is set to True. - - Interpolation is tried first from this object, then from the 'DEFAULT' - section of this object, next from the parent and its 'DEFAULT' section, - and so on until the main object is reached. - - A Section will behave like an ordered dictionary - following the - order of the ``scalars`` and ``sections`` attributes. - You can use this to change the order of members. - - Iteration follows the order: scalars, then sections. 
- """ - - - def __setstate__(self, state): - dict.update(self, state[0]) - self.__dict__.update(state[1]) - - def __reduce__(self): - state = (dict(self), self.__dict__) - return (__newobj__, (self.__class__,), state) - - - def __init__(self, parent, depth, main, indict=None, name=None): - """ - * parent is the section above - * depth is the depth level of this section - * main is the main ConfigObj - * indict is a dictionary to initialise the section with - """ - if indict is None: - indict = {} - dict.__init__(self) - # used for nesting level *and* interpolation - self.parent = parent - # used for the interpolation attribute - self.main = main - # level of nesting depth of this Section - self.depth = depth - # purely for information - self.name = name - # - self._initialise() - # we do this explicitly so that __setitem__ is used properly - # (rather than just passing to ``dict.__init__``) - for entry, value in indict.iteritems(): - self[entry] = value - - - def _initialise(self): - # the sequence of scalar values in this Section - self.scalars = [] - # the sequence of sections in this Section - self.sections = [] - # for comments :-) - self.comments = {} - self.inline_comments = {} - # the configspec - self.configspec = None - # for defaults - self.defaults = [] - self.default_values = {} - self.extra_values = [] - self._created = False - - - def _interpolate(self, key, value): - try: - # do we already have an interpolation engine? - engine = self._interpolation_engine - except AttributeError: - # not yet: first time running _interpolate(), so pick the engine - name = self.main.interpolation - if name == True: # note that "if name:" would be incorrect here - # backwards-compatibility: interpolation=True means use default - name = DEFAULT_INTERPOLATION - name = name.lower() # so that "Template", "template", etc. 
all work - class_ = interpolation_engines.get(name, None) - if class_ is None: - # invalid value for self.main.interpolation - self.main.interpolation = False - return value - else: - # save reference to engine so we don't have to do this again - engine = self._interpolation_engine = class_(self) - # let the engine do the actual work - return engine.interpolate(key, value) - - - def __getitem__(self, key): - """Fetch the item and do string interpolation.""" - val = dict.__getitem__(self, key) - if self.main.interpolation: - if isinstance(val, basestring): - return self._interpolate(key, val) - if isinstance(val, list): - def _check(entry): - if isinstance(entry, basestring): - return self._interpolate(key, entry) - return entry - new = [_check(entry) for entry in val] - if new != val: - return new - return val - - - def __setitem__(self, key, value, unrepr=False): - """ - Correctly set a value. - - Making dictionary values Section instances. - (We have to special case 'Section' instances - which are also dicts) - - Keys must be strings. - Values need only be strings (or lists of strings) if - ``main.stringify`` is set. - - ``unrepr`` must be set when setting a value to a dictionary, without - creating a new sub-section. - """ - if not isinstance(key, basestring): - raise ValueError('The key "%s" is not a string.' 
% key) - - # add the comment - if key not in self.comments: - self.comments[key] = [] - self.inline_comments[key] = '' - # remove the entry from defaults - if key in self.defaults: - self.defaults.remove(key) - # - if isinstance(value, Section): - if key not in self: - self.sections.append(key) - dict.__setitem__(self, key, value) - elif isinstance(value, dict) and not unrepr: - # First create the new depth level, - # then create the section - if key not in self: - self.sections.append(key) - new_depth = self.depth + 1 - dict.__setitem__( - self, - key, - Section( - self, - new_depth, - self.main, - indict=value, - name=key)) - else: - if key not in self: - self.scalars.append(key) - if not self.main.stringify: - if isinstance(value, basestring): - pass - elif isinstance(value, (list, tuple)): - for entry in value: - if not isinstance(entry, basestring): - raise TypeError('Value is not a string "%s".' % entry) - else: - raise TypeError('Value is not a string "%s".' % value) - dict.__setitem__(self, key, value) - - - def __delitem__(self, key): - """Remove items from the sequence when deleting.""" - dict. __delitem__(self, key) - if key in self.scalars: - self.scalars.remove(key) - else: - self.sections.remove(key) - del self.comments[key] - del self.inline_comments[key] - - - def get(self, key, default=None): - """A version of ``get`` that doesn't bypass string interpolation.""" - try: - return self[key] - except KeyError: - return default - - - def update(self, indict): - """ - A version of update that uses our ``__setitem__``. - """ - for entry in indict: - self[entry] = indict[entry] - - - def pop(self, key, default=MISSING): - """ - 'D.pop(k[,d]) -> v, remove specified key and return the corresponding value. 
- If key is not found, d is returned if given, otherwise KeyError is raised' - """ - try: - val = self[key] - except KeyError: - if default is MISSING: - raise - val = default - else: - del self[key] - return val - - - def popitem(self): - """Pops the first (key,val)""" - sequence = (self.scalars + self.sections) - if not sequence: - raise KeyError(": 'popitem(): dictionary is empty'") - key = sequence[0] - val = self[key] - del self[key] - return key, val - - - def clear(self): - """ - A version of clear that also affects scalars/sections - Also clears comments and configspec. - - Leaves other attributes alone : - depth/main/parent are not affected - """ - dict.clear(self) - self.scalars = [] - self.sections = [] - self.comments = {} - self.inline_comments = {} - self.configspec = None - self.defaults = [] - self.extra_values = [] - - - def setdefault(self, key, default=None): - """A version of setdefault that sets sequence if appropriate.""" - try: - return self[key] - except KeyError: - self[key] = default - return self[key] - - - def items(self): - """D.items() -> list of D's (key, value) pairs, as 2-tuples""" - return zip((self.scalars + self.sections), self.values()) - - - def keys(self): - """D.keys() -> list of D's keys""" - return (self.scalars + self.sections) - - - def values(self): - """D.values() -> list of D's values""" - return [self[key] for key in (self.scalars + self.sections)] - - - def iteritems(self): - """D.iteritems() -> an iterator over the (key, value) items of D""" - return iter(self.items()) - - - def iterkeys(self): - """D.iterkeys() -> an iterator over the keys of D""" - return iter((self.scalars + self.sections)) - - __iter__ = iterkeys - - - def itervalues(self): - """D.itervalues() -> an iterator over the values of D""" - return iter(self.values()) - - - def __repr__(self): - """x.__repr__() <==> repr(x)""" - def _getval(key): - try: - return self[key] - except MissingInterpolationOption: - return dict.__getitem__(self, key) - return 
'{%s}' % ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) - for key in (self.scalars + self.sections)]) - - __str__ = __repr__ - __str__.__doc__ = "x.__str__() <==> str(x)" - - - # Extra methods - not in a normal dictionary - - def dict(self): - """ - Return a deepcopy of self as a dictionary. - - All members that are ``Section`` instances are recursively turned to - ordinary dictionaries - by calling their ``dict`` method. - - >>> n = a.dict() - >>> n == a - 1 - >>> n is a - 0 - """ - newdict = {} - for entry in self: - this_entry = self[entry] - if isinstance(this_entry, Section): - this_entry = this_entry.dict() - elif isinstance(this_entry, list): - # create a copy rather than a reference - this_entry = list(this_entry) - elif isinstance(this_entry, tuple): - # create a copy rather than a reference - this_entry = tuple(this_entry) - newdict[entry] = this_entry - return newdict - - - def merge(self, indict): - """ - A recursive update - useful for merging config files. - - >>> a = '''[section1] - ... option1 = True - ... [[subsection]] - ... more_options = False - ... # end of file'''.splitlines() - >>> b = '''# File is user.ini - ... [section1] - ... option1 = False - ... # end of file'''.splitlines() - >>> c1 = ConfigObj(b) - >>> c2 = ConfigObj(a) - >>> c2.merge(c1) - >>> c2 - ConfigObj({'section1': {'option1': 'False', 'subsection': {'more_options': 'False'}}}) - """ - for key, val in indict.items(): - if (key in self and isinstance(self[key], dict) and - isinstance(val, dict)): - self[key].merge(val) - else: - self[key] = val - - - def rename(self, oldkey, newkey): - """ - Change a keyname to another, without changing position in sequence. - - Implemented so that transformations can be made on keys, - as well as on values. (used by encode and decode) - - Also renames comments. - """ - if oldkey in self.scalars: - the_list = self.scalars - elif oldkey in self.sections: - the_list = self.sections - else: - raise KeyError('Key "%s" not found.' 
% oldkey) - pos = the_list.index(oldkey) - # - val = self[oldkey] - dict.__delitem__(self, oldkey) - dict.__setitem__(self, newkey, val) - the_list.remove(oldkey) - the_list.insert(pos, newkey) - comm = self.comments[oldkey] - inline_comment = self.inline_comments[oldkey] - del self.comments[oldkey] - del self.inline_comments[oldkey] - self.comments[newkey] = comm - self.inline_comments[newkey] = inline_comment - - - def walk(self, function, raise_errors=True, - call_on_sections=False, **keywargs): - """ - Walk every member and call a function on the keyword and value. - - Return a dictionary of the return values - - If the function raises an exception, raise the errror - unless ``raise_errors=False``, in which case set the return value to - ``False``. - - Any unrecognised keyword arguments you pass to walk, will be pased on - to the function you pass in. - - Note: if ``call_on_sections`` is ``True`` then - on encountering a - subsection, *first* the function is called for the *whole* subsection, - and then recurses into it's members. This means your function must be - able to handle strings, dictionaries and lists. This allows you - to change the key of subsections as well as for ordinary members. The - return value when called on the whole subsection has to be discarded. - - See the encode and decode methods for examples, including functions. - - .. admonition:: caution - - You can use ``walk`` to transform the names of members of a section - but you mustn't add or delete members. - - >>> config = '''[XXXXsection] - ... XXXXkey = XXXXvalue'''.splitlines() - >>> cfg = ConfigObj(config) - >>> cfg - ConfigObj({'XXXXsection': {'XXXXkey': 'XXXXvalue'}}) - >>> def transform(section, key): - ... val = section[key] - ... newkey = key.replace('XXXX', 'CLIENT1') - ... section.rename(key, newkey) - ... if isinstance(val, (tuple, list, dict)): - ... pass - ... else: - ... val = val.replace('XXXX', 'CLIENT1') - ... 
section[newkey] = val - >>> cfg.walk(transform, call_on_sections=True) - {'CLIENT1section': {'CLIENT1key': None}} - >>> cfg - ConfigObj({'CLIENT1section': {'CLIENT1key': 'CLIENT1value'}}) - """ - out = {} - # scalars first - for i in range(len(self.scalars)): - entry = self.scalars[i] - try: - val = function(self, entry, **keywargs) - # bound again in case name has changed - entry = self.scalars[i] - out[entry] = val - except Exception: - if raise_errors: - raise - else: - entry = self.scalars[i] - out[entry] = False - # then sections - for i in range(len(self.sections)): - entry = self.sections[i] - if call_on_sections: - try: - function(self, entry, **keywargs) - except Exception: - if raise_errors: - raise - else: - entry = self.sections[i] - out[entry] = False - # bound again in case name has changed - entry = self.sections[i] - # previous result is discarded - out[entry] = self[entry].walk( - function, - raise_errors=raise_errors, - call_on_sections=call_on_sections, - **keywargs) - return out - - - def as_bool(self, key): - """ - Accepts a key as input. The corresponding value must be a string or - the objects (``True`` or 1) or (``False`` or 0). We allow 0 and 1 to - retain compatibility with Python 2.2. - - If the string is one of ``True``, ``On``, ``Yes``, or ``1`` it returns - ``True``. - - If the string is one of ``False``, ``Off``, ``No``, or ``0`` it returns - ``False``. - - ``as_bool`` is not case sensitive. - - Any other input will raise a ``ValueError``. - - >>> a = ConfigObj() - >>> a['a'] = 'fish' - >>> a.as_bool('a') - Traceback (most recent call last): - ValueError: Value "fish" is neither True nor False - >>> a['b'] = 'True' - >>> a.as_bool('b') - 1 - >>> a['b'] = 'off' - >>> a.as_bool('b') - 0 - """ - val = self[key] - if val == True: - return True - elif val == False: - return False - else: - try: - if not isinstance(val, basestring): - # TODO: Why do we raise a KeyError here? 
- raise KeyError() - else: - return self.main._bools[val.lower()] - except KeyError: - raise ValueError('Value "%s" is neither True nor False' % val) - - - def as_int(self, key): - """ - A convenience method which coerces the specified value to an integer. - - If the value is an invalid literal for ``int``, a ``ValueError`` will - be raised. - - >>> a = ConfigObj() - >>> a['a'] = 'fish' - >>> a.as_int('a') - Traceback (most recent call last): - ValueError: invalid literal for int() with base 10: 'fish' - >>> a['b'] = '1' - >>> a.as_int('b') - 1 - >>> a['b'] = '3.2' - >>> a.as_int('b') - Traceback (most recent call last): - ValueError: invalid literal for int() with base 10: '3.2' - """ - return int(self[key]) - - - def as_float(self, key): - """ - A convenience method which coerces the specified value to a float. - - If the value is an invalid literal for ``float``, a ``ValueError`` will - be raised. - - >>> a = ConfigObj() - >>> a['a'] = 'fish' - >>> a.as_float('a') - Traceback (most recent call last): - ValueError: invalid literal for float(): fish - >>> a['b'] = '1' - >>> a.as_float('b') - 1.0 - >>> a['b'] = '3.2' - >>> a.as_float('b') - 3.2000000000000002 - """ - return float(self[key]) - - - def as_list(self, key): - """ - A convenience method which fetches the specified value, guaranteeing - that it is a list. - - >>> a = ConfigObj() - >>> a['a'] = 1 - >>> a.as_list('a') - [1] - >>> a['a'] = (1,) - >>> a.as_list('a') - [1] - >>> a['a'] = [1] - >>> a.as_list('a') - [1] - """ - result = self[key] - if isinstance(result, (tuple, list)): - return list(result) - return [result] - - - def restore_default(self, key): - """ - Restore (and return) default value for the specified key. - - This method will only work for a ConfigObj that was created - with a configspec and has been validated. - - If there is no default value for this key, ``KeyError`` is raised. 
- """ - default = self.default_values[key] - dict.__setitem__(self, key, default) - if key not in self.defaults: - self.defaults.append(key) - return default - - - def restore_defaults(self): - """ - Recursively restore default values to all members - that have them. - - This method will only work for a ConfigObj that was created - with a configspec and has been validated. - - It doesn't delete or modify entries without default values. - """ - for key in self.default_values: - self.restore_default(key) - - for section in self.sections: - self[section].restore_defaults() - - -class ConfigObj(Section): - """An object to read, create, and write config files.""" - - _keyword = re.compile(r'''^ # line start - (\s*) # indentation - ( # keyword - (?:".*?")| # double quotes - (?:'.*?')| # single quotes - (?:[^'"=].*?) # no quotes - ) - \s*=\s* # divider - (.*) # value (including list values and comments) - $ # line end - ''', - re.VERBOSE) - - _sectionmarker = re.compile(r'''^ - (\s*) # 1: indentation - ((?:\[\s*)+) # 2: section marker open - ( # 3: section name open - (?:"\s*\S.*?\s*")| # at least one non-space with double quotes - (?:'\s*\S.*?\s*')| # at least one non-space with single quotes - (?:[^'"\s].*?) # at least one non-space unquoted - ) # section name close - ((?:\s*\])+) # 4: section marker close - \s*(\#.*)? # 5: optional comment - $''', - re.VERBOSE) - - # this regexp pulls list values out as a single string - # or single values and comments - # FIXME: this regex adds a '' to the end of comma terminated lists - # workaround in ``_handle_value`` - _valueexp = re.compile(r'''^ - (?: - (?: - ( - (?: - (?: - (?:".*?")| # double quotes - (?:'.*?')| # single quotes - (?:[^'",\#][^,\#]*?) # unquoted - ) - \s*,\s* # comma - )* # match all list items ending in a comma (if any) - ) - ( - (?:".*?")| # double quotes - (?:'.*?')| # single quotes - (?:[^'",\#\s][^,]*?)| # unquoted - (?:(? 
1: - msg = "Parsing failed with several errors.\nFirst error %s" % info - error = ConfigObjError(msg) - else: - error = self._errors[0] - # set the errors attribute; it's a list of tuples: - # (error_type, message, line_number) - error.errors = self._errors - # set the config attribute - error.config = self - raise error - # delete private attributes - del self._errors - - if configspec is None: - self.configspec = None - else: - self._handle_configspec(configspec) - - - def _initialise(self, options=None): - if options is None: - options = OPTION_DEFAULTS - - # initialise a few variables - self.filename = None - self._errors = [] - self.raise_errors = options['raise_errors'] - self.interpolation = options['interpolation'] - self.list_values = options['list_values'] - self.create_empty = options['create_empty'] - self.file_error = options['file_error'] - self.stringify = options['stringify'] - self.indent_type = options['indent_type'] - self.encoding = options['encoding'] - self.default_encoding = options['default_encoding'] - self.BOM = False - self.newlines = None - self.write_empty_values = options['write_empty_values'] - self.unrepr = options['unrepr'] - - self.initial_comment = [] - self.final_comment = [] - self.configspec = None - - if self._inspec: - self.list_values = False - - # Clear section attributes as well - Section._initialise(self) - - - def __repr__(self): - def _getval(key): - try: - return self[key] - except MissingInterpolationOption: - return dict.__getitem__(self, key) - return ('ConfigObj({%s})' % - ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) - for key in (self.scalars + self.sections)])) - - - def _handle_bom(self, infile): - """ - Handle any BOM, and decode if necessary. - - If an encoding is specified, that *must* be used - but the BOM should - still be removed (and the BOM attribute set). - - (If the encoding is wrongly specified, then a BOM for an alternative - encoding won't be discovered or removed.) 
- - If an encoding is not specified, UTF8 or UTF16 BOM will be detected and - removed. The BOM attribute will be set. UTF16 will be decoded to - unicode. - - NOTE: This method must not be called with an empty ``infile``. - - Specifying the *wrong* encoding is likely to cause a - ``UnicodeDecodeError``. - - ``infile`` must always be returned as a list of lines, but may be - passed in as a single string. - """ - if ((self.encoding is not None) and - (self.encoding.lower() not in BOM_LIST)): - # No need to check for a BOM - # the encoding specified doesn't have one - # just decode - return self._decode(infile, self.encoding) - - if isinstance(infile, (list, tuple)): - line = infile[0] - else: - line = infile - if self.encoding is not None: - # encoding explicitly supplied - # And it could have an associated BOM - # TODO: if encoding is just UTF16 - we ought to check for both - # TODO: big endian and little endian versions. - enc = BOM_LIST[self.encoding.lower()] - if enc == 'utf_16': - # For UTF16 we try big endian and little endian - for BOM, (encoding, final_encoding) in BOMS.items(): - if not final_encoding: - # skip UTF8 - continue - if infile.startswith(BOM): - ### BOM discovered - ##self.BOM = True - # Don't need to remove BOM - return self._decode(infile, encoding) - - # If we get this far, will *probably* raise a DecodeError - # As it doesn't appear to start with a BOM - return self._decode(infile, self.encoding) - - # Must be UTF8 - BOM = BOM_SET[enc] - if not line.startswith(BOM): - return self._decode(infile, self.encoding) - - newline = line[len(BOM):] - - # BOM removed - if isinstance(infile, (list, tuple)): - infile[0] = newline - else: - infile = newline - self.BOM = True - return self._decode(infile, self.encoding) - - # No encoding specified - so we need to check for UTF8/UTF16 - for BOM, (encoding, final_encoding) in BOMS.items(): - if not line.startswith(BOM): - continue - else: - # BOM discovered - self.encoding = final_encoding - if not 
final_encoding: - self.BOM = True - # UTF8 - # remove BOM - newline = line[len(BOM):] - if isinstance(infile, (list, tuple)): - infile[0] = newline - else: - infile = newline - # UTF8 - don't decode - if isinstance(infile, basestring): - return infile.splitlines(True) - else: - return infile - # UTF16 - have to decode - return self._decode(infile, encoding) - - # No BOM discovered and no encoding specified, just return - if isinstance(infile, basestring): - # infile read from a file will be a single string - return infile.splitlines(True) - return infile - - - def _a_to_u(self, aString): - """Decode ASCII strings to unicode if a self.encoding is specified.""" - if self.encoding: - return aString.decode('ascii') - else: - return aString - - - def _decode(self, infile, encoding): - """ - Decode infile to unicode. Using the specified encoding. - - if is a string, it also needs converting to a list. - """ - if isinstance(infile, basestring): - # can't be unicode - # NOTE: Could raise a ``UnicodeDecodeError`` - return infile.decode(encoding).splitlines(True) - for i, line in enumerate(infile): - if not isinstance(line, unicode): - # NOTE: The isinstance test here handles mixed lists of unicode/string - # NOTE: But the decode will break on any non-string values - # NOTE: Or could raise a ``UnicodeDecodeError`` - infile[i] = line.decode(encoding) - return infile - - - def _decode_element(self, line): - """Decode element to unicode if necessary.""" - if not self.encoding: - return line - if isinstance(line, str) and self.default_encoding: - return line.decode(self.default_encoding) - return line - - - def _str(self, value): - """ - Used by ``stringify`` within validate, to turn non-string values - into strings. 
- """ - if not isinstance(value, basestring): - return str(value) - else: - return value - - - def _parse(self, infile): - """Actually parse the config file.""" - temp_list_values = self.list_values - if self.unrepr: - self.list_values = False - - comment_list = [] - done_start = False - this_section = self - maxline = len(infile) - 1 - cur_index = -1 - reset_comment = False - - while cur_index < maxline: - if reset_comment: - comment_list = [] - cur_index += 1 - line = infile[cur_index] - sline = line.strip() - # do we have anything on the line ? - if not sline or sline.startswith('#'): - reset_comment = False - comment_list.append(line) - continue - - if not done_start: - # preserve initial comment - self.initial_comment = comment_list - comment_list = [] - done_start = True - - reset_comment = True - # first we check if it's a section marker - mat = self._sectionmarker.match(line) - if mat is not None: - # is a section line - (indent, sect_open, sect_name, sect_close, comment) = mat.groups() - if indent and (self.indent_type is None): - self.indent_type = indent - cur_depth = sect_open.count('[') - if cur_depth != sect_close.count(']'): - self._handle_error("Cannot compute the section depth at line %s.", - NestingError, infile, cur_index) - continue - - if cur_depth < this_section.depth: - # the new section is dropping back to a previous level - try: - parent = self._match_depth(this_section, - cur_depth).parent - except SyntaxError: - self._handle_error("Cannot compute nesting level at line %s.", - NestingError, infile, cur_index) - continue - elif cur_depth == this_section.depth: - # the new section is a sibling of the current section - parent = this_section.parent - elif cur_depth == this_section.depth + 1: - # the new section is a child the current section - parent = this_section - else: - self._handle_error("Section too nested at line %s.", - NestingError, infile, cur_index) - - sect_name = self._unquote(sect_name) - if sect_name in parent: - 
self._handle_error('Duplicate section name at line %s.', - DuplicateError, infile, cur_index) - continue - - # create the new section - this_section = Section( - parent, - cur_depth, - self, - name=sect_name) - parent[sect_name] = this_section - parent.inline_comments[sect_name] = comment - parent.comments[sect_name] = comment_list - continue - # - # it's not a section marker, - # so it should be a valid ``key = value`` line - mat = self._keyword.match(line) - if mat is None: - # it neither matched as a keyword - # or a section marker - self._handle_error( - 'Invalid line at line "%s".', - ParseError, infile, cur_index) - else: - # is a keyword value - # value will include any inline comment - (indent, key, value) = mat.groups() - if indent and (self.indent_type is None): - self.indent_type = indent - # check for a multiline value - if value[:3] in ['"""', "'''"]: - try: - value, comment, cur_index = self._multiline( - value, infile, cur_index, maxline) - except SyntaxError: - self._handle_error( - 'Parse error in value at line %s.', - ParseError, infile, cur_index) - continue - else: - if self.unrepr: - comment = '' - try: - value = unrepr(value) - except Exception, e: - if type(e) == UnknownType: - msg = 'Unknown name or type in value at line %s.' - else: - msg = 'Parse error in value at line %s.' - self._handle_error(msg, UnreprError, infile, - cur_index) - continue - else: - if self.unrepr: - comment = '' - try: - value = unrepr(value) - except Exception, e: - if isinstance(e, UnknownType): - msg = 'Unknown name or type in value at line %s.' - else: - msg = 'Parse error in value at line %s.' 
- self._handle_error(msg, UnreprError, infile, - cur_index) - continue - else: - # extract comment and lists - try: - (value, comment) = self._handle_value(value) - except SyntaxError: - self._handle_error( - 'Parse error in value at line %s.', - ParseError, infile, cur_index) - continue - # - key = self._unquote(key) - if key in this_section: - self._handle_error( - 'Duplicate keyword name at line %s.', - DuplicateError, infile, cur_index) - continue - # add the key. - # we set unrepr because if we have got this far we will never - # be creating a new section - this_section.__setitem__(key, value, unrepr=True) - this_section.inline_comments[key] = comment - this_section.comments[key] = comment_list - continue - # - if self.indent_type is None: - # no indentation used, set the type accordingly - self.indent_type = '' - - # preserve the final comment - if not self and not self.initial_comment: - self.initial_comment = comment_list - elif not reset_comment: - self.final_comment = comment_list - self.list_values = temp_list_values - - - def _match_depth(self, sect, depth): - """ - Given a section and a depth level, walk back through the sections - parents to see if the depth level matches a previous section. - - Return a reference to the right section, - or raise a SyntaxError. - """ - while depth < sect.depth: - if sect is sect.parent: - # we've reached the top level already - raise SyntaxError() - sect = sect.parent - if sect.depth == depth: - return sect - # shouldn't get here - raise SyntaxError() - - - def _handle_error(self, text, ErrorClass, infile, cur_index): - """ - Handle an error according to the error settings. - - Either raise the error or store it. 
- The error will have occured at ``cur_index`` - """ - line = infile[cur_index] - cur_index += 1 - message = text % cur_index - error = ErrorClass(message, cur_index, line) - if self.raise_errors: - # raise the error - parsing stops here - raise error - # store the error - # reraise when parsing has finished - self._errors.append(error) - - - def _unquote(self, value): - """Return an unquoted version of a value""" - if not value: - # should only happen during parsing of lists - raise SyntaxError - if (value[0] == value[-1]) and (value[0] in ('"', "'")): - value = value[1:-1] - return value - - - def _quote(self, value, multiline=True): - """ - Return a safely quoted version of a value. - - Raise a ConfigObjError if the value cannot be safely quoted. - If multiline is ``True`` (default) then use triple quotes - if necessary. - - * Don't quote values that don't need it. - * Recursively quote members of a list and return a comma joined list. - * Multiline is ``False`` for lists. - * Obey list syntax for empty and single member lists. - - If ``list_values=False`` then the value is only quoted if it contains - a ``\\n`` (is multiline) or '#'. - - If ``write_empty_values`` is set, and the value is an empty string, it - won't be quoted. - """ - if multiline and self.write_empty_values and value == '': - # Only if multiline is set, so that it is used for values not - # keys, and not values that are part of a list - return '' - - if multiline and isinstance(value, (list, tuple)): - if not value: - return ',' - elif len(value) == 1: - return self._quote(value[0], multiline=False) + ',' - return ', '.join([self._quote(val, multiline=False) - for val in value]) - if not isinstance(value, basestring): - if self.stringify: - value = str(value) - else: - raise TypeError('Value "%s" is not a string.' 
% value) - - if not value: - return '""' - - no_lists_no_quotes = not self.list_values and '\n' not in value and '#' not in value - need_triple = multiline and ((("'" in value) and ('"' in value)) or ('\n' in value )) - hash_triple_quote = multiline and not need_triple and ("'" in value) and ('"' in value) and ('#' in value) - check_for_single = (no_lists_no_quotes or not need_triple) and not hash_triple_quote - - if check_for_single: - if not self.list_values: - # we don't quote if ``list_values=False`` - quot = noquot - # for normal values either single or double quotes will do - elif '\n' in value: - # will only happen if multiline is off - e.g. '\n' in key - raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) - elif ((value[0] not in wspace_plus) and - (value[-1] not in wspace_plus) and - (',' not in value)): - quot = noquot - else: - quot = self._get_single_quote(value) - else: - # if value has '\n' or "'" *and* '"', it will need triple quotes - quot = self._get_triple_quote(value) - - if quot == noquot and '#' in value and self.list_values: - quot = self._get_single_quote(value) - - return quot % value - - - def _get_single_quote(self, value): - if ("'" in value) and ('"' in value): - raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) - elif '"' in value: - quot = squot - else: - quot = dquot - return quot - - - def _get_triple_quote(self, value): - if (value.find('"""') != -1) and (value.find("'''") != -1): - raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) - if value.find('"""') == -1: - quot = tdquot - else: - quot = tsquot - return quot - - - def _handle_value(self, value): - """ - Given a value string, unquote, remove comment, - handle lists. (including empty and single member lists) - """ - if self._inspec: - # Parsing a configspec so don't handle comments - return (value, '') - # do we look for lists in values ? 
- if not self.list_values: - mat = self._nolistvalue.match(value) - if mat is None: - raise SyntaxError() - # NOTE: we don't unquote here - return mat.groups() - # - mat = self._valueexp.match(value) - if mat is None: - # the value is badly constructed, probably badly quoted, - # or an invalid list - raise SyntaxError() - (list_values, single, empty_list, comment) = mat.groups() - if (list_values == '') and (single is None): - # change this if you want to accept empty values - raise SyntaxError() - # NOTE: note there is no error handling from here if the regex - # is wrong: then incorrect values will slip through - if empty_list is not None: - # the single comma - meaning an empty list - return ([], comment) - if single is not None: - # handle empty values - if list_values and not single: - # FIXME: the '' is a workaround because our regex now matches - # '' at the end of a list if it has a trailing comma - single = None - else: - single = single or '""' - single = self._unquote(single) - if list_values == '': - # not a list value - return (single, comment) - the_list = self._listvalueexp.findall(list_values) - the_list = [self._unquote(val) for val in the_list] - if single is not None: - the_list += [single] - return (the_list, comment) - - - def _multiline(self, value, infile, cur_index, maxline): - """Extract the value, where we are in a multiline situation.""" - quot = value[:3] - newvalue = value[3:] - single_line = self._triple_quote[quot][0] - multi_line = self._triple_quote[quot][1] - mat = single_line.match(value) - if mat is not None: - retval = list(mat.groups()) - retval.append(cur_index) - return retval - elif newvalue.find(quot) != -1: - # somehow the triple quote is missing - raise SyntaxError() - # - while cur_index < maxline: - cur_index += 1 - newvalue += '\n' - line = infile[cur_index] - if line.find(quot) == -1: - newvalue += line - else: - # end of multiline, process it - break - else: - # we've got to the end of the config, oops... 
- raise SyntaxError() - mat = multi_line.match(line) - if mat is None: - # a badly formed line - raise SyntaxError() - (value, comment) = mat.groups() - return (newvalue + value, comment, cur_index) - - - def _handle_configspec(self, configspec): - """Parse the configspec.""" - # FIXME: Should we check that the configspec was created with the - # correct settings ? (i.e. ``list_values=False``) - if not isinstance(configspec, ConfigObj): - try: - configspec = ConfigObj(configspec, - raise_errors=True, - file_error=True, - _inspec=True) - except ConfigObjError, e: - # FIXME: Should these errors have a reference - # to the already parsed ConfigObj ? - raise ConfigspecError('Parsing configspec failed: %s' % e) - except IOError, e: - raise IOError('Reading configspec failed: %s' % e) - - self.configspec = configspec - - - - def _set_configspec(self, section, copy): - """ - Called by validate. Handles setting the configspec on subsections - including sections to be validated by __many__ - """ - configspec = section.configspec - many = configspec.get('__many__') - if isinstance(many, dict): - for entry in section.sections: - if entry not in configspec: - section[entry].configspec = many - - for entry in configspec.sections: - if entry == '__many__': - continue - if entry not in section: - section[entry] = {} - section[entry]._created = True - if copy: - # copy comments - section.comments[entry] = configspec.comments.get(entry, []) - section.inline_comments[entry] = configspec.inline_comments.get(entry, '') - - # Could be a scalar when we expect a section - if isinstance(section[entry], Section): - section[entry].configspec = configspec[entry] - - - def _write_line(self, indent_string, entry, this_entry, comment): - """Write an individual line, for the write method""" - # NOTE: the calls to self._quote here handles non-StringType values. 
- if not self.unrepr: - val = self._decode_element(self._quote(this_entry)) - else: - val = repr(this_entry) - return '%s%s%s%s%s' % (indent_string, - self._decode_element(self._quote(entry, multiline=False)), - self._a_to_u(' = '), - val, - self._decode_element(comment)) - - - def _write_marker(self, indent_string, depth, entry, comment): - """Write a section marker line""" - return '%s%s%s%s%s' % (indent_string, - self._a_to_u('[' * depth), - self._quote(self._decode_element(entry), multiline=False), - self._a_to_u(']' * depth), - self._decode_element(comment)) - - - def _handle_comment(self, comment): - """Deal with a comment.""" - if not comment: - return '' - start = self.indent_type - if not comment.startswith('#'): - start += self._a_to_u(' # ') - return (start + comment) - - - # Public methods - - def write(self, outfile=None, section=None): - """ - Write the current ConfigObj as a file - - tekNico: FIXME: use StringIO instead of real files - - >>> filename = a.filename - >>> a.filename = 'test.ini' - >>> a.write() - >>> a.filename = filename - >>> a == ConfigObj('test.ini', raise_errors=True) - 1 - >>> import os - >>> os.remove('test.ini') - """ - if self.indent_type is None: - # this can be true if initialised from a dictionary - self.indent_type = DEFAULT_INDENT_TYPE - - out = [] - cs = self._a_to_u('#') - csp = self._a_to_u('# ') - if section is None: - int_val = self.interpolation - self.interpolation = False - section = self - for line in self.initial_comment: - line = self._decode_element(line) - stripped_line = line.strip() - if stripped_line and not stripped_line.startswith(cs): - line = csp + line - out.append(line) - - indent_string = self.indent_type * section.depth - for entry in (section.scalars + section.sections): - if entry in section.defaults: - # don't write out default values - continue - for comment_line in section.comments[entry]: - comment_line = self._decode_element(comment_line.lstrip()) - if comment_line and not 
comment_line.startswith(cs): - comment_line = csp + comment_line - out.append(indent_string + comment_line) - this_entry = section[entry] - comment = self._handle_comment(section.inline_comments[entry]) - - if isinstance(this_entry, dict): - # a section - out.append(self._write_marker( - indent_string, - this_entry.depth, - entry, - comment)) - out.extend(self.write(section=this_entry)) - else: - out.append(self._write_line( - indent_string, - entry, - this_entry, - comment)) - - if section is self: - for line in self.final_comment: - line = self._decode_element(line) - stripped_line = line.strip() - if stripped_line and not stripped_line.startswith(cs): - line = csp + line - out.append(line) - self.interpolation = int_val - - if section is not self: - return out - - if (self.filename is None) and (outfile is None): - # output a list of lines - # might need to encode - # NOTE: This will *screw* UTF16, each line will start with the BOM - if self.encoding: - out = [l.encode(self.encoding) for l in out] - if (self.BOM and ((self.encoding is None) or - (BOM_LIST.get(self.encoding.lower()) == 'utf_8'))): - # Add the UTF8 BOM - if not out: - out.append('') - out[0] = BOM_UTF8 + out[0] - return out - - # Turn the list to a string, joined with correct newlines - newline = self.newlines or os.linesep - if (getattr(outfile, 'mode', None) is not None and outfile.mode == 'w' - and sys.platform == 'win32' and newline == '\r\n'): - # Windows specific hack to avoid writing '\r\r\n' - newline = '\n' - output = self._a_to_u(newline).join(out) - if self.encoding: - output = output.encode(self.encoding) - if self.BOM and ((self.encoding is None) or match_utf8(self.encoding)): - # Add the UTF8 BOM - output = BOM_UTF8 + output - - if not output.endswith(newline): - output += newline - if outfile is not None: - outfile.write(output) - else: - h = open(self.filename, 'wb') - h.write(output) - h.close() - - - def validate(self, validator, preserve_errors=False, copy=False, - 
section=None): - """ - Test the ConfigObj against a configspec. - - It uses the ``validator`` object from *validate.py*. - - To run ``validate`` on the current ConfigObj, call: :: - - test = config.validate(validator) - - (Normally having previously passed in the configspec when the ConfigObj - was created - you can dynamically assign a dictionary of checks to the - ``configspec`` attribute of a section though). - - It returns ``True`` if everything passes, or a dictionary of - pass/fails (True/False). If every member of a subsection passes, it - will just have the value ``True``. (It also returns ``False`` if all - members fail). - - In addition, it converts the values from strings to their native - types if their checks pass (and ``stringify`` is set). - - If ``preserve_errors`` is ``True`` (``False`` is default) then instead - of a marking a fail with a ``False``, it will preserve the actual - exception object. This can contain info about the reason for failure. - For example the ``VdtValueTooSmallError`` indicates that the value - supplied was too small. If a value (or section) is missing it will - still be marked as ``False``. - - You must have the validate module to use ``preserve_errors=True``. - - You can then use the ``flatten_errors`` function to turn your nested - results dictionary into a flattened list of failures - useful for - displaying meaningful error messages. 
- """ - if section is None: - if self.configspec is None: - raise ValueError('No configspec supplied.') - if preserve_errors: - # We do this once to remove a top level dependency on the validate module - # Which makes importing configobj faster - from validate import VdtMissingValue - self._vdtMissingValue = VdtMissingValue - - section = self - - if copy: - section.initial_comment = section.configspec.initial_comment - section.final_comment = section.configspec.final_comment - section.encoding = section.configspec.encoding - section.BOM = section.configspec.BOM - section.newlines = section.configspec.newlines - section.indent_type = section.configspec.indent_type - - # - # section.default_values.clear() #?? - configspec = section.configspec - self._set_configspec(section, copy) - - - def validate_entry(entry, spec, val, missing, ret_true, ret_false): - section.default_values.pop(entry, None) - - try: - section.default_values[entry] = validator.get_default_value(configspec[entry]) - except (KeyError, AttributeError, validator.baseErrorClass): - # No default, bad default or validator has no 'get_default_value' - # (e.g. 
SimpleVal) - pass - - try: - check = validator.check(spec, - val, - missing=missing - ) - except validator.baseErrorClass, e: - if not preserve_errors or isinstance(e, self._vdtMissingValue): - out[entry] = False - else: - # preserve the error - out[entry] = e - ret_false = False - ret_true = False - else: - ret_false = False - out[entry] = True - if self.stringify or missing: - # if we are doing type conversion - # or the value is a supplied default - if not self.stringify: - if isinstance(check, (list, tuple)): - # preserve lists - check = [self._str(item) for item in check] - elif missing and check is None: - # convert the None from a default to a '' - check = '' - else: - check = self._str(check) - if (check != val) or missing: - section[entry] = check - if not copy and missing and entry not in section.defaults: - section.defaults.append(entry) - return ret_true, ret_false - - # - out = {} - ret_true = True - ret_false = True - - unvalidated = [k for k in section.scalars if k not in configspec] - incorrect_sections = [k for k in configspec.sections if k in section.scalars] - incorrect_scalars = [k for k in configspec.scalars if k in section.sections] - - for entry in configspec.scalars: - if entry in ('__many__', '___many___'): - # reserved names - continue - if (not entry in section.scalars) or (entry in section.defaults): - # missing entries - # or entries from defaults - missing = True - val = None - if copy and entry not in section.scalars: - # copy comments - section.comments[entry] = ( - configspec.comments.get(entry, [])) - section.inline_comments[entry] = ( - configspec.inline_comments.get(entry, '')) - # - else: - missing = False - val = section[entry] - - ret_true, ret_false = validate_entry(entry, configspec[entry], val, - missing, ret_true, ret_false) - - many = None - if '__many__' in configspec.scalars: - many = configspec['__many__'] - elif '___many___' in configspec.scalars: - many = configspec['___many___'] - - if many is not None: - for entry 
in unvalidated: - val = section[entry] - ret_true, ret_false = validate_entry(entry, many, val, False, - ret_true, ret_false) - unvalidated = [] - - for entry in incorrect_scalars: - ret_true = False - if not preserve_errors: - out[entry] = False - else: - ret_false = False - msg = 'Value %r was provided as a section' % entry - out[entry] = validator.baseErrorClass(msg) - for entry in incorrect_sections: - ret_true = False - if not preserve_errors: - out[entry] = False - else: - ret_false = False - msg = 'Section %r was provided as a single value' % entry - out[entry] = validator.baseErrorClass(msg) - - # Missing sections will have been created as empty ones when the - # configspec was read. - for entry in section.sections: - # FIXME: this means DEFAULT is not copied in copy mode - if section is self and entry == 'DEFAULT': - continue - if section[entry].configspec is None: - unvalidated.append(entry) - continue - if copy: - section.comments[entry] = configspec.comments.get(entry, []) - section.inline_comments[entry] = configspec.inline_comments.get(entry, '') - check = self.validate(validator, preserve_errors=preserve_errors, copy=copy, section=section[entry]) - out[entry] = check - if check == False: - ret_true = False - elif check == True: - ret_false = False - else: - ret_true = False - - section.extra_values = unvalidated - if preserve_errors and not section._created: - # If the section wasn't created (i.e. it wasn't missing) - # then we can't return False, we need to preserve errors - ret_false = False - # - if ret_false and preserve_errors and out: - # If we are preserving errors, but all - # the failures are from missing sections / values - # then we can return False. Otherwise there is a - # real failure that we need to preserve. 
- ret_false = not any(out.values()) - if ret_true: - return True - elif ret_false: - return False - return out - - - def reset(self): - """Clear ConfigObj instance and restore to 'freshly created' state.""" - self.clear() - self._initialise() - # FIXME: Should be done by '_initialise', but ConfigObj constructor (and reload) - # requires an empty dictionary - self.configspec = None - # Just to be sure ;-) - self._original_configspec = None - - - def reload(self): - """ - Reload a ConfigObj from file. - - This method raises a ``ReloadError`` if the ConfigObj doesn't have - a filename attribute pointing to a file. - """ - if not isinstance(self.filename, basestring): - raise ReloadError() - - filename = self.filename - current_options = {} - for entry in OPTION_DEFAULTS: - if entry == 'configspec': - continue - current_options[entry] = getattr(self, entry) - - configspec = self._original_configspec - current_options['configspec'] = configspec - - self.clear() - self._initialise(current_options) - self._load(filename, configspec) - - - -class SimpleVal(object): - """ - A simple validator. - Can be used to check that all members expected are present. - - To use it, provide a configspec with all your members in (the value given - will be ignored). Pass an instance of ``SimpleVal`` to the ``validate`` - method of your ``ConfigObj``. ``validate`` will return ``True`` if all - members are present, or a dictionary with True/False meaning - present/missing. (Whole missing sections will be replaced with ``False``) - """ - - def __init__(self): - self.baseErrorClass = ConfigObjError - - def check(self, check, member, missing=False): - """A dummy check method, always returns the value unchanged.""" - if missing: - raise self.baseErrorClass() - return member - - -def flatten_errors(cfg, res, levels=None, results=None): - """ - An example function that will turn a nested dictionary of results - (as returned by ``ConfigObj.validate``) into a flat list. 
- - ``cfg`` is the ConfigObj instance being checked, ``res`` is the results - dictionary returned by ``validate``. - - (This is a recursive function, so you shouldn't use the ``levels`` or - ``results`` arguments - they are used by the function.) - - Returns a list of keys that failed. Each member of the list is a tuple:: - - ([list of sections...], key, result) - - If ``validate`` was called with ``preserve_errors=False`` (the default) - then ``result`` will always be ``False``. - - *list of sections* is a flattened list of sections that the key was found - in. - - If the section was missing (or a section was expected and a scalar provided - - or vice-versa) then key will be ``None``. - - If the value (or section) was missing then ``result`` will be ``False``. - - If ``validate`` was called with ``preserve_errors=True`` and a value - was present, but failed the check, then ``result`` will be the exception - object returned. You can use this as a string that describes the failure. - - For example *The value "3" is of the wrong type*. - """ - if levels is None: - # first time called - levels = [] - results = [] - if res == True: - return results - if res == False or isinstance(res, Exception): - results.append((levels[:], None, res)) - if levels: - levels.pop() - return results - for (key, val) in res.items(): - if val == True: - continue - if isinstance(cfg.get(key), dict): - # Go down one level - levels.append(key) - flatten_errors(cfg[key], val, levels, results) - continue - results.append((levels[:], key, val)) - # - # Go up one level - if levels: - levels.pop() - # - return results - - -def get_extra_values(conf, _prepend=()): - """ - Find all the values and sections not in the configspec from a validated - ConfigObj. - - ``get_extra_values`` returns a list of tuples where each tuple represents - either an extra section, or an extra value. - - The tuples contain two values, a tuple representing the section the value - is in and the name of the extra values. 
For extra values in the top level - section the first member will be an empty tuple. For values in the 'foo' - section the first member will be ``('foo',)``. For members in the 'bar' - subsection of the 'foo' section the first member will be ``('foo', 'bar')``. - - NOTE: If you call ``get_extra_values`` on a ConfigObj instance that hasn't - been validated it will return an empty list. - """ - out = [] - - out.extend([(_prepend, name) for name in conf.extra_values]) - for name in conf.sections: - if name not in conf.extra_values: - out.extend(get_extra_values(conf[name], _prepend + (name,))) - return out - - -"""*A programming language is a medium of expression.* - Paul Graham""" diff --git a/data/css/style.css b/data/css/style.css index 3a6ee99b..54c35afc 100644 --- a/data/css/style.css +++ b/data/css/style.css @@ -89,6 +89,9 @@ h1{ .bigtext{ font-size: 22px; } +.updatebar{ + text-align: center; + } a:link { color: #5E2612; text-decoration: none; @@ -121,4 +124,17 @@ a.green { a.externalred { color: red; font-size:12px; - } \ No newline at end of file + } +div.progress-container { + border: 1px solid #ccc; + width: 100px; + margin: 2px 5px 2px 0; + padding: 1px; + float: left; + background: white; +} + +div.progress-container > div { + background-color: #ACE97C; + height: 12px +} \ No newline at end of file diff --git a/feedparser.py b/feedparser.py deleted file mode 100644 index b9144a9e..00000000 --- a/feedparser.py +++ /dev/null @@ -1,3909 +0,0 @@ -#!/usr/bin/env python -"""Universal feed parser - -Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds - -Visit http://feedparser.org/ for the latest version -Visit http://feedparser.org/docs/ for the latest documentation - -Required: Python 2.4 or later -Recommended: CJKCodecs and iconv_codec -""" - -__version__ = "5.0.1" -__license__ = """Copyright (c) 2002-2008, Mark Pilgrim, All rights reserved. 
- -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS' -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE.""" -__author__ = "Mark Pilgrim " -__contributors__ = ["Jason Diamond ", - "John Beimler ", - "Fazal Majid ", - "Aaron Swartz ", - "Kevin Marks ", - "Sam Ruby ", - "Ade Oshineye ", - "Martin Pool ", - "Kurt McKee "] -_debug = 0 - -# HTTP "User-Agent" header to send to servers when downloading feeds. -# If you are embedding feedparser in a larger application, you should -# change this to your application name and URL. -USER_AGENT = "UniversalFeedParser/%s +http://feedparser.org/" % __version__ - -# HTTP "Accept" header to send to servers when downloading feeds. If you don't -# want to send an Accept header, set this to None. 
-ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1" - -# List of preferred XML parsers, by SAX driver name. These will be tried first, -# but if they're not installed, Python will keep searching through its own list -# of pre-installed parsers until it finds one that supports everything we need. -PREFERRED_XML_PARSERS = ["drv_libxml2"] - -# If you want feedparser to automatically run HTML markup through HTML Tidy, set -# this to 1. Requires mxTidy -# or utidylib . -TIDY_MARKUP = 0 - -# List of Python interfaces for HTML Tidy, in order of preference. Only useful -# if TIDY_MARKUP = 1 -PREFERRED_TIDY_INTERFACES = ["uTidy", "mxTidy"] - -# If you want feedparser to automatically resolve all relative URIs, set this -# to 1. -RESOLVE_RELATIVE_URIS = 1 - -# If you want feedparser to automatically sanitize all potentially unsafe -# HTML content, set this to 1. -SANITIZE_HTML = 1 - -# ---------- Python 3 modules (make it work if possible) ---------- -try: - import rfc822 -except ImportError: - from email import _parseaddr as rfc822 - -try: - # Python 3.1 introduces bytes.maketrans and simultaneously - # deprecates string.maketrans; use bytes.maketrans if possible - _maketrans = bytes.maketrans -except (NameError, AttributeError): - import string - _maketrans = string.maketrans - -# base64 support for Atom feeds that contain embedded binary data -try: - import base64, binascii - # Python 3.1 deprecates decodestring in favor of decodebytes - _base64decode = getattr(base64, 'decodebytes', base64.decodestring) -except: - base64 = binascii = None - -def _s2bytes(s): - # Convert a UTF-8 str to bytes if the interpreter is Python 3 - try: - return bytes(s, 'utf8') - except (NameError, TypeError): - # In Python 2.5 and below, bytes doesn't exist (NameError) - # In Python 2.6 and above, bytes and str are the same (TypeError) - return s - -def _l2bytes(l): - # Convert a list of ints to 
bytes if the interpreter is Python 3 - try: - if bytes is not str: - # In Python 2.6 and above, this call won't raise an exception - # but it will return bytes([65]) as '[65]' instead of 'A' - return bytes(l) - raise NameError - except NameError: - return ''.join(map(chr, l)) - -# If you want feedparser to allow all URL schemes, set this to () -# List culled from Python's urlparse documentation at: -# http://docs.python.org/library/urlparse.html -# as well as from "URI scheme" at Wikipedia: -# https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme -# Many more will likely need to be added! -ACCEPTABLE_URI_SCHEMES = ( - 'file', 'ftp', 'gopher', 'h323', 'hdl', 'http', 'https', 'imap', 'mailto', - 'mms', 'news', 'nntp', 'prospero', 'rsync', 'rtsp', 'rtspu', 'sftp', - 'shttp', 'sip', 'sips', 'snews', 'svn', 'svn+ssh', 'telnet', 'wais', - # Additional common-but-unofficial schemes - 'aim', 'callto', 'cvs', 'facetime', 'feed', 'git', 'gtalk', 'irc', 'ircs', - 'irc6', 'itms', 'mms', 'msnim', 'skype', 'ssh', 'smb', 'svn', 'ymsg', -) -#ACCEPTABLE_URI_SCHEMES = () - -# ---------- required modules (should come with any Python distribution) ---------- -import sgmllib, re, sys, copy, urlparse, time, types, cgi, urllib, urllib2, datetime -try: - from io import BytesIO as _StringIO -except ImportError: - try: - from cStringIO import StringIO as _StringIO - except: - from StringIO import StringIO as _StringIO - -# ---------- optional modules (feedparser will work without these, but with reduced functionality) ---------- - -# gzip is included with most Python distributions, but may not be available if you compiled your own -try: - import gzip -except: - gzip = None -try: - import zlib -except: - zlib = None - -# If a real XML parser is available, feedparser will attempt to use it. feedparser has -# been tested with the built-in SAX parser, PyXML, and libxml2. 
On platforms where the -# Python distribution does not come with an XML parser (such as Mac OS X 10.2 and some -# versions of FreeBSD), feedparser will quietly fall back on regex-based parsing. -try: - import xml.sax - xml.sax.make_parser(PREFERRED_XML_PARSERS) # test for valid parsers - from xml.sax.saxutils import escape as _xmlescape - _XML_AVAILABLE = 1 -except: - _XML_AVAILABLE = 0 - def _xmlescape(data,entities={}): - data = data.replace('&', '&') - data = data.replace('>', '>') - data = data.replace('<', '<') - for char, entity in entities: - data = data.replace(char, entity) - return data - -# cjkcodecs and iconv_codec provide support for more character encodings. -# Both are available from http://cjkpython.i18n.org/ -try: - import cjkcodecs.aliases -except: - pass -try: - import iconv_codec -except: - pass - -# chardet library auto-detects character encodings -# Download from http://chardet.feedparser.org/ -try: - import chardet - if _debug: - import chardet.constants - chardet.constants._debug = 1 -except: - chardet = None - -# reversable htmlentitydefs mappings for Python 2.2 -try: - from htmlentitydefs import name2codepoint, codepoint2name -except: - import htmlentitydefs - name2codepoint={} - codepoint2name={} - for (name,codepoint) in htmlentitydefs.entitydefs.iteritems(): - if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1])) - name2codepoint[name]=ord(codepoint) - codepoint2name[ord(codepoint)]=name - -# BeautifulSoup parser used for parsing microformats from embedded HTML content -# http://www.crummy.com/software/BeautifulSoup/ -# feedparser is tested with BeautifulSoup 3.0.x, but it might work with the -# older 2.x series. If it doesn't, and you can figure out why, I'll accept a -# patch and modify the compatibility statement accordingly. 
-try: - import BeautifulSoup -except: - BeautifulSoup = None - -# ---------- don't touch these ---------- -class ThingsNobodyCaresAboutButMe(Exception): pass -class CharacterEncodingOverride(ThingsNobodyCaresAboutButMe): pass -class CharacterEncodingUnknown(ThingsNobodyCaresAboutButMe): pass -class NonXMLContentType(ThingsNobodyCaresAboutButMe): pass -class UndeclaredNamespace(Exception): pass - -sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') -sgmllib.special = re.compile(']|"[^"]*"(?=>|/|\s|\w+=)|'[^']*'(?=>|/|\s|\w+=))*(?=[<>])|.*?(?=[<>])''') - def search(self,string,index=0): - match = self.endbracket.match(string,index) - if match is not None: - # Returning a new object in the calling thread's context - # resolves a thread-safety. - return EndBracketMatch(match) - return None - class EndBracketMatch: - def __init__(self, match): - self.match = match - def start(self, n): - return self.match.end(n) - sgmllib.endbracket = EndBracketRegEx() - -SUPPORTED_VERSIONS = {'': 'unknown', - 'rss090': 'RSS 0.90', - 'rss091n': 'RSS 0.91 (Netscape)', - 'rss091u': 'RSS 0.91 (Userland)', - 'rss092': 'RSS 0.92', - 'rss093': 'RSS 0.93', - 'rss094': 'RSS 0.94', - 'rss20': 'RSS 2.0', - 'rss10': 'RSS 1.0', - 'rss': 'RSS (unknown version)', - 'atom01': 'Atom 0.1', - 'atom02': 'Atom 0.2', - 'atom03': 'Atom 0.3', - 'atom10': 'Atom 1.0', - 'atom': 'Atom (unknown version)', - 'cdf': 'CDF', - 'hotrss': 'Hot RSS' - } - -try: - UserDict = dict -except NameError: - # Python 2.1 does not have dict - from UserDict import UserDict - def dict(aList): - rc = {} - for k, v in aList: - rc[k] = v - return rc - -class FeedParserDict(UserDict): - keymap = {'channel': 'feed', - 'items': 'entries', - 'guid': 'id', - 'date': 'updated', - 'date_parsed': 'updated_parsed', - 'description': ['summary', 'subtitle'], - 'url': ['href'], - 'modified': 'updated', - 'modified_parsed': 'updated_parsed', - 'issued': 'published', - 'issued_parsed': 'published_parsed', - 'copyright': 'rights', - 
'copyright_detail': 'rights_detail', - 'tagline': 'subtitle', - 'tagline_detail': 'subtitle_detail'} - def __getitem__(self, key): - if key == 'category': - return UserDict.__getitem__(self, 'tags')[0]['term'] - if key == 'enclosures': - norel = lambda link: FeedParserDict([(name,value) for (name,value) in link.items() if name!='rel']) - return [norel(link) for link in UserDict.__getitem__(self, 'links') if link['rel']=='enclosure'] - if key == 'license': - for link in UserDict.__getitem__(self, 'links'): - if link['rel']=='license' and link.has_key('href'): - return link['href'] - if key == 'categories': - return [(tag['scheme'], tag['term']) for tag in UserDict.__getitem__(self, 'tags')] - realkey = self.keymap.get(key, key) - if type(realkey) == types.ListType: - for k in realkey: - if UserDict.__contains__(self, k): - return UserDict.__getitem__(self, k) - if UserDict.__contains__(self, key): - return UserDict.__getitem__(self, key) - return UserDict.__getitem__(self, realkey) - - def __setitem__(self, key, value): - for k in self.keymap.keys(): - if key == k: - key = self.keymap[k] - if type(key) == types.ListType: - key = key[0] - return UserDict.__setitem__(self, key, value) - - def get(self, key, default=None): - if self.has_key(key): - return self[key] - else: - return default - - def setdefault(self, key, value): - if not self.has_key(key): - self[key] = value - return self[key] - - def has_key(self, key): - try: - return hasattr(self, key) or UserDict.__contains__(self, key) - except AttributeError: - return False - # This alias prevents the 2to3 tool from changing the semantics of the - # __contains__ function below and exhausting the maximum recursion depth - __has_key = has_key - - def __getattr__(self, key): - try: - return self.__dict__[key] - except KeyError: - pass - try: - assert not key.startswith('_') - return self.__getitem__(key) - except: - raise AttributeError, "object has no attribute '%s'" % key - - def __setattr__(self, key, value): - if 
key.startswith('_') or key == 'data': - self.__dict__[key] = value - else: - return self.__setitem__(key, value) - - def __contains__(self, key): - return self.__has_key(key) - -def zopeCompatibilityHack(): - global FeedParserDict - del FeedParserDict - def FeedParserDict(aDict=None): - rc = {} - if aDict: - rc.update(aDict) - return rc - -_ebcdic_to_ascii_map = None -def _ebcdic_to_ascii(s): - global _ebcdic_to_ascii_map - if not _ebcdic_to_ascii_map: - emap = ( - 0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, - 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, - 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, - 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, - 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, - 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, - 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, - 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, - 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,201, - 202,106,107,108,109,110,111,112,113,114,203,204,205,206,207,208, - 209,126,115,116,117,118,119,120,121,122,210,211,212,213,214,215, - 216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231, - 123,65,66,67,68,69,70,71,72,73,232,233,234,235,236,237, - 125,74,75,76,77,78,79,80,81,82,238,239,240,241,242,243, - 92,159,83,84,85,86,87,88,89,90,244,245,246,247,248,249, - 48,49,50,51,52,53,54,55,56,57,250,251,252,253,254,255 - ) - _ebcdic_to_ascii_map = _maketrans( \ - _l2bytes(range(256)), _l2bytes(emap)) - return s.translate(_ebcdic_to_ascii_map) - -_cp1252 = { - unichr(128): unichr(8364), # euro sign - unichr(130): unichr(8218), # single low-9 quotation mark - unichr(131): unichr( 402), # latin small letter f with hook - unichr(132): unichr(8222), # double low-9 quotation mark - unichr(133): unichr(8230), # horizontal ellipsis - unichr(134): unichr(8224), # dagger - unichr(135): unichr(8225), # double dagger - unichr(136): unichr( 710), # modifier letter circumflex accent - 
unichr(137): unichr(8240), # per mille sign - unichr(138): unichr( 352), # latin capital letter s with caron - unichr(139): unichr(8249), # single left-pointing angle quotation mark - unichr(140): unichr( 338), # latin capital ligature oe - unichr(142): unichr( 381), # latin capital letter z with caron - unichr(145): unichr(8216), # left single quotation mark - unichr(146): unichr(8217), # right single quotation mark - unichr(147): unichr(8220), # left double quotation mark - unichr(148): unichr(8221), # right double quotation mark - unichr(149): unichr(8226), # bullet - unichr(150): unichr(8211), # en dash - unichr(151): unichr(8212), # em dash - unichr(152): unichr( 732), # small tilde - unichr(153): unichr(8482), # trade mark sign - unichr(154): unichr( 353), # latin small letter s with caron - unichr(155): unichr(8250), # single right-pointing angle quotation mark - unichr(156): unichr( 339), # latin small ligature oe - unichr(158): unichr( 382), # latin small letter z with caron - unichr(159): unichr( 376)} # latin capital letter y with diaeresis - -_urifixer = re.compile('^([A-Za-z][A-Za-z0-9+-.]*://)(/*)(.*?)') -def _urljoin(base, uri): - uri = _urifixer.sub(r'\1\3', uri) - try: - return urlparse.urljoin(base, uri) - except: - uri = urlparse.urlunparse([urllib.quote(part) for part in urlparse.urlparse(uri)]) - return urlparse.urljoin(base, uri) - -class _FeedParserMixin: - namespaces = {'': '', - 'http://backend.userland.com/rss': '', - 'http://blogs.law.harvard.edu/tech/rss': '', - 'http://purl.org/rss/1.0/': '', - 'http://my.netscape.com/rdf/simple/0.9/': '', - 'http://example.com/newformat#': '', - 'http://example.com/necho': '', - 'http://purl.org/echo/': '', - 'uri/of/echo/namespace#': '', - 'http://purl.org/pie/': '', - 'http://purl.org/atom/ns#': '', - 'http://www.w3.org/2005/Atom': '', - 'http://purl.org/rss/1.0/modules/rss091#': '', - - 'http://webns.net/mvcb/': 'admin', - 'http://purl.org/rss/1.0/modules/aggregation/': 'ag', - 
'http://purl.org/rss/1.0/modules/annotate/': 'annotate', - 'http://media.tangent.org/rss/1.0/': 'audio', - 'http://backend.userland.com/blogChannelModule': 'blogChannel', - 'http://web.resource.org/cc/': 'cc', - 'http://backend.userland.com/creativeCommonsRssModule': 'creativeCommons', - 'http://purl.org/rss/1.0/modules/company': 'co', - 'http://purl.org/rss/1.0/modules/content/': 'content', - 'http://my.theinfo.org/changed/1.0/rss/': 'cp', - 'http://purl.org/dc/elements/1.1/': 'dc', - 'http://purl.org/dc/terms/': 'dcterms', - 'http://purl.org/rss/1.0/modules/email/': 'email', - 'http://purl.org/rss/1.0/modules/event/': 'ev', - 'http://rssnamespace.org/feedburner/ext/1.0': 'feedburner', - 'http://freshmeat.net/rss/fm/': 'fm', - 'http://xmlns.com/foaf/0.1/': 'foaf', - 'http://www.w3.org/2003/01/geo/wgs84_pos#': 'geo', - 'http://postneo.com/icbm/': 'icbm', - 'http://purl.org/rss/1.0/modules/image/': 'image', - 'http://www.itunes.com/DTDs/PodCast-1.0.dtd': 'itunes', - 'http://example.com/DTDs/PodCast-1.0.dtd': 'itunes', - 'http://purl.org/rss/1.0/modules/link/': 'l', - 'http://search.yahoo.com/mrss': 'media', - #Version 1.1.2 of the Media RSS spec added the trailing slash on the namespace - 'http://search.yahoo.com/mrss/': 'media', - 'http://madskills.com/public/xml/rss/module/pingback/': 'pingback', - 'http://prismstandard.org/namespaces/1.2/basic/': 'prism', - 'http://www.w3.org/1999/02/22-rdf-syntax-ns#': 'rdf', - 'http://www.w3.org/2000/01/rdf-schema#': 'rdfs', - 'http://purl.org/rss/1.0/modules/reference/': 'ref', - 'http://purl.org/rss/1.0/modules/richequiv/': 'reqv', - 'http://purl.org/rss/1.0/modules/search/': 'search', - 'http://purl.org/rss/1.0/modules/slash/': 'slash', - 'http://schemas.xmlsoap.org/soap/envelope/': 'soap', - 'http://purl.org/rss/1.0/modules/servicestatus/': 'ss', - 'http://hacks.benhammersley.com/rss/streaming/': 'str', - 'http://purl.org/rss/1.0/modules/subscription/': 'sub', - 'http://purl.org/rss/1.0/modules/syndication/': 'sy', - 
'http://schemas.pocketsoap.com/rss/myDescModule/': 'szf', - 'http://purl.org/rss/1.0/modules/taxonomy/': 'taxo', - 'http://purl.org/rss/1.0/modules/threading/': 'thr', - 'http://purl.org/rss/1.0/modules/textinput/': 'ti', - 'http://madskills.com/public/xml/rss/module/trackback/':'trackback', - 'http://wellformedweb.org/commentAPI/': 'wfw', - 'http://purl.org/rss/1.0/modules/wiki/': 'wiki', - 'http://www.w3.org/1999/xhtml': 'xhtml', - 'http://www.w3.org/1999/xlink': 'xlink', - 'http://www.w3.org/XML/1998/namespace': 'xml' -} - _matchnamespaces = {} - - can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'icon', 'logo'] - can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description'] - can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description'] - html_types = ['text/html', 'application/xhtml+xml'] - - def __init__(self, baseuri=None, baselang=None, encoding='utf-8'): - if _debug: sys.stderr.write('initializing FeedParser\n') - if not self._matchnamespaces: - for k, v in self.namespaces.items(): - self._matchnamespaces[k.lower()] = v - self.feeddata = FeedParserDict() # feed-level data - self.encoding = encoding # character encoding - self.entries = [] # list of entry-level data - self.version = '' # feed type/version, see SUPPORTED_VERSIONS - self.namespacesInUse = {} # dictionary of namespaces defined by the feed - - # the following are used internally to track state; - # this is really out of control and should be refactored - self.infeed = 0 - self.inentry = 0 - self.incontent = 0 - self.intextinput = 0 - self.inimage = 0 - self.inauthor = 0 - self.incontributor = 0 - self.inpublisher = 0 - self.insource = 0 - self.sourcedata = FeedParserDict() - self.contentparams = FeedParserDict() - self._summaryKey = None - self.namespacemap = {} - self.elementstack = [] - self.basestack = 
[] - self.langstack = [] - self.baseuri = baseuri or '' - self.lang = baselang or None - self.svgOK = 0 - self.hasTitle = 0 - if baselang: - self.feeddata['language'] = baselang.replace('_','-') - - def unknown_starttag(self, tag, attrs): - if _debug: sys.stderr.write('start %s with %s\n' % (tag, attrs)) - # normalize attrs - attrs = [(k.lower(), v) for k, v in attrs] - attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs] - # the sgml parser doesn't handle entities in attributes, but - # strict xml parsers do -- account for this difference - if isinstance(self, _LooseFeedParser): - attrs = [(k, v.replace('&', '&')) for k, v in attrs] - - # track xml:base and xml:lang - attrsD = dict(attrs) - baseuri = attrsD.get('xml:base', attrsD.get('base')) or self.baseuri - if type(baseuri) != type(u''): - try: - baseuri = unicode(baseuri, self.encoding) - except: - baseuri = unicode(baseuri, 'iso-8859-1') - # ensure that self.baseuri is always an absolute URI that - # uses a whitelisted URI scheme (e.g. 
not `javscript:`) - if self.baseuri: - self.baseuri = _makeSafeAbsoluteURI(self.baseuri, baseuri) or self.baseuri - else: - self.baseuri = _urljoin(self.baseuri, baseuri) - lang = attrsD.get('xml:lang', attrsD.get('lang')) - if lang == '': - # xml:lang could be explicitly set to '', we need to capture that - lang = None - elif lang is None: - # if no xml:lang is specified, use parent lang - lang = self.lang - if lang: - if tag in ('feed', 'rss', 'rdf:RDF'): - self.feeddata['language'] = lang.replace('_','-') - self.lang = lang - self.basestack.append(self.baseuri) - self.langstack.append(lang) - - # track namespaces - for prefix, uri in attrs: - if prefix.startswith('xmlns:'): - self.trackNamespace(prefix[6:], uri) - elif prefix == 'xmlns': - self.trackNamespace(None, uri) - - # track inline content - if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): - if tag in ['xhtml:div', 'div']: return # typepad does this 10/2007 - # element declared itself as escaped markup, but it isn't really - self.contentparams['type'] = 'application/xhtml+xml' - if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml': - if tag.find(':') <> -1: - prefix, tag = tag.split(':', 1) - namespace = self.namespacesInUse.get(prefix, '') - if tag=='math' and namespace=='http://www.w3.org/1998/Math/MathML': - attrs.append(('xmlns',namespace)) - if tag=='svg' and namespace=='http://www.w3.org/2000/svg': - attrs.append(('xmlns',namespace)) - if tag == 'svg': self.svgOK += 1 - return self.handle_data('<%s%s>' % (tag, self.strattrs(attrs)), escape=0) - - # match namespaces - if tag.find(':') <> -1: - prefix, suffix = tag.split(':', 1) - else: - prefix, suffix = '', tag - prefix = self.namespacemap.get(prefix, prefix) - if prefix: - prefix = prefix + '_' - - # special hack for better tracking of empty textinput/image elements in illformed feeds - if (not prefix) and tag not in ('title', 'link', 'description', 
'name'): - self.intextinput = 0 - if (not prefix) and tag not in ('title', 'link', 'description', 'url', 'href', 'width', 'height'): - self.inimage = 0 - - # call special handler (if defined) or default handler - methodname = '_start_' + prefix + suffix - try: - method = getattr(self, methodname) - return method(attrsD) - except AttributeError: - # Since there's no handler or something has gone wrong we explicitly add the element and its attributes - unknown_tag = prefix + suffix - if len(attrsD) == 0: - # No attributes so merge it into the encosing dictionary - return self.push(unknown_tag, 1) - else: - # Has attributes so create it in its own dictionary - context = self._getContext() - context[unknown_tag] = attrsD - - def unknown_endtag(self, tag): - if _debug: sys.stderr.write('end %s\n' % tag) - # match namespaces - if tag.find(':') <> -1: - prefix, suffix = tag.split(':', 1) - else: - prefix, suffix = '', tag - prefix = self.namespacemap.get(prefix, prefix) - if prefix: - prefix = prefix + '_' - if suffix == 'svg' and self.svgOK: self.svgOK -= 1 - - # call special handler (if defined) or default handler - methodname = '_end_' + prefix + suffix - try: - if self.svgOK: raise AttributeError() - method = getattr(self, methodname) - method() - except AttributeError: - self.pop(prefix + suffix) - - # track inline content - if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): - # element declared itself as escaped markup, but it isn't really - if tag in ['xhtml:div', 'div']: return # typepad does this 10/2007 - self.contentparams['type'] = 'application/xhtml+xml' - if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml': - tag = tag.split(':')[-1] - self.handle_data('' % tag, escape=0) - - # track xml:base and xml:lang going out of scope - if self.basestack: - self.basestack.pop() - if self.basestack and self.basestack[-1]: - self.baseuri = self.basestack[-1] - if 
self.langstack: - self.langstack.pop() - if self.langstack: # and (self.langstack[-1] is not None): - self.lang = self.langstack[-1] - - def handle_charref(self, ref): - # called for each character reference, e.g. for ' ', ref will be '160' - if not self.elementstack: return - ref = ref.lower() - if ref in ('34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e'): - text = '&#%s;' % ref - else: - if ref[0] == 'x': - c = int(ref[1:], 16) - else: - c = int(ref) - text = unichr(c).encode('utf-8') - self.elementstack[-1][2].append(text) - - def handle_entityref(self, ref): - # called for each entity reference, e.g. for '©', ref will be 'copy' - if not self.elementstack: return - if _debug: sys.stderr.write('entering handle_entityref with %s\n' % ref) - if ref in ('lt', 'gt', 'quot', 'amp', 'apos'): - text = '&%s;' % ref - elif ref in self.entities.keys(): - text = self.entities[ref] - if text.startswith('&#') and text.endswith(';'): - return self.handle_entityref(text) - else: - try: name2codepoint[ref] - except KeyError: text = '&%s;' % ref - else: text = unichr(name2codepoint[ref]).encode('utf-8') - self.elementstack[-1][2].append(text) - - def handle_data(self, text, escape=1): - # called for each block of plain text, i.e. outside of any tag and - # not containing any character or entity references - if not self.elementstack: return - if escape and self.contentparams.get('type') == 'application/xhtml+xml': - text = _xmlescape(text) - self.elementstack[-1][2].append(text) - - def handle_comment(self, text): - # called for each comment, e.g. - pass - - def handle_pi(self, text): - # called for each processing instruction, e.g. 
- pass - - def handle_decl(self, text): - pass - - def parse_declaration(self, i): - # override internal declaration handler to handle CDATA blocks - if _debug: sys.stderr.write('entering parse_declaration\n') - if self.rawdata[i:i+9] == '', i) - if k == -1: - # CDATA block began but didn't finish - k = len(self.rawdata) - return k - self.handle_data(_xmlescape(self.rawdata[i+9:k]), 0) - return k+3 - else: - k = self.rawdata.find('>', i) - if k >= 0: - return k+1 - else: - # We have an incomplete CDATA block. - return k - - def mapContentType(self, contentType): - contentType = contentType.lower() - if contentType == 'text' or contentType == 'plain': - contentType = 'text/plain' - elif contentType == 'html': - contentType = 'text/html' - elif contentType == 'xhtml': - contentType = 'application/xhtml+xml' - return contentType - - def trackNamespace(self, prefix, uri): - loweruri = uri.lower() - if (prefix, loweruri) == (None, 'http://my.netscape.com/rdf/simple/0.9/') and not self.version: - self.version = 'rss090' - if loweruri == 'http://purl.org/rss/1.0/' and not self.version: - self.version = 'rss10' - if loweruri == 'http://www.w3.org/2005/atom' and not self.version: - self.version = 'atom10' - if loweruri.find('backend.userland.com/rss') <> -1: - # match any backend.userland.com namespace - uri = 'http://backend.userland.com/rss' - loweruri = uri - if self._matchnamespaces.has_key(loweruri): - self.namespacemap[prefix] = self._matchnamespaces[loweruri] - self.namespacesInUse[self._matchnamespaces[loweruri]] = uri - else: - self.namespacesInUse[prefix or ''] = uri - - def resolveURI(self, uri): - return _urljoin(self.baseuri or '', uri) - - def decodeEntities(self, element, data): - return data - - def strattrs(self, attrs): - return ''.join([' %s="%s"' % (t[0],_xmlescape(t[1],{'"':'"'})) for t in attrs]) - - def push(self, element, expectingText): - self.elementstack.append([element, expectingText, []]) - - def pop(self, element, stripWhitespace=1): - if not 
self.elementstack: return - if self.elementstack[-1][0] != element: return - - element, expectingText, pieces = self.elementstack.pop() - - if self.version == 'atom10' and self.contentparams.get('type','text') == 'application/xhtml+xml': - # remove enclosing child element, but only if it is a
and - # only if all the remaining content is nested underneath it. - # This means that the divs would be retained in the following: - #
foo
bar
- while pieces and len(pieces)>1 and not pieces[-1].strip(): - del pieces[-1] - while pieces and len(pieces)>1 and not pieces[0].strip(): - del pieces[0] - if pieces and (pieces[0] == '
' or pieces[0].startswith('
': - depth = 0 - for piece in pieces[:-1]: - if piece.startswith(''): - depth += 1 - else: - pieces = pieces[1:-1] - - # Ensure each piece is a str for Python 3 - for (i, v) in enumerate(pieces): - if not isinstance(v, basestring): - pieces[i] = v.decode('utf-8') - - output = ''.join(pieces) - if stripWhitespace: - output = output.strip() - if not expectingText: return output - - # decode base64 content - if base64 and self.contentparams.get('base64', 0): - try: - output = _base64decode(output) - except binascii.Error: - pass - except binascii.Incomplete: - pass - except TypeError: - # In Python 3, base64 takes and outputs bytes, not str - # This may not be the most correct way to accomplish this - output = _base64decode(output.encode('utf-8')).decode('utf-8') - - # resolve relative URIs - if (element in self.can_be_relative_uri) and output: - output = self.resolveURI(output) - - # decode entities within embedded markup - if not self.contentparams.get('base64', 0): - output = self.decodeEntities(element, output) - - if self.lookslikehtml(output): - self.contentparams['type']='text/html' - - # remove temporary cruft from contentparams - try: - del self.contentparams['mode'] - except KeyError: - pass - try: - del self.contentparams['base64'] - except KeyError: - pass - - is_htmlish = self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types - # resolve relative URIs within embedded markup - if is_htmlish and RESOLVE_RELATIVE_URIS: - if element in self.can_contain_relative_uris: - output = _resolveRelativeURIs(output, self.baseuri, self.encoding, self.contentparams.get('type', 'text/html')) - - # parse microformats - # (must do this before sanitizing because some microformats - # rely on elements that we sanitize) - if is_htmlish and element in ['content', 'description', 'summary']: - mfresults = _parseMicroformats(output, self.baseuri, self.encoding) - if mfresults: - for tag in mfresults.get('tags', []): - self._addTag(tag['term'], 
tag['scheme'], tag['label']) - for enclosure in mfresults.get('enclosures', []): - self._start_enclosure(enclosure) - for xfn in mfresults.get('xfn', []): - self._addXFN(xfn['relationships'], xfn['href'], xfn['name']) - vcard = mfresults.get('vcard') - if vcard: - self._getContext()['vcard'] = vcard - - # sanitize embedded markup - if is_htmlish and SANITIZE_HTML: - if element in self.can_contain_dangerous_markup: - output = _sanitizeHTML(output, self.encoding, self.contentparams.get('type', 'text/html')) - - if self.encoding and type(output) != type(u''): - try: - output = unicode(output, self.encoding) - except: - pass - - # address common error where people take data that is already - # utf-8, presume that it is iso-8859-1, and re-encode it. - if self.encoding in ('utf-8', 'utf-8_INVALID_PYTHON_3') and type(output) == type(u''): - try: - output = unicode(output.encode('iso-8859-1'), 'utf-8') - except: - pass - - # map win-1252 extensions to the proper code points - if type(output) == type(u''): - output = u''.join([c in _cp1252.keys() and _cp1252[c] or c for c in output]) - - # categories/tags/keywords/whatever are handled in _end_category - if element == 'category': - return output - - if element == 'title' and self.hasTitle: - return output - - # store output in appropriate place(s) - if self.inentry and not self.insource: - if element == 'content': - self.entries[-1].setdefault(element, []) - contentparams = copy.deepcopy(self.contentparams) - contentparams['value'] = output - self.entries[-1][element].append(contentparams) - elif element == 'link': - if not self.inimage: - # query variables in urls in link elements are improperly - # converted from `?a=1&b=2` to `?a=1&b;=2` as if they're - # unhandled character references. fix this special case. 
- output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output) - self.entries[-1][element] = output - if output: - self.entries[-1]['links'][-1]['href'] = output - else: - if element == 'description': - element = 'summary' - self.entries[-1][element] = output - if self.incontent: - contentparams = copy.deepcopy(self.contentparams) - contentparams['value'] = output - self.entries[-1][element + '_detail'] = contentparams - elif (self.infeed or self.insource):# and (not self.intextinput) and (not self.inimage): - context = self._getContext() - if element == 'description': - element = 'subtitle' - context[element] = output - if element == 'link': - # fix query variables; see above for the explanation - output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output) - context[element] = output - context['links'][-1]['href'] = output - elif self.incontent: - contentparams = copy.deepcopy(self.contentparams) - contentparams['value'] = output - context[element + '_detail'] = contentparams - return output - - def pushContent(self, tag, attrsD, defaultContentType, expectingText): - self.incontent += 1 - if self.lang: self.lang=self.lang.replace('_','-') - self.contentparams = FeedParserDict({ - 'type': self.mapContentType(attrsD.get('type', defaultContentType)), - 'language': self.lang, - 'base': self.baseuri}) - self.contentparams['base64'] = self._isBase64(attrsD, self.contentparams) - self.push(tag, expectingText) - - def popContent(self, tag): - value = self.pop(tag) - self.incontent -= 1 - self.contentparams.clear() - return value - - # a number of elements in a number of RSS variants are nominally plain - # text, but this is routinely ignored. This is an attempt to detect - # the most common cases. As false positives often result in silent - # data loss, this function errs on the conservative side. 
- def lookslikehtml(self, s): - if self.version.startswith('atom'): return - if self.contentparams.get('type','text/html') != 'text/plain': return - - # must have a close tag or a entity reference to qualify - if not (re.search(r'',s) or re.search("&#?\w+;",s)): return - - # all tags must be in a restricted subset of valid HTML tags - if filter(lambda t: t.lower() not in _HTMLSanitizer.acceptable_elements, - re.findall(r' -1: - prefix = name[:colonpos] - suffix = name[colonpos+1:] - prefix = self.namespacemap.get(prefix, prefix) - name = prefix + ':' + suffix - return name - - def _getAttribute(self, attrsD, name): - return attrsD.get(self._mapToStandardPrefix(name)) - - def _isBase64(self, attrsD, contentparams): - if attrsD.get('mode', '') == 'base64': - return 1 - if self.contentparams['type'].startswith('text/'): - return 0 - if self.contentparams['type'].endswith('+xml'): - return 0 - if self.contentparams['type'].endswith('/xml'): - return 0 - return 1 - - def _itsAnHrefDamnIt(self, attrsD): - href = attrsD.get('url', attrsD.get('uri', attrsD.get('href', None))) - if href: - try: - del attrsD['url'] - except KeyError: - pass - try: - del attrsD['uri'] - except KeyError: - pass - attrsD['href'] = href - return attrsD - - def _save(self, key, value, overwrite=False): - context = self._getContext() - if overwrite: - context[key] = value - else: - context.setdefault(key, value) - - def _start_rss(self, attrsD): - versionmap = {'0.91': 'rss091u', - '0.92': 'rss092', - '0.93': 'rss093', - '0.94': 'rss094'} - #If we're here then this is an RSS feed. - #If we don't have a version or have a version that starts with something - #other than RSS then there's been a mistake. Correct it. 
- if not self.version or not self.version.startswith('rss'): - attr_version = attrsD.get('version', '') - version = versionmap.get(attr_version) - if version: - self.version = version - elif attr_version.startswith('2.'): - self.version = 'rss20' - else: - self.version = 'rss' - - def _start_dlhottitles(self, attrsD): - self.version = 'hotrss' - - def _start_channel(self, attrsD): - self.infeed = 1 - self._cdf_common(attrsD) - _start_feedinfo = _start_channel - - def _cdf_common(self, attrsD): - if attrsD.has_key('lastmod'): - self._start_modified({}) - self.elementstack[-1][-1] = attrsD['lastmod'] - self._end_modified() - if attrsD.has_key('href'): - self._start_link({}) - self.elementstack[-1][-1] = attrsD['href'] - self._end_link() - - def _start_feed(self, attrsD): - self.infeed = 1 - versionmap = {'0.1': 'atom01', - '0.2': 'atom02', - '0.3': 'atom03'} - if not self.version: - attr_version = attrsD.get('version') - version = versionmap.get(attr_version) - if version: - self.version = version - else: - self.version = 'atom' - - def _end_channel(self): - self.infeed = 0 - _end_feed = _end_channel - - def _start_image(self, attrsD): - context = self._getContext() - if not self.inentry: - context.setdefault('image', FeedParserDict()) - self.inimage = 1 - self.hasTitle = 0 - self.push('image', 0) - - def _end_image(self): - self.pop('image') - self.inimage = 0 - - def _start_textinput(self, attrsD): - context = self._getContext() - context.setdefault('textinput', FeedParserDict()) - self.intextinput = 1 - self.hasTitle = 0 - self.push('textinput', 0) - _start_textInput = _start_textinput - - def _end_textinput(self): - self.pop('textinput') - self.intextinput = 0 - _end_textInput = _end_textinput - - def _start_author(self, attrsD): - self.inauthor = 1 - self.push('author', 1) - # Append a new FeedParserDict when expecting an author - context = self._getContext() - context.setdefault('authors', []) - context['authors'].append(FeedParserDict()) - 
_start_managingeditor = _start_author - _start_dc_author = _start_author - _start_dc_creator = _start_author - _start_itunes_author = _start_author - - def _end_author(self): - self.pop('author') - self.inauthor = 0 - self._sync_author_detail() - _end_managingeditor = _end_author - _end_dc_author = _end_author - _end_dc_creator = _end_author - _end_itunes_author = _end_author - - def _start_itunes_owner(self, attrsD): - self.inpublisher = 1 - self.push('publisher', 0) - - def _end_itunes_owner(self): - self.pop('publisher') - self.inpublisher = 0 - self._sync_author_detail('publisher') - - def _start_contributor(self, attrsD): - self.incontributor = 1 - context = self._getContext() - context.setdefault('contributors', []) - context['contributors'].append(FeedParserDict()) - self.push('contributor', 0) - - def _end_contributor(self): - self.pop('contributor') - self.incontributor = 0 - - def _start_dc_contributor(self, attrsD): - self.incontributor = 1 - context = self._getContext() - context.setdefault('contributors', []) - context['contributors'].append(FeedParserDict()) - self.push('name', 0) - - def _end_dc_contributor(self): - self._end_name() - self.incontributor = 0 - - def _start_name(self, attrsD): - self.push('name', 0) - _start_itunes_name = _start_name - - def _end_name(self): - value = self.pop('name') - if self.inpublisher: - self._save_author('name', value, 'publisher') - elif self.inauthor: - self._save_author('name', value) - elif self.incontributor: - self._save_contributor('name', value) - elif self.intextinput: - context = self._getContext() - context['name'] = value - _end_itunes_name = _end_name - - def _start_width(self, attrsD): - self.push('width', 0) - - def _end_width(self): - value = self.pop('width') - try: - value = int(value) - except: - value = 0 - if self.inimage: - context = self._getContext() - context['width'] = value - - def _start_height(self, attrsD): - self.push('height', 0) - - def _end_height(self): - value = 
self.pop('height') - try: - value = int(value) - except: - value = 0 - if self.inimage: - context = self._getContext() - context['height'] = value - - def _start_url(self, attrsD): - self.push('href', 1) - _start_homepage = _start_url - _start_uri = _start_url - - def _end_url(self): - value = self.pop('href') - if self.inauthor: - self._save_author('href', value) - elif self.incontributor: - self._save_contributor('href', value) - _end_homepage = _end_url - _end_uri = _end_url - - def _start_email(self, attrsD): - self.push('email', 0) - _start_itunes_email = _start_email - - def _end_email(self): - value = self.pop('email') - if self.inpublisher: - self._save_author('email', value, 'publisher') - elif self.inauthor: - self._save_author('email', value) - elif self.incontributor: - self._save_contributor('email', value) - _end_itunes_email = _end_email - - def _getContext(self): - if self.insource: - context = self.sourcedata - elif self.inimage and self.feeddata.has_key('image'): - context = self.feeddata['image'] - elif self.intextinput: - context = self.feeddata['textinput'] - elif self.inentry: - context = self.entries[-1] - else: - context = self.feeddata - return context - - def _save_author(self, key, value, prefix='author'): - context = self._getContext() - context.setdefault(prefix + '_detail', FeedParserDict()) - context[prefix + '_detail'][key] = value - self._sync_author_detail() - context.setdefault('authors', [FeedParserDict()]) - context['authors'][-1][key] = value - - def _save_contributor(self, key, value): - context = self._getContext() - context.setdefault('contributors', [FeedParserDict()]) - context['contributors'][-1][key] = value - - def _sync_author_detail(self, key='author'): - context = self._getContext() - detail = context.get('%s_detail' % key) - if detail: - name = detail.get('name') - email = detail.get('email') - if name and email: - context[key] = '%s (%s)' % (name, email) - elif name: - context[key] = name - elif email: - 
context[key] = email - else: - author, email = context.get(key), None - if not author: return - emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))(\?subject=\S+)?''', author) - if emailmatch: - email = emailmatch.group(0) - # probably a better way to do the following, but it passes all the tests - author = author.replace(email, '') - author = author.replace('()', '') - author = author.replace('<>', '') - author = author.replace('<>', '') - author = author.strip() - if author and (author[0] == '('): - author = author[1:] - if author and (author[-1] == ')'): - author = author[:-1] - author = author.strip() - if author or email: - context.setdefault('%s_detail' % key, FeedParserDict()) - if author: - context['%s_detail' % key]['name'] = author - if email: - context['%s_detail' % key]['email'] = email - - def _start_subtitle(self, attrsD): - self.pushContent('subtitle', attrsD, 'text/plain', 1) - _start_tagline = _start_subtitle - _start_itunes_subtitle = _start_subtitle - - def _end_subtitle(self): - self.popContent('subtitle') - _end_tagline = _end_subtitle - _end_itunes_subtitle = _end_subtitle - - def _start_rights(self, attrsD): - self.pushContent('rights', attrsD, 'text/plain', 1) - _start_dc_rights = _start_rights - _start_copyright = _start_rights - - def _end_rights(self): - self.popContent('rights') - _end_dc_rights = _end_rights - _end_copyright = _end_rights - - def _start_item(self, attrsD): - self.entries.append(FeedParserDict()) - self.push('item', 0) - self.inentry = 1 - self.guidislink = 0 - self.hasTitle = 0 - id = self._getAttribute(attrsD, 'rdf:about') - if id: - context = self._getContext() - context['id'] = id - self._cdf_common(attrsD) - _start_entry = _start_item - _start_product = _start_item - - def _end_item(self): - self.pop('item') - self.inentry = 0 - _end_entry = _end_item - - def _start_dc_language(self, attrsD): - self.push('language', 1) - 
_start_language = _start_dc_language - - def _end_dc_language(self): - self.lang = self.pop('language') - _end_language = _end_dc_language - - def _start_dc_publisher(self, attrsD): - self.push('publisher', 1) - _start_webmaster = _start_dc_publisher - - def _end_dc_publisher(self): - self.pop('publisher') - self._sync_author_detail('publisher') - _end_webmaster = _end_dc_publisher - - def _start_published(self, attrsD): - self.push('published', 1) - _start_dcterms_issued = _start_published - _start_issued = _start_published - - def _end_published(self): - value = self.pop('published') - self._save('published_parsed', _parse_date(value), overwrite=True) - _end_dcterms_issued = _end_published - _end_issued = _end_published - - def _start_updated(self, attrsD): - self.push('updated', 1) - _start_modified = _start_updated - _start_dcterms_modified = _start_updated - _start_pubdate = _start_updated - _start_dc_date = _start_updated - _start_lastbuilddate = _start_updated - - def _end_updated(self): - value = self.pop('updated') - parsed_value = _parse_date(value) - self._save('updated_parsed', parsed_value, overwrite=True) - _end_modified = _end_updated - _end_dcterms_modified = _end_updated - _end_pubdate = _end_updated - _end_dc_date = _end_updated - _end_lastbuilddate = _end_updated - - def _start_created(self, attrsD): - self.push('created', 1) - _start_dcterms_created = _start_created - - def _end_created(self): - value = self.pop('created') - self._save('created_parsed', _parse_date(value), overwrite=True) - _end_dcterms_created = _end_created - - def _start_expirationdate(self, attrsD): - self.push('expired', 1) - - def _end_expirationdate(self): - self._save('expired_parsed', _parse_date(self.pop('expired')), overwrite=True) - - def _start_cc_license(self, attrsD): - context = self._getContext() - value = self._getAttribute(attrsD, 'rdf:resource') - attrsD = FeedParserDict() - attrsD['rel']='license' - if value: attrsD['href']=value - 
context.setdefault('links', []).append(attrsD) - - def _start_creativecommons_license(self, attrsD): - self.push('license', 1) - _start_creativeCommons_license = _start_creativecommons_license - - def _end_creativecommons_license(self): - value = self.pop('license') - context = self._getContext() - attrsD = FeedParserDict() - attrsD['rel']='license' - if value: attrsD['href']=value - context.setdefault('links', []).append(attrsD) - del context['license'] - _end_creativeCommons_license = _end_creativecommons_license - - def _addXFN(self, relationships, href, name): - context = self._getContext() - xfn = context.setdefault('xfn', []) - value = FeedParserDict({'relationships': relationships, 'href': href, 'name': name}) - if value not in xfn: - xfn.append(value) - - def _addTag(self, term, scheme, label): - context = self._getContext() - tags = context.setdefault('tags', []) - if (not term) and (not scheme) and (not label): return - value = FeedParserDict({'term': term, 'scheme': scheme, 'label': label}) - if value not in tags: - tags.append(value) - - def _start_category(self, attrsD): - if _debug: sys.stderr.write('entering _start_category with %s\n' % repr(attrsD)) - term = attrsD.get('term') - scheme = attrsD.get('scheme', attrsD.get('domain')) - label = attrsD.get('label') - self._addTag(term, scheme, label) - self.push('category', 1) - _start_dc_subject = _start_category - _start_keywords = _start_category - - def _start_media_category(self, attrsD): - attrsD.setdefault('scheme', 'http://search.yahoo.com/mrss/category_schema') - self._start_category(attrsD) - - def _end_itunes_keywords(self): - for term in self.pop('itunes_keywords').split(): - self._addTag(term, 'http://www.itunes.com/', None) - - def _start_itunes_category(self, attrsD): - self._addTag(attrsD.get('text'), 'http://www.itunes.com/', None) - self.push('category', 1) - - def _end_category(self): - value = self.pop('category') - if not value: return - context = self._getContext() - tags = 
context['tags'] - if value and len(tags) and not tags[-1]['term']: - tags[-1]['term'] = value - else: - self._addTag(value, None, None) - _end_dc_subject = _end_category - _end_keywords = _end_category - _end_itunes_category = _end_category - _end_media_category = _end_category - - def _start_cloud(self, attrsD): - self._getContext()['cloud'] = FeedParserDict(attrsD) - - def _start_link(self, attrsD): - attrsD.setdefault('rel', 'alternate') - if attrsD['rel'] == 'self': - attrsD.setdefault('type', 'application/atom+xml') - else: - attrsD.setdefault('type', 'text/html') - context = self._getContext() - attrsD = self._itsAnHrefDamnIt(attrsD) - if attrsD.has_key('href'): - attrsD['href'] = self.resolveURI(attrsD['href']) - expectingText = self.infeed or self.inentry or self.insource - context.setdefault('links', []) - if not (self.inentry and self.inimage): - context['links'].append(FeedParserDict(attrsD)) - if attrsD.has_key('href'): - expectingText = 0 - if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types): - context['link'] = attrsD['href'] - else: - self.push('link', expectingText) - _start_producturl = _start_link - - def _end_link(self): - value = self.pop('link') - context = self._getContext() - _end_producturl = _end_link - - def _start_guid(self, attrsD): - self.guidislink = (attrsD.get('ispermalink', 'true') == 'true') - self.push('id', 1) - - def _end_guid(self): - value = self.pop('id') - self._save('guidislink', self.guidislink and not self._getContext().has_key('link')) - if self.guidislink: - # guid acts as link, but only if 'ispermalink' is not present or is 'true', - # and only if the item doesn't already have a link element - self._save('link', value) - - def _start_title(self, attrsD): - if self.svgOK: return self.unknown_starttag('title', attrsD.items()) - self.pushContent('title', attrsD, 'text/plain', self.infeed or self.inentry or self.insource) - _start_dc_title = _start_title - 
_start_media_title = _start_title - - def _end_title(self): - if self.svgOK: return - value = self.popContent('title') - if not value: return - context = self._getContext() - self.hasTitle = 1 - _end_dc_title = _end_title - - def _end_media_title(self): - hasTitle = self.hasTitle - self._end_title() - self.hasTitle = hasTitle - - def _start_description(self, attrsD): - context = self._getContext() - if context.has_key('summary'): - self._summaryKey = 'content' - self._start_content(attrsD) - else: - self.pushContent('description', attrsD, 'text/html', self.infeed or self.inentry or self.insource) - _start_dc_description = _start_description - - def _start_abstract(self, attrsD): - self.pushContent('description', attrsD, 'text/plain', self.infeed or self.inentry or self.insource) - - def _end_description(self): - if self._summaryKey == 'content': - self._end_content() - else: - value = self.popContent('description') - self._summaryKey = None - _end_abstract = _end_description - _end_dc_description = _end_description - - def _start_info(self, attrsD): - self.pushContent('info', attrsD, 'text/plain', 1) - _start_feedburner_browserfriendly = _start_info - - def _end_info(self): - self.popContent('info') - _end_feedburner_browserfriendly = _end_info - - def _start_generator(self, attrsD): - if attrsD: - attrsD = self._itsAnHrefDamnIt(attrsD) - if attrsD.has_key('href'): - attrsD['href'] = self.resolveURI(attrsD['href']) - self._getContext()['generator_detail'] = FeedParserDict(attrsD) - self.push('generator', 1) - - def _end_generator(self): - value = self.pop('generator') - context = self._getContext() - if context.has_key('generator_detail'): - context['generator_detail']['name'] = value - - def _start_admin_generatoragent(self, attrsD): - self.push('generator', 1) - value = self._getAttribute(attrsD, 'rdf:resource') - if value: - self.elementstack[-1][2].append(value) - self.pop('generator') - self._getContext()['generator_detail'] = FeedParserDict({'href': value}) - 
- def _start_admin_errorreportsto(self, attrsD): - self.push('errorreportsto', 1) - value = self._getAttribute(attrsD, 'rdf:resource') - if value: - self.elementstack[-1][2].append(value) - self.pop('errorreportsto') - - def _start_summary(self, attrsD): - context = self._getContext() - if context.has_key('summary'): - self._summaryKey = 'content' - self._start_content(attrsD) - else: - self._summaryKey = 'summary' - self.pushContent(self._summaryKey, attrsD, 'text/plain', 1) - _start_itunes_summary = _start_summary - - def _end_summary(self): - if self._summaryKey == 'content': - self._end_content() - else: - self.popContent(self._summaryKey or 'summary') - self._summaryKey = None - _end_itunes_summary = _end_summary - - def _start_enclosure(self, attrsD): - attrsD = self._itsAnHrefDamnIt(attrsD) - context = self._getContext() - attrsD['rel']='enclosure' - context.setdefault('links', []).append(FeedParserDict(attrsD)) - - def _start_source(self, attrsD): - if 'url' in attrsD: - # This means that we're processing a source element from an RSS 2.0 feed - self.sourcedata['href'] = attrsD[u'url'] - self.push('source', 1) - self.insource = 1 - self.hasTitle = 0 - - def _end_source(self): - self.insource = 0 - value = self.pop('source') - if value: - self.sourcedata['title'] = value - self._getContext()['source'] = copy.deepcopy(self.sourcedata) - self.sourcedata.clear() - - def _start_content(self, attrsD): - self.pushContent('content', attrsD, 'text/plain', 1) - src = attrsD.get('src') - if src: - self.contentparams['src'] = src - self.push('content', 1) - - def _start_prodlink(self, attrsD): - self.pushContent('content', attrsD, 'text/html', 1) - - def _start_body(self, attrsD): - self.pushContent('content', attrsD, 'application/xhtml+xml', 1) - _start_xhtml_body = _start_body - - def _start_content_encoded(self, attrsD): - self.pushContent('content', attrsD, 'text/html', 1) - _start_fullitem = _start_content_encoded - - def _end_content(self): - copyToSummary = 
self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types) - value = self.popContent('content') - if copyToSummary: - self._save('summary', value) - - _end_body = _end_content - _end_xhtml_body = _end_content - _end_content_encoded = _end_content - _end_fullitem = _end_content - _end_prodlink = _end_content - - def _start_itunes_image(self, attrsD): - self.push('itunes_image', 0) - if attrsD.get('href'): - self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')}) - _start_itunes_link = _start_itunes_image - - def _end_itunes_block(self): - value = self.pop('itunes_block', 0) - self._getContext()['itunes_block'] = (value == 'yes') and 1 or 0 - - def _end_itunes_explicit(self): - value = self.pop('itunes_explicit', 0) - # Convert 'yes' -> True, 'clean' to False, and any other value to None - # False and None both evaluate as False, so the difference can be ignored - # by applications that only need to know if the content is explicit. - self._getContext()['itunes_explicit'] = (None, False, True)[(value == 'yes' and 2) or value == 'clean' or 0] - - def _start_media_content(self, attrsD): - context = self._getContext() - context.setdefault('media_content', []) - context['media_content'].append(attrsD) - - def _start_media_thumbnail(self, attrsD): - context = self._getContext() - context.setdefault('media_thumbnail', []) - self.push('url', 1) # new - context['media_thumbnail'].append(attrsD) - - def _end_media_thumbnail(self): - url = self.pop('url') - context = self._getContext() - if url != None and len(url.strip()) != 0: - if not context['media_thumbnail'][-1].has_key('url'): - context['media_thumbnail'][-1]['url'] = url - - def _start_media_player(self, attrsD): - self.push('media_player', 0) - self._getContext()['media_player'] = FeedParserDict(attrsD) - - def _end_media_player(self): - value = self.pop('media_player') - context = self._getContext() - context['media_player']['content'] = value - - def 
_start_newlocation(self, attrsD): - self.push('newlocation', 1) - - def _end_newlocation(self): - url = self.pop('newlocation') - context = self._getContext() - # don't set newlocation if the context isn't right - if context is not self.feeddata: - return - context['newlocation'] = _makeSafeAbsoluteURI(self.baseuri, url.strip()) - -if _XML_AVAILABLE: - class _StrictFeedParser(_FeedParserMixin, xml.sax.handler.ContentHandler): - def __init__(self, baseuri, baselang, encoding): - if _debug: sys.stderr.write('trying StrictFeedParser\n') - xml.sax.handler.ContentHandler.__init__(self) - _FeedParserMixin.__init__(self, baseuri, baselang, encoding) - self.bozo = 0 - self.exc = None - self.decls = {} - - def startPrefixMapping(self, prefix, uri): - self.trackNamespace(prefix, uri) - if uri == 'http://www.w3.org/1999/xlink': - self.decls['xmlns:'+prefix] = uri - - def startElementNS(self, name, qname, attrs): - namespace, localname = name - lowernamespace = str(namespace or '').lower() - if lowernamespace.find('backend.userland.com/rss') <> -1: - # match any backend.userland.com namespace - namespace = 'http://backend.userland.com/rss' - lowernamespace = namespace - if qname and qname.find(':') > 0: - givenprefix = qname.split(':')[0] - else: - givenprefix = None - prefix = self._matchnamespaces.get(lowernamespace, givenprefix) - if givenprefix and (prefix == None or (prefix == '' and lowernamespace == '')) and not self.namespacesInUse.has_key(givenprefix): - raise UndeclaredNamespace, "'%s' is not associated with a namespace" % givenprefix - localname = str(localname).lower() - - # qname implementation is horribly broken in Python 2.1 (it - # doesn't report any), and slightly broken in Python 2.2 (it - # doesn't report the xml: namespace). So we match up namespaces - # with a known list first, and then possibly override them with - # the qnames the SAX parser gives us (if indeed it gives us any - # at all). 
Thanks to MatejC for helping me test this and - # tirelessly telling me that it didn't work yet. - attrsD, self.decls = self.decls, {} - if localname=='math' and namespace=='http://www.w3.org/1998/Math/MathML': - attrsD['xmlns']=namespace - if localname=='svg' and namespace=='http://www.w3.org/2000/svg': - attrsD['xmlns']=namespace - - if prefix: - localname = prefix.lower() + ':' + localname - elif namespace and not qname: #Expat - for name,value in self.namespacesInUse.items(): - if name and value == namespace: - localname = name + ':' + localname - break - if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname)) - - for (namespace, attrlocalname), attrvalue in attrs._attrs.items(): - lowernamespace = (namespace or '').lower() - prefix = self._matchnamespaces.get(lowernamespace, '') - if prefix: - attrlocalname = prefix + ':' + attrlocalname - attrsD[str(attrlocalname).lower()] = attrvalue - for qname in attrs.getQNames(): - attrsD[str(qname).lower()] = attrs.getValueByQName(qname) - self.unknown_starttag(localname, attrsD.items()) - - def characters(self, text): - self.handle_data(text) - - def endElementNS(self, name, qname): - namespace, localname = name - lowernamespace = str(namespace or '').lower() - if qname and qname.find(':') > 0: - givenprefix = qname.split(':')[0] - else: - givenprefix = '' - prefix = self._matchnamespaces.get(lowernamespace, givenprefix) - if prefix: - localname = prefix + ':' + localname - elif namespace and not qname: #Expat - for name,value in self.namespacesInUse.items(): - if name and value == namespace: - localname = name + ':' + localname - break - localname = str(localname).lower() - self.unknown_endtag(localname) - - def error(self, exc): - self.bozo = 1 - self.exc = exc - - def fatalError(self, exc): - self.error(exc) - raise exc - -class _BaseHTMLProcessor(sgmllib.SGMLParser): - 
special = re.compile('''[<>'"]''') - bare_ampersand = re.compile("&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)") - elements_no_end_tag = [ - 'area', 'base', 'basefont', 'br', 'col', 'command', 'embed', 'frame', - 'hr', 'img', 'input', 'isindex', 'keygen', 'link', 'meta', 'param', - 'source', 'track', 'wbr' - ] - - def __init__(self, encoding, _type): - self.encoding = encoding - self._type = _type - if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding) - sgmllib.SGMLParser.__init__(self) - - def reset(self): - self.pieces = [] - sgmllib.SGMLParser.reset(self) - - def _shorttag_replace(self, match): - tag = match.group(1) - if tag in self.elements_no_end_tag: - return '<' + tag + ' />' - else: - return '<' + tag + '>' - - def parse_starttag(self,i): - j=sgmllib.SGMLParser.parse_starttag(self, i) - if self._type == 'application/xhtml+xml': - if j>2 and self.rawdata[j-2:j]=='/>': - self.unknown_endtag(self.lasttag) - return j - - def feed(self, data): - data = re.compile(r'', self._shorttag_replace, data) # bug [ 1399464 ] Bad regexp for _shorttag_replace - data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data) - data = data.replace(''', "'") - data = data.replace('"', '"') - try: - bytes - if bytes is str: - raise NameError - self.encoding = self.encoding + '_INVALID_PYTHON_3' - except NameError: - if self.encoding and type(data) == type(u''): - data = data.encode(self.encoding) - sgmllib.SGMLParser.feed(self, data) - sgmllib.SGMLParser.close(self) - - def normalize_attrs(self, attrs): - if not attrs: return attrs - # utility method to be called by descendants - attrs = dict([(k.lower(), v) for k, v in attrs]).items() - attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs] - attrs.sort() - return attrs - - def unknown_starttag(self, tag, attrs): - # called for each start tag - # attrs is a list of (attr, value) tuples - # e.g. for
, tag='pre', attrs=[('class', 'screen')]
-        if _debug: sys.stderr.write('_BaseHTMLProcessor, unknown_starttag, tag=%s\n' % tag)
-        uattrs = []
-        strattrs=''
-        if attrs:
-            for key, value in attrs:
-                value=value.replace('>','>').replace('<','<').replace('"','"')
-                value = self.bare_ampersand.sub("&", value)
-                # thanks to Kevin Marks for this breathtaking hack to deal with (valid) high-bit attribute values in UTF-8 feeds
-                if type(value) != type(u''):
-                    try:
-                        value = unicode(value, self.encoding)
-                    except:
-                        value = unicode(value, 'iso-8859-1')
-                try:
-                    # Currently, in Python 3 the key is already a str, and cannot be decoded again
-                    uattrs.append((unicode(key, self.encoding), value))
-                except TypeError:
-                    uattrs.append((key, value))
-            strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs])
-            if self.encoding:
-                try:
-                    strattrs=strattrs.encode(self.encoding)
-                except:
-                    pass
-        if tag in self.elements_no_end_tag:
-            self.pieces.append('<%(tag)s%(strattrs)s />' % locals())
-        else:
-            self.pieces.append('<%(tag)s%(strattrs)s>' % locals())
-
-    def unknown_endtag(self, tag):
-        # called for each end tag, e.g. for 
, tag will be 'pre' - # Reconstruct the original end tag. - if tag not in self.elements_no_end_tag: - self.pieces.append("" % locals()) - - def handle_charref(self, ref): - # called for each character reference, e.g. for ' ', ref will be '160' - # Reconstruct the original character reference. - if ref.startswith('x'): - value = unichr(int(ref[1:],16)) - else: - value = unichr(int(ref)) - - if value in _cp1252.keys(): - self.pieces.append('&#%s;' % hex(ord(_cp1252[value]))[1:]) - else: - self.pieces.append('&#%(ref)s;' % locals()) - - def handle_entityref(self, ref): - # called for each entity reference, e.g. for '©', ref will be 'copy' - # Reconstruct the original entity reference. - if name2codepoint.has_key(ref): - self.pieces.append('&%(ref)s;' % locals()) - else: - self.pieces.append('&%(ref)s' % locals()) - - def handle_data(self, text): - # called for each block of plain text, i.e. outside of any tag and - # not containing any character or entity references - # Store the original text verbatim. - if _debug: sys.stderr.write('_BaseHTMLProcessor, handle_data, text=%s\n' % text) - self.pieces.append(text) - - def handle_comment(self, text): - # called for each HTML comment, e.g. - # Reconstruct the original comment. - self.pieces.append('' % locals()) - - def handle_pi(self, text): - # called for each processing instruction, e.g. - # Reconstruct original processing instruction. - self.pieces.append('' % locals()) - - def handle_decl(self, text): - # called for the DOCTYPE, if present, e.g. 
- # - # Reconstruct original DOCTYPE - self.pieces.append('' % locals()) - - _new_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9:]*\s*').match - def _scan_name(self, i, declstartpos): - rawdata = self.rawdata - n = len(rawdata) - if i == n: - return None, -1 - m = self._new_declname_match(rawdata, i) - if m: - s = m.group() - name = s.strip() - if (i + len(s)) == n: - return None, -1 # end of buffer - return name.lower(), m.end() - else: - self.handle_data(rawdata) -# self.updatepos(declstartpos, i) - return None, -1 - - def convert_charref(self, name): - return '&#%s;' % name - - def convert_entityref(self, name): - return '&%s;' % name - - def output(self): - '''Return processed HTML as a single string''' - return ''.join([str(p) for p in self.pieces]) - - def parse_declaration(self, i): - try: - return sgmllib.SGMLParser.parse_declaration(self, i) - except sgmllib.SGMLParseError: - # escape the doctype declaration and continue parsing - self.handle_data('<') - return i+1 - -class _LooseFeedParser(_FeedParserMixin, _BaseHTMLProcessor): - def __init__(self, baseuri, baselang, encoding, entities): - sgmllib.SGMLParser.__init__(self) - _FeedParserMixin.__init__(self, baseuri, baselang, encoding) - _BaseHTMLProcessor.__init__(self, encoding, 'application/xhtml+xml') - self.entities=entities - - def decodeEntities(self, element, data): - data = data.replace('<', '<') - data = data.replace('<', '<') - data = data.replace('<', '<') - data = data.replace('>', '>') - data = data.replace('>', '>') - data = data.replace('>', '>') - data = data.replace('&', '&') - data = data.replace('&', '&') - data = data.replace('"', '"') - data = data.replace('"', '"') - data = data.replace(''', ''') - data = data.replace(''', ''') - if self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): - data = data.replace('<', '<') - data = data.replace('>', '>') - data = data.replace('&', '&') - data = data.replace('"', '"') - data = 
data.replace(''', "'") - return data - - def strattrs(self, attrs): - return ''.join([' %s="%s"' % (n,v.replace('"','"')) for n,v in attrs]) - -class _MicroformatsParser: - STRING = 1 - DATE = 2 - URI = 3 - NODE = 4 - EMAIL = 5 - - known_xfn_relationships = ['contact', 'acquaintance', 'friend', 'met', 'co-worker', 'coworker', 'colleague', 'co-resident', 'coresident', 'neighbor', 'child', 'parent', 'sibling', 'brother', 'sister', 'spouse', 'wife', 'husband', 'kin', 'relative', 'muse', 'crush', 'date', 'sweetheart', 'me'] - known_binary_extensions = ['zip','rar','exe','gz','tar','tgz','tbz2','bz2','z','7z','dmg','img','sit','sitx','hqx','deb','rpm','bz2','jar','rar','iso','bin','msi','mp2','mp3','ogg','ogm','mp4','m4v','m4a','avi','wma','wmv'] - - def __init__(self, data, baseuri, encoding): - self.document = BeautifulSoup.BeautifulSoup(data) - self.baseuri = baseuri - self.encoding = encoding - if type(data) == type(u''): - data = data.encode(encoding) - self.tags = [] - self.enclosures = [] - self.xfn = [] - self.vcard = None - - def vcardEscape(self, s): - if type(s) in (type(''), type(u'')): - s = s.replace(',', '\\,').replace(';', '\\;').replace('\n', '\\n') - return s - - def vcardFold(self, s): - s = re.sub(';+$', '', s) - sFolded = '' - iMax = 75 - sPrefix = '' - while len(s) > iMax: - sFolded += sPrefix + s[:iMax] + '\n' - s = s[iMax:] - sPrefix = ' ' - iMax = 74 - sFolded += sPrefix + s - return sFolded - - def normalize(self, s): - return re.sub(r'\s+', ' ', s).strip() - - def unique(self, aList): - results = [] - for element in aList: - if element not in results: - results.append(element) - return results - - def toISO8601(self, dt): - return time.strftime('%Y-%m-%dT%H:%M:%SZ', dt) - - def getPropertyValue(self, elmRoot, sProperty, iPropertyType=4, bAllowMultiple=0, bAutoEscape=0): - all = lambda x: 1 - sProperty = sProperty.lower() - bFound = 0 - bNormalize = 1 - propertyMatch = {'class': re.compile(r'\b%s\b' % sProperty)} - if bAllowMultiple and 
(iPropertyType != self.NODE): - snapResults = [] - containers = elmRoot(['ul', 'ol'], propertyMatch) - for container in containers: - snapResults.extend(container('li')) - bFound = (len(snapResults) != 0) - if not bFound: - snapResults = elmRoot(all, propertyMatch) - bFound = (len(snapResults) != 0) - if (not bFound) and (sProperty == 'value'): - snapResults = elmRoot('pre') - bFound = (len(snapResults) != 0) - bNormalize = not bFound - if not bFound: - snapResults = [elmRoot] - bFound = (len(snapResults) != 0) - arFilter = [] - if sProperty == 'vcard': - snapFilter = elmRoot(all, propertyMatch) - for node in snapFilter: - if node.findParent(all, propertyMatch): - arFilter.append(node) - arResults = [] - for node in snapResults: - if node not in arFilter: - arResults.append(node) - bFound = (len(arResults) != 0) - if not bFound: - if bAllowMultiple: return [] - elif iPropertyType == self.STRING: return '' - elif iPropertyType == self.DATE: return None - elif iPropertyType == self.URI: return '' - elif iPropertyType == self.NODE: return None - else: return None - arValues = [] - for elmResult in arResults: - sValue = None - if iPropertyType == self.NODE: - if bAllowMultiple: - arValues.append(elmResult) - continue - else: - return elmResult - sNodeName = elmResult.name.lower() - if (iPropertyType == self.EMAIL) and (sNodeName == 'a'): - sValue = (elmResult.get('href') or '').split('mailto:').pop().split('?')[0] - if sValue: - sValue = bNormalize and self.normalize(sValue) or sValue.strip() - if (not sValue) and (sNodeName == 'abbr'): - sValue = elmResult.get('title') - if sValue: - sValue = bNormalize and self.normalize(sValue) or sValue.strip() - if (not sValue) and (iPropertyType == self.URI): - if sNodeName == 'a': sValue = elmResult.get('href') - elif sNodeName == 'img': sValue = elmResult.get('src') - elif sNodeName == 'object': sValue = elmResult.get('data') - if sValue: - sValue = bNormalize and self.normalize(sValue) or sValue.strip() - if (not sValue) and 
(sNodeName == 'img'): - sValue = elmResult.get('alt') - if sValue: - sValue = bNormalize and self.normalize(sValue) or sValue.strip() - if not sValue: - sValue = elmResult.renderContents() - sValue = re.sub(r'<\S[^>]*>', '', sValue) - sValue = sValue.replace('\r\n', '\n') - sValue = sValue.replace('\r', '\n') - if sValue: - sValue = bNormalize and self.normalize(sValue) or sValue.strip() - if not sValue: continue - if iPropertyType == self.DATE: - sValue = _parse_date_iso8601(sValue) - if bAllowMultiple: - arValues.append(bAutoEscape and self.vcardEscape(sValue) or sValue) - else: - return bAutoEscape and self.vcardEscape(sValue) or sValue - return arValues - - def findVCards(self, elmRoot, bAgentParsing=0): - sVCards = '' - - if not bAgentParsing: - arCards = self.getPropertyValue(elmRoot, 'vcard', bAllowMultiple=1) - else: - arCards = [elmRoot] - - for elmCard in arCards: - arLines = [] - - def processSingleString(sProperty): - sValue = self.getPropertyValue(elmCard, sProperty, self.STRING, bAutoEscape=1).decode(self.encoding) - if sValue: - arLines.append(self.vcardFold(sProperty.upper() + ':' + sValue)) - return sValue or u'' - - def processSingleURI(sProperty): - sValue = self.getPropertyValue(elmCard, sProperty, self.URI) - if sValue: - sContentType = '' - sEncoding = '' - sValueKey = '' - if sValue.startswith('data:'): - sEncoding = ';ENCODING=b' - sContentType = sValue.split(';')[0].split('/').pop() - sValue = sValue.split(',', 1).pop() - else: - elmValue = self.getPropertyValue(elmCard, sProperty) - if elmValue: - if sProperty != 'url': - sValueKey = ';VALUE=uri' - sContentType = elmValue.get('type', '').strip().split('/').pop().strip() - sContentType = sContentType.upper() - if sContentType == 'OCTET-STREAM': - sContentType = '' - if sContentType: - sContentType = ';TYPE=' + sContentType.upper() - arLines.append(self.vcardFold(sProperty.upper() + sEncoding + sContentType + sValueKey + ':' + sValue)) - - def processTypeValue(sProperty, arDefaultType, 
arForceType=None): - arResults = self.getPropertyValue(elmCard, sProperty, bAllowMultiple=1) - for elmResult in arResults: - arType = self.getPropertyValue(elmResult, 'type', self.STRING, 1, 1) - if arForceType: - arType = self.unique(arForceType + arType) - if not arType: - arType = arDefaultType - sValue = self.getPropertyValue(elmResult, 'value', self.EMAIL, 0) - if sValue: - arLines.append(self.vcardFold(sProperty.upper() + ';TYPE=' + ','.join(arType) + ':' + sValue)) - - # AGENT - # must do this before all other properties because it is destructive - # (removes nested class="vcard" nodes so they don't interfere with - # this vcard's other properties) - arAgent = self.getPropertyValue(elmCard, 'agent', bAllowMultiple=1) - for elmAgent in arAgent: - if re.compile(r'\bvcard\b').search(elmAgent.get('class')): - sAgentValue = self.findVCards(elmAgent, 1) + '\n' - sAgentValue = sAgentValue.replace('\n', '\\n') - sAgentValue = sAgentValue.replace(';', '\\;') - if sAgentValue: - arLines.append(self.vcardFold('AGENT:' + sAgentValue)) - # Completely remove the agent element from the parse tree - elmAgent.extract() - else: - sAgentValue = self.getPropertyValue(elmAgent, 'value', self.URI, bAutoEscape=1); - if sAgentValue: - arLines.append(self.vcardFold('AGENT;VALUE=uri:' + sAgentValue)) - - # FN (full name) - sFN = processSingleString('fn') - - # N (name) - elmName = self.getPropertyValue(elmCard, 'n') - if elmName: - sFamilyName = self.getPropertyValue(elmName, 'family-name', self.STRING, bAutoEscape=1) - sGivenName = self.getPropertyValue(elmName, 'given-name', self.STRING, bAutoEscape=1) - arAdditionalNames = self.getPropertyValue(elmName, 'additional-name', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'additional-names', self.STRING, 1, 1) - arHonorificPrefixes = self.getPropertyValue(elmName, 'honorific-prefix', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'honorific-prefixes', self.STRING, 1, 1) - arHonorificSuffixes = self.getPropertyValue(elmName, 
'honorific-suffix', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'honorific-suffixes', self.STRING, 1, 1) - arLines.append(self.vcardFold('N:' + sFamilyName + ';' + - sGivenName + ';' + - ','.join(arAdditionalNames) + ';' + - ','.join(arHonorificPrefixes) + ';' + - ','.join(arHonorificSuffixes))) - elif sFN: - # implied "N" optimization - # http://microformats.org/wiki/hcard#Implied_.22N.22_Optimization - arNames = self.normalize(sFN).split() - if len(arNames) == 2: - bFamilyNameFirst = (arNames[0].endswith(',') or - len(arNames[1]) == 1 or - ((len(arNames[1]) == 2) and (arNames[1].endswith('.')))) - if bFamilyNameFirst: - arLines.append(self.vcardFold('N:' + arNames[0] + ';' + arNames[1])) - else: - arLines.append(self.vcardFold('N:' + arNames[1] + ';' + arNames[0])) - - # SORT-STRING - sSortString = self.getPropertyValue(elmCard, 'sort-string', self.STRING, bAutoEscape=1) - if sSortString: - arLines.append(self.vcardFold('SORT-STRING:' + sSortString)) - - # NICKNAME - arNickname = self.getPropertyValue(elmCard, 'nickname', self.STRING, 1, 1) - if arNickname: - arLines.append(self.vcardFold('NICKNAME:' + ','.join(arNickname))) - - # PHOTO - processSingleURI('photo') - - # BDAY - dtBday = self.getPropertyValue(elmCard, 'bday', self.DATE) - if dtBday: - arLines.append(self.vcardFold('BDAY:' + self.toISO8601(dtBday))) - - # ADR (address) - arAdr = self.getPropertyValue(elmCard, 'adr', bAllowMultiple=1) - for elmAdr in arAdr: - arType = self.getPropertyValue(elmAdr, 'type', self.STRING, 1, 1) - if not arType: - arType = ['intl','postal','parcel','work'] # default adr types, see RFC 2426 section 3.2.1 - sPostOfficeBox = self.getPropertyValue(elmAdr, 'post-office-box', self.STRING, 0, 1) - sExtendedAddress = self.getPropertyValue(elmAdr, 'extended-address', self.STRING, 0, 1) - sStreetAddress = self.getPropertyValue(elmAdr, 'street-address', self.STRING, 0, 1) - sLocality = self.getPropertyValue(elmAdr, 'locality', self.STRING, 0, 1) - sRegion = 
self.getPropertyValue(elmAdr, 'region', self.STRING, 0, 1) - sPostalCode = self.getPropertyValue(elmAdr, 'postal-code', self.STRING, 0, 1) - sCountryName = self.getPropertyValue(elmAdr, 'country-name', self.STRING, 0, 1) - arLines.append(self.vcardFold('ADR;TYPE=' + ','.join(arType) + ':' + - sPostOfficeBox + ';' + - sExtendedAddress + ';' + - sStreetAddress + ';' + - sLocality + ';' + - sRegion + ';' + - sPostalCode + ';' + - sCountryName)) - - # LABEL - processTypeValue('label', ['intl','postal','parcel','work']) - - # TEL (phone number) - processTypeValue('tel', ['voice']) - - # EMAIL - processTypeValue('email', ['internet'], ['internet']) - - # MAILER - processSingleString('mailer') - - # TZ (timezone) - processSingleString('tz') - - # GEO (geographical information) - elmGeo = self.getPropertyValue(elmCard, 'geo') - if elmGeo: - sLatitude = self.getPropertyValue(elmGeo, 'latitude', self.STRING, 0, 1) - sLongitude = self.getPropertyValue(elmGeo, 'longitude', self.STRING, 0, 1) - arLines.append(self.vcardFold('GEO:' + sLatitude + ';' + sLongitude)) - - # TITLE - processSingleString('title') - - # ROLE - processSingleString('role') - - # LOGO - processSingleURI('logo') - - # ORG (organization) - elmOrg = self.getPropertyValue(elmCard, 'org') - if elmOrg: - sOrganizationName = self.getPropertyValue(elmOrg, 'organization-name', self.STRING, 0, 1) - if not sOrganizationName: - # implied "organization-name" optimization - # http://microformats.org/wiki/hcard#Implied_.22organization-name.22_Optimization - sOrganizationName = self.getPropertyValue(elmCard, 'org', self.STRING, 0, 1) - if sOrganizationName: - arLines.append(self.vcardFold('ORG:' + sOrganizationName)) - else: - arOrganizationUnit = self.getPropertyValue(elmOrg, 'organization-unit', self.STRING, 1, 1) - arLines.append(self.vcardFold('ORG:' + sOrganizationName + ';' + ';'.join(arOrganizationUnit))) - - # CATEGORY - arCategory = self.getPropertyValue(elmCard, 'category', self.STRING, 1, 1) + 
self.getPropertyValue(elmCard, 'categories', self.STRING, 1, 1) - if arCategory: - arLines.append(self.vcardFold('CATEGORIES:' + ','.join(arCategory))) - - # NOTE - processSingleString('note') - - # REV - processSingleString('rev') - - # SOUND - processSingleURI('sound') - - # UID - processSingleString('uid') - - # URL - processSingleURI('url') - - # CLASS - processSingleString('class') - - # KEY - processSingleURI('key') - - if arLines: - arLines = [u'BEGIN:vCard',u'VERSION:3.0'] + arLines + [u'END:vCard'] - sVCards += u'\n'.join(arLines) + u'\n' - - return sVCards.strip() - - def isProbablyDownloadable(self, elm): - attrsD = elm.attrMap - if not attrsD.has_key('href'): return 0 - linktype = attrsD.get('type', '').strip() - if linktype.startswith('audio/') or \ - linktype.startswith('video/') or \ - (linktype.startswith('application/') and not linktype.endswith('xml')): - return 1 - path = urlparse.urlparse(attrsD['href'])[2] - if path.find('.') == -1: return 0 - fileext = path.split('.').pop().lower() - return fileext in self.known_binary_extensions - - def findTags(self): - all = lambda x: 1 - for elm in self.document(all, {'rel': re.compile(r'\btag\b')}): - href = elm.get('href') - if not href: continue - urlscheme, domain, path, params, query, fragment = \ - urlparse.urlparse(_urljoin(self.baseuri, href)) - segments = path.split('/') - tag = segments.pop() - if not tag: - tag = segments.pop() - tagscheme = urlparse.urlunparse((urlscheme, domain, '/'.join(segments), '', '', '')) - if not tagscheme.endswith('/'): - tagscheme += '/' - self.tags.append(FeedParserDict({"term": tag, "scheme": tagscheme, "label": elm.string or ''})) - - def findEnclosures(self): - all = lambda x: 1 - enclosure_match = re.compile(r'\benclosure\b') - for elm in self.document(all, {'href': re.compile(r'.+')}): - if not enclosure_match.search(elm.get('rel', '')) and not self.isProbablyDownloadable(elm): continue - if elm.attrMap not in self.enclosures: - 
self.enclosures.append(elm.attrMap) - if elm.string and not elm.get('title'): - self.enclosures[-1]['title'] = elm.string - - def findXFN(self): - all = lambda x: 1 - for elm in self.document(all, {'rel': re.compile('.+'), 'href': re.compile('.+')}): - rels = elm.get('rel', '').split() - xfn_rels = [] - for rel in rels: - if rel in self.known_xfn_relationships: - xfn_rels.append(rel) - if xfn_rels: - self.xfn.append({"relationships": xfn_rels, "href": elm.get('href', ''), "name": elm.string}) - -def _parseMicroformats(htmlSource, baseURI, encoding): - if not BeautifulSoup: return - if _debug: sys.stderr.write('entering _parseMicroformats\n') - try: - p = _MicroformatsParser(htmlSource, baseURI, encoding) - except UnicodeEncodeError: - # sgmllib throws this exception when performing lookups of tags - # with non-ASCII characters in them. - return - p.vcard = p.findVCards(p.document) - p.findTags() - p.findEnclosures() - p.findXFN() - return {"tags": p.tags, "enclosures": p.enclosures, "xfn": p.xfn, "vcard": p.vcard} - -class _RelativeURIResolver(_BaseHTMLProcessor): - relative_uris = [('a', 'href'), - ('applet', 'codebase'), - ('area', 'href'), - ('blockquote', 'cite'), - ('body', 'background'), - ('del', 'cite'), - ('form', 'action'), - ('frame', 'longdesc'), - ('frame', 'src'), - ('iframe', 'longdesc'), - ('iframe', 'src'), - ('head', 'profile'), - ('img', 'longdesc'), - ('img', 'src'), - ('img', 'usemap'), - ('input', 'src'), - ('input', 'usemap'), - ('ins', 'cite'), - ('link', 'href'), - ('object', 'classid'), - ('object', 'codebase'), - ('object', 'data'), - ('object', 'usemap'), - ('q', 'cite'), - ('script', 'src')] - - def __init__(self, baseuri, encoding, _type): - _BaseHTMLProcessor.__init__(self, encoding, _type) - self.baseuri = baseuri - - def resolveURI(self, uri): - return _makeSafeAbsoluteURI(_urljoin(self.baseuri, uri.strip())) - - def unknown_starttag(self, tag, attrs): - if _debug: - sys.stderr.write('tag: [%s] with attributes: [%s]\n' % (tag, 
str(attrs))) - attrs = self.normalize_attrs(attrs) - attrs = [(key, ((tag, key) in self.relative_uris) and self.resolveURI(value) or value) for key, value in attrs] - _BaseHTMLProcessor.unknown_starttag(self, tag, attrs) - -def _resolveRelativeURIs(htmlSource, baseURI, encoding, _type): - if _debug: - sys.stderr.write('entering _resolveRelativeURIs\n') - - p = _RelativeURIResolver(baseURI, encoding, _type) - p.feed(htmlSource) - return p.output() - -def _makeSafeAbsoluteURI(base, rel=None): - # bail if ACCEPTABLE_URI_SCHEMES is empty - if not ACCEPTABLE_URI_SCHEMES: - return _urljoin(base, rel or u'') - if not base: - return rel or u'' - if not rel: - scheme = urlparse.urlparse(base)[0] - if not scheme or scheme in ACCEPTABLE_URI_SCHEMES: - return base - return u'' - uri = _urljoin(base, rel) - if uri.strip().split(':', 1)[0] not in ACCEPTABLE_URI_SCHEMES: - return u'' - return uri - -class _HTMLSanitizer(_BaseHTMLProcessor): - acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', - 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button', - 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', - 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', - 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset', - 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1', - 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', - 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter', - 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option', - 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select', - 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong', - 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', - 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video', 'noscript'] - - acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey', - 'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis', - 'background', 
'balance', 'bgcolor', 'bgproperties', 'border', - 'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding', - 'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff', - 'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color', 'cols', - 'colspan', 'compact', 'contenteditable', 'controls', 'coords', 'data', - 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default', 'delay', - 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end', 'face', 'for', - 'form', 'frame', 'galleryimg', 'gutter', 'headers', 'height', 'hidefocus', - 'hidden', 'high', 'href', 'hreflang', 'hspace', 'icon', 'id', 'inputmode', - 'ismap', 'keytype', 'label', 'leftspacing', 'lang', 'list', 'longdesc', - 'loop', 'loopcount', 'loopend', 'loopstart', 'low', 'lowsrc', 'max', - 'maxlength', 'media', 'method', 'min', 'multiple', 'name', 'nohref', - 'noshade', 'nowrap', 'open', 'optimum', 'pattern', 'ping', 'point-size', - 'prompt', 'pqg', 'radiogroup', 'readonly', 'rel', 'repeat-max', - 'repeat-min', 'replace', 'required', 'rev', 'rightspacing', 'rows', - 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', - 'start', 'step', 'summary', 'suppress', 'tabindex', 'target', 'template', - 'title', 'toppadding', 'type', 'unselectable', 'usemap', 'urn', 'valign', - 'value', 'variable', 'volume', 'vspace', 'vrml', 'width', 'wrap', - 'xml:lang'] - - unacceptable_elements_with_end_tag = ['script', 'applet', 'style'] - - acceptable_css_properties = ['azimuth', 'background-color', - 'border-bottom-color', 'border-collapse', 'border-color', - 'border-left-color', 'border-right-color', 'border-top-color', 'clear', - 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font', - 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight', - 'height', 'letter-spacing', 'line-height', 'overflow', 'pause', - 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness', - 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation', - 
'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent', - 'unicode-bidi', 'vertical-align', 'voice-family', 'volume', - 'white-space', 'width'] - - # survey of common keywords found in feeds - acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue', - 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed', - 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left', - 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive', - 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top', - 'transparent', 'underline', 'white', 'yellow'] - - valid_css_values = re.compile('^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|' + - '\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$') - - mathml_elements = ['annotation', 'annotation-xml', 'maction', 'math', - 'merror', 'mfenced', 'mfrac', 'mi', 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', - 'mphantom', 'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', - 'msub', 'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', - 'munderover', 'none', 'semantics'] - - mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign', - 'columnalign', 'close', 'columnlines', 'columnspacing', 'columnspan', 'depth', - 'display', 'displaystyle', 'encoding', 'equalcolumns', 'equalrows', - 'fence', 'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', - 'lspace', 'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', - 'maxsize', 'minsize', 'open', 'other', 'rowalign', 'rowalign', 'rowalign', - 'rowlines', 'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection', - 'separator', 'separators', 'stretchy', 'width', 'width', 'xlink:href', - 'xlink:show', 'xlink:type', 'xmlns', 'xmlns:xlink'] - - # svgtiny - foreignObject + linearGradient + radialGradient + stop - svg_elements = ['a', 'animate', 'animateColor', 'animateMotion', - 'animateTransform', 'circle', 'defs', 'desc', 'ellipse', 'foreignObject', - 'font-face', 
'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', - 'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph', 'mpath', - 'path', 'polygon', 'polyline', 'radialGradient', 'rect', 'set', 'stop', - 'svg', 'switch', 'text', 'title', 'tspan', 'use'] - - # svgtiny + class + opacity + offset + xmlns + xmlns:xlink - svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic', - 'arabic-form', 'ascent', 'attributeName', 'attributeType', - 'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height', - 'class', 'color', 'color-rendering', 'content', 'cx', 'cy', 'd', 'dx', - 'dy', 'descent', 'display', 'dur', 'end', 'fill', 'fill-opacity', - 'fill-rule', 'font-family', 'font-size', 'font-stretch', 'font-style', - 'font-variant', 'font-weight', 'from', 'fx', 'fy', 'g1', 'g2', - 'glyph-name', 'gradientUnits', 'hanging', 'height', 'horiz-adv-x', - 'horiz-origin-x', 'id', 'ideographic', 'k', 'keyPoints', 'keySplines', - 'keyTimes', 'lang', 'mathematical', 'marker-end', 'marker-mid', - 'marker-start', 'markerHeight', 'markerUnits', 'markerWidth', 'max', - 'min', 'name', 'offset', 'opacity', 'orient', 'origin', - 'overline-position', 'overline-thickness', 'panose-1', 'path', - 'pathLength', 'points', 'preserveAspectRatio', 'r', 'refX', 'refY', - 'repeatCount', 'repeatDur', 'requiredExtensions', 'requiredFeatures', - 'restart', 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', - 'stop-color', 'stop-opacity', 'strikethrough-position', - 'strikethrough-thickness', 'stroke', 'stroke-dasharray', - 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin', - 'stroke-miterlimit', 'stroke-opacity', 'stroke-width', 'systemLanguage', - 'target', 'text-anchor', 'to', 'transform', 'type', 'u1', 'u2', - 'underline-position', 'underline-thickness', 'unicode', 'unicode-range', - 'units-per-em', 'values', 'version', 'viewBox', 'visibility', 'width', - 'widths', 'x', 'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole', - 'xlink:href', 'xlink:role', 'xlink:show', 
'xlink:title', 'xlink:type', - 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y', 'y1', - 'y2', 'zoomAndPan'] - - svg_attr_map = None - svg_elem_map = None - - acceptable_svg_properties = [ 'fill', 'fill-opacity', 'fill-rule', - 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin', - 'stroke-opacity'] - - def reset(self): - _BaseHTMLProcessor.reset(self) - self.unacceptablestack = 0 - self.mathmlOK = 0 - self.svgOK = 0 - - def unknown_starttag(self, tag, attrs): - acceptable_attributes = self.acceptable_attributes - keymap = {} - if not tag in self.acceptable_elements or self.svgOK: - if tag in self.unacceptable_elements_with_end_tag: - self.unacceptablestack += 1 - - # add implicit namespaces to html5 inline svg/mathml - if self._type.endswith('html'): - if not dict(attrs).get('xmlns'): - if tag=='svg': - attrs.append( ('xmlns','http://www.w3.org/2000/svg') ) - if tag=='math': - attrs.append( ('xmlns','http://www.w3.org/1998/Math/MathML') ) - - # not otherwise acceptable, perhaps it is MathML or SVG? - if tag=='math' and ('xmlns','http://www.w3.org/1998/Math/MathML') in attrs: - self.mathmlOK += 1 - if tag=='svg' and ('xmlns','http://www.w3.org/2000/svg') in attrs: - self.svgOK += 1 - - # chose acceptable attributes based on tag class, else bail - if self.mathmlOK and tag in self.mathml_elements: - acceptable_attributes = self.mathml_attributes - elif self.svgOK and tag in self.svg_elements: - # for most vocabularies, lowercasing is a good idea. 
Many - # svg elements, however, are camel case - if not self.svg_attr_map: - lower=[attr.lower() for attr in self.svg_attributes] - mix=[a for a in self.svg_attributes if a not in lower] - self.svg_attributes = lower - self.svg_attr_map = dict([(a.lower(),a) for a in mix]) - - lower=[attr.lower() for attr in self.svg_elements] - mix=[a for a in self.svg_elements if a not in lower] - self.svg_elements = lower - self.svg_elem_map = dict([(a.lower(),a) for a in mix]) - acceptable_attributes = self.svg_attributes - tag = self.svg_elem_map.get(tag,tag) - keymap = self.svg_attr_map - elif not tag in self.acceptable_elements: - return - - # declare xlink namespace, if needed - if self.mathmlOK or self.svgOK: - if filter(lambda (n,v): n.startswith('xlink:'),attrs): - if not ('xmlns:xlink','http://www.w3.org/1999/xlink') in attrs: - attrs.append(('xmlns:xlink','http://www.w3.org/1999/xlink')) - - clean_attrs = [] - for key, value in self.normalize_attrs(attrs): - if key in acceptable_attributes: - key=keymap.get(key,key) - # make sure the uri uses an acceptable uri scheme - if key == u'href': - value = _makeSafeAbsoluteURI(value) - clean_attrs.append((key,value)) - elif key=='style': - clean_value = self.sanitize_style(value) - if clean_value: clean_attrs.append((key,clean_value)) - _BaseHTMLProcessor.unknown_starttag(self, tag, clean_attrs) - - def unknown_endtag(self, tag): - if not tag in self.acceptable_elements: - if tag in self.unacceptable_elements_with_end_tag: - self.unacceptablestack -= 1 - if self.mathmlOK and tag in self.mathml_elements: - if tag == 'math' and self.mathmlOK: self.mathmlOK -= 1 - elif self.svgOK and tag in self.svg_elements: - tag = self.svg_elem_map.get(tag,tag) - if tag == 'svg' and self.svgOK: self.svgOK -= 1 - else: - return - _BaseHTMLProcessor.unknown_endtag(self, tag) - - def handle_pi(self, text): - pass - - def handle_decl(self, text): - pass - - def handle_data(self, text): - if not self.unacceptablestack: - 
_BaseHTMLProcessor.handle_data(self, text) - - def sanitize_style(self, style): - # disallow urls - style=re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ',style) - - # gauntlet - if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return '' - # This replaced a regexp that used re.match and was prone to pathological back-tracking. - if re.sub("\s*[-\w]+\s*:\s*[^:;]*;?", '', style).strip(): return '' - - clean = [] - for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style): - if not value: continue - if prop.lower() in self.acceptable_css_properties: - clean.append(prop + ': ' + value + ';') - elif prop.split('-')[0].lower() in ['background','border','margin','padding']: - for keyword in value.split(): - if not keyword in self.acceptable_css_keywords and \ - not self.valid_css_values.match(keyword): - break - else: - clean.append(prop + ': ' + value + ';') - elif self.svgOK and prop.lower() in self.acceptable_svg_properties: - clean.append(prop + ': ' + value + ';') - - return ' '.join(clean) - - def parse_comment(self, i, report=1): - ret = _BaseHTMLProcessor.parse_comment(self, i, report) - if ret >= 0: - return ret - # if ret == -1, this may be a malicious attempt to circumvent - # sanitization, or a page-destroying unclosed comment - match = re.compile(r'--[^>]*>').search(self.rawdata, i+4) - if match: - return match.end() - # unclosed comment; deliberately fail to handle_data() - return len(self.rawdata) - - -def _sanitizeHTML(htmlSource, encoding, _type): - p = _HTMLSanitizer(encoding, _type) - htmlSource = htmlSource.replace(''): - data = data.split('>', 1)[1] - if data.count('= '2.3.3' - assert base64 != None - user, passw = _base64decode(req.headers['Authorization'].split(' ')[1]).split(':') - realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0] - self.add_password(realm, host, user, passw) - retry = self.http_error_auth_reqed('www-authenticate', host, req, headers) - self.reset_retry_count() 
- return retry - except: - return self.http_error_default(req, fp, code, msg, headers) - -def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers): - """URL, filename, or string --> stream - - This function lets you define parsers that take any input source - (URL, pathname to local or network file, or actual data as a string) - and deal with it in a uniform manner. Returned object is guaranteed - to have all the basic stdio read methods (read, readline, readlines). - Just .close() the object when you're done with it. - - If the etag argument is supplied, it will be used as the value of an - If-None-Match request header. - - If the modified argument is supplied, it can be a tuple of 9 integers - (as returned by gmtime() in the standard Python time module) or a date - string in any format supported by feedparser. Regardless, it MUST - be in GMT (Greenwich Mean Time). It will be reformatted into an - RFC 1123-compliant date and used as the value of an If-Modified-Since - request header. - - If the agent argument is supplied, it will be used as the value of a - User-Agent request header. - - If the referrer argument is supplied, it will be used as the value of a - Referer[sic] request header. - - If handlers is supplied, it is a list of handlers used to build a - urllib2 opener. - - if request_headers is supplied it is a dictionary of HTTP request headers - that will override the values generated by FeedParser. 
- """ - - if hasattr(url_file_stream_or_string, 'read'): - return url_file_stream_or_string - - if url_file_stream_or_string == '-': - return sys.stdin - - if urlparse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp', 'file', 'feed'): - # Deal with the feed URI scheme - if url_file_stream_or_string.startswith('feed:http'): - url_file_stream_or_string = url_file_stream_or_string[5:] - elif url_file_stream_or_string.startswith('feed:'): - url_file_stream_or_string = 'http:' + url_file_stream_or_string[5:] - if not agent: - agent = USER_AGENT - # test for inline user:password for basic auth - auth = None - if base64: - urltype, rest = urllib.splittype(url_file_stream_or_string) - realhost, rest = urllib.splithost(rest) - if realhost: - user_passwd, realhost = urllib.splituser(realhost) - if user_passwd: - url_file_stream_or_string = '%s://%s%s' % (urltype, realhost, rest) - auth = base64.standard_b64encode(user_passwd).strip() - - # iri support - try: - if isinstance(url_file_stream_or_string,unicode): - url_file_stream_or_string = url_file_stream_or_string.encode('idna').decode('utf-8') - else: - url_file_stream_or_string = url_file_stream_or_string.decode('utf-8').encode('idna').decode('utf-8') - except: - pass - - # try to open with urllib2 (to use optional headers) - request = _build_urllib2_request(url_file_stream_or_string, agent, etag, modified, referrer, auth, request_headers) - opener = apply(urllib2.build_opener, tuple(handlers + [_FeedURLHandler()])) - opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent - try: - return opener.open(request) - finally: - opener.close() # JohnD - - # try to open with native open function (if url_file_stream_or_string is a filename) - try: - return open(url_file_stream_or_string, 'rb') - except: - pass - - # treat url_file_stream_or_string as string - return _StringIO(str(url_file_stream_or_string)) - -def _build_urllib2_request(url, agent, etag, modified, referrer, auth, 
request_headers): - request = urllib2.Request(url) - request.add_header('User-Agent', agent) - if etag: - request.add_header('If-None-Match', etag) - if type(modified) == type(''): - modified = _parse_date(modified) - elif isinstance(modified, datetime.datetime): - modified = modified.utctimetuple() - if modified: - # format into an RFC 1123-compliant timestamp. We can't use - # time.strftime() since the %a and %b directives can be affected - # by the current locale, but RFC 2616 states that dates must be - # in English. - short_weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] - months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - request.add_header('If-Modified-Since', '%s, %02d %s %04d %02d:%02d:%02d GMT' % (short_weekdays[modified[6]], modified[2], months[modified[1] - 1], modified[0], modified[3], modified[4], modified[5])) - if referrer: - request.add_header('Referer', referrer) - if gzip and zlib: - request.add_header('Accept-encoding', 'gzip, deflate') - elif gzip: - request.add_header('Accept-encoding', 'gzip') - elif zlib: - request.add_header('Accept-encoding', 'deflate') - else: - request.add_header('Accept-encoding', '') - if auth: - request.add_header('Authorization', 'Basic %s' % auth) - if ACCEPT_HEADER: - request.add_header('Accept', ACCEPT_HEADER) - # use this for whatever -- cookies, special headers, etc - # [('Cookie','Something'),('x-special-header','Another Value')] - for header_name, header_value in request_headers.items(): - request.add_header(header_name, header_value) - request.add_header('A-IM', 'feed') # RFC 3229 support - return request - -_date_handlers = [] -def registerDateHandler(func): - '''Register a date handler function (takes string, returns 9-tuple date in GMT)''' - _date_handlers.insert(0, func) - -# ISO-8601 date parsing routines written by Fazal Majid. 
-# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601 -# parser is beyond the scope of feedparser and would be a worthwhile addition -# to the Python library. -# A single regular expression cannot parse ISO 8601 date formats into groups -# as the standard is highly irregular (for instance is 030104 2003-01-04 or -# 0301-04-01), so we use templates instead. -# Please note the order in templates is significant because we need a -# greedy match. -_iso8601_tmpl = ['YYYY-?MM-?DD', 'YYYY-0MM?-?DD', 'YYYY-MM', 'YYYY-?OOO', - 'YY-?MM-?DD', 'YY-?OOO', 'YYYY', - '-YY-?MM', '-OOO', '-YY', - '--MM-?DD', '--MM', - '---DD', - 'CC', ''] -_iso8601_re = [ - tmpl.replace( - 'YYYY', r'(?P\d{4})').replace( - 'YY', r'(?P\d\d)').replace( - 'MM', r'(?P[01]\d)').replace( - 'DD', r'(?P[0123]\d)').replace( - 'OOO', r'(?P[0123]\d\d)').replace( - 'CC', r'(?P\d\d$)') - + r'(T?(?P\d{2}):(?P\d{2})' - + r'(:(?P\d{2}))?' - + r'(\.(?P\d+))?' - + r'(?P[+-](?P\d{2})(:(?P\d{2}))?|Z)?)?' - for tmpl in _iso8601_tmpl] -try: - del tmpl -except NameError: - pass -_iso8601_matches = [re.compile(regex).match for regex in _iso8601_re] -try: - del regex -except NameError: - pass -def _parse_date_iso8601(dateString): - '''Parse a variety of ISO-8601-compatible formats like 20040105''' - m = None - for _iso8601_match in _iso8601_matches: - m = _iso8601_match(dateString) - if m: break - if not m: return - if m.span() == (0, 0): return - params = m.groupdict() - ordinal = params.get('ordinal', 0) - if ordinal: - ordinal = int(ordinal) - else: - ordinal = 0 - year = params.get('year', '--') - if not year or year == '--': - year = time.gmtime()[0] - elif len(year) == 2: - # ISO 8601 assumes current century, i.e. 
93 -> 2093, NOT 1993 - year = 100 * int(time.gmtime()[0] / 100) + int(year) - else: - year = int(year) - month = params.get('month', '-') - if not month or month == '-': - # ordinals are NOT normalized by mktime, we simulate them - # by setting month=1, day=ordinal - if ordinal: - month = 1 - else: - month = time.gmtime()[1] - month = int(month) - day = params.get('day', 0) - if not day: - # see above - if ordinal: - day = ordinal - elif params.get('century', 0) or \ - params.get('year', 0) or params.get('month', 0): - day = 1 - else: - day = time.gmtime()[2] - else: - day = int(day) - # special case of the century - is the first year of the 21st century - # 2000 or 2001 ? The debate goes on... - if 'century' in params.keys(): - year = (int(params['century']) - 1) * 100 + 1 - # in ISO 8601 most fields are optional - for field in ['hour', 'minute', 'second', 'tzhour', 'tzmin']: - if not params.get(field, None): - params[field] = 0 - hour = int(params.get('hour', 0)) - minute = int(params.get('minute', 0)) - second = int(float(params.get('second', 0))) - # weekday is normalized by mktime(), we can ignore it - weekday = 0 - daylight_savings_flag = -1 - tm = [year, month, day, hour, minute, second, weekday, - ordinal, daylight_savings_flag] - # ISO 8601 time zone adjustments - tz = params.get('tz') - if tz and tz != 'Z': - if tz[0] == '-': - tm[3] += int(params.get('tzhour', 0)) - tm[4] += int(params.get('tzmin', 0)) - elif tz[0] == '+': - tm[3] -= int(params.get('tzhour', 0)) - tm[4] -= int(params.get('tzmin', 0)) - else: - return None - # Python's time.mktime() is a wrapper around the ANSI C mktime(3c) - # which is guaranteed to normalize d/m/y/h/m/s. - # Many implementations have bugs, but we'll pretend they don't. - return time.localtime(time.mktime(tuple(tm))) -registerDateHandler(_parse_date_iso8601) - -# 8-bit date handling routines written by ytrewq1. 
-_korean_year = u'\ub144' # b3e2 in euc-kr -_korean_month = u'\uc6d4' # bff9 in euc-kr -_korean_day = u'\uc77c' # c0cf in euc-kr -_korean_am = u'\uc624\uc804' # bfc0 c0fc in euc-kr -_korean_pm = u'\uc624\ud6c4' # bfc0 c8c4 in euc-kr - -_korean_onblog_date_re = \ - re.compile('(\d{4})%s\s+(\d{2})%s\s+(\d{2})%s\s+(\d{2}):(\d{2}):(\d{2})' % \ - (_korean_year, _korean_month, _korean_day)) -_korean_nate_date_re = \ - re.compile(u'(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})' % \ - (_korean_am, _korean_pm)) -def _parse_date_onblog(dateString): - '''Parse a string according to the OnBlog 8-bit date format''' - m = _korean_onblog_date_re.match(dateString) - if not m: return - w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \ - {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\ - 'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\ - 'zonediff': '+09:00'} - if _debug: sys.stderr.write('OnBlog date parsed as: %s\n' % w3dtfdate) - return _parse_date_w3dtf(w3dtfdate) -registerDateHandler(_parse_date_onblog) - -def _parse_date_nate(dateString): - '''Parse a string according to the Nate 8-bit date format''' - m = _korean_nate_date_re.match(dateString) - if not m: return - hour = int(m.group(5)) - ampm = m.group(4) - if (ampm == _korean_pm): - hour += 12 - hour = str(hour) - if len(hour) == 1: - hour = '0' + hour - w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \ - {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\ - 'hour': hour, 'minute': m.group(6), 'second': m.group(7),\ - 'zonediff': '+09:00'} - if _debug: sys.stderr.write('Nate date parsed as: %s\n' % w3dtfdate) - return _parse_date_w3dtf(w3dtfdate) -registerDateHandler(_parse_date_nate) - -_mssql_date_re = \ - re.compile('(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})(\.\d+)?') -def _parse_date_mssql(dateString): - '''Parse a string according to the MS SQL date format''' - m = 
_mssql_date_re.match(dateString) - if not m: return - w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \ - {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\ - 'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\ - 'zonediff': '+09:00'} - if _debug: sys.stderr.write('MS SQL date parsed as: %s\n' % w3dtfdate) - return _parse_date_w3dtf(w3dtfdate) -registerDateHandler(_parse_date_mssql) - -# Unicode strings for Greek date strings -_greek_months = \ - { \ - u'\u0399\u03b1\u03bd': u'Jan', # c9e1ed in iso-8859-7 - u'\u03a6\u03b5\u03b2': u'Feb', # d6e5e2 in iso-8859-7 - u'\u039c\u03ac\u03ce': u'Mar', # ccdcfe in iso-8859-7 - u'\u039c\u03b1\u03ce': u'Mar', # cce1fe in iso-8859-7 - u'\u0391\u03c0\u03c1': u'Apr', # c1f0f1 in iso-8859-7 - u'\u039c\u03ac\u03b9': u'May', # ccdce9 in iso-8859-7 - u'\u039c\u03b1\u03ca': u'May', # cce1fa in iso-8859-7 - u'\u039c\u03b1\u03b9': u'May', # cce1e9 in iso-8859-7 - u'\u0399\u03bf\u03cd\u03bd': u'Jun', # c9effded in iso-8859-7 - u'\u0399\u03bf\u03bd': u'Jun', # c9efed in iso-8859-7 - u'\u0399\u03bf\u03cd\u03bb': u'Jul', # c9effdeb in iso-8859-7 - u'\u0399\u03bf\u03bb': u'Jul', # c9f9eb in iso-8859-7 - u'\u0391\u03cd\u03b3': u'Aug', # c1fde3 in iso-8859-7 - u'\u0391\u03c5\u03b3': u'Aug', # c1f5e3 in iso-8859-7 - u'\u03a3\u03b5\u03c0': u'Sep', # d3e5f0 in iso-8859-7 - u'\u039f\u03ba\u03c4': u'Oct', # cfeaf4 in iso-8859-7 - u'\u039d\u03bf\u03ad': u'Nov', # cdefdd in iso-8859-7 - u'\u039d\u03bf\u03b5': u'Nov', # cdefe5 in iso-8859-7 - u'\u0394\u03b5\u03ba': u'Dec', # c4e5ea in iso-8859-7 - } - -_greek_wdays = \ - { \ - u'\u039a\u03c5\u03c1': u'Sun', # caf5f1 in iso-8859-7 - u'\u0394\u03b5\u03c5': u'Mon', # c4e5f5 in iso-8859-7 - u'\u03a4\u03c1\u03b9': u'Tue', # d4f1e9 in iso-8859-7 - u'\u03a4\u03b5\u03c4': u'Wed', # d4e5f4 in iso-8859-7 - u'\u03a0\u03b5\u03bc': u'Thu', # d0e5ec in iso-8859-7 - u'\u03a0\u03b1\u03c1': u'Fri', # d0e1f1 in iso-8859-7 - u'\u03a3\u03b1\u03b2': u'Sat', # 
d3e1e2 in iso-8859-7 - } - -_greek_date_format_re = \ - re.compile(u'([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)') - -def _parse_date_greek(dateString): - '''Parse a string according to a Greek 8-bit date format.''' - m = _greek_date_format_re.match(dateString) - if not m: return - try: - wday = _greek_wdays[m.group(1)] - month = _greek_months[m.group(3)] - except: - return - rfc822date = '%(wday)s, %(day)s %(month)s %(year)s %(hour)s:%(minute)s:%(second)s %(zonediff)s' % \ - {'wday': wday, 'day': m.group(2), 'month': month, 'year': m.group(4),\ - 'hour': m.group(5), 'minute': m.group(6), 'second': m.group(7),\ - 'zonediff': m.group(8)} - if _debug: sys.stderr.write('Greek date parsed as: %s\n' % rfc822date) - return _parse_date_rfc822(rfc822date) -registerDateHandler(_parse_date_greek) - -# Unicode strings for Hungarian date strings -_hungarian_months = \ - { \ - u'janu\u00e1r': u'01', # e1 in iso-8859-2 - u'febru\u00e1ri': u'02', # e1 in iso-8859-2 - u'm\u00e1rcius': u'03', # e1 in iso-8859-2 - u'\u00e1prilis': u'04', # e1 in iso-8859-2 - u'm\u00e1ujus': u'05', # e1 in iso-8859-2 - u'j\u00fanius': u'06', # fa in iso-8859-2 - u'j\u00falius': u'07', # fa in iso-8859-2 - u'augusztus': u'08', - u'szeptember': u'09', - u'okt\u00f3ber': u'10', # f3 in iso-8859-2 - u'november': u'11', - u'december': u'12', - } - -_hungarian_date_format_re = \ - re.compile(u'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})((\+|-)(\d{,2}:\d{2}))') - -def _parse_date_hungarian(dateString): - '''Parse a string according to a Hungarian 8-bit date format.''' - m = _hungarian_date_format_re.match(dateString) - if not m: return - try: - month = _hungarian_months[m.group(2)] - day = m.group(3) - if len(day) == 1: - day = '0' + day - hour = m.group(4) - if len(hour) == 1: - hour = '0' + hour - except: - return - w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s%(zonediff)s' % \ - {'year': m.group(1), 'month': month, 'day': day,\ - 'hour': hour, 'minute': 
m.group(5),\ - 'zonediff': m.group(6)} - if _debug: sys.stderr.write('Hungarian date parsed as: %s\n' % w3dtfdate) - return _parse_date_w3dtf(w3dtfdate) -registerDateHandler(_parse_date_hungarian) - -# W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by -# Drake and licensed under the Python license. Removed all range checking -# for month, day, hour, minute, and second, since mktime will normalize -# these later -def _parse_date_w3dtf(dateString): - def __extract_date(m): - year = int(m.group('year')) - if year < 100: - year = 100 * int(time.gmtime()[0] / 100) + int(year) - if year < 1000: - return 0, 0, 0 - julian = m.group('julian') - if julian: - julian = int(julian) - month = julian / 30 + 1 - day = julian % 30 + 1 - jday = None - while jday != julian: - t = time.mktime((year, month, day, 0, 0, 0, 0, 0, 0)) - jday = time.gmtime(t)[-2] - diff = abs(jday - julian) - if jday > julian: - if diff < day: - day = day - diff - else: - month = month - 1 - day = 31 - elif jday < julian: - if day + diff < 28: - day = day + diff - else: - month = month + 1 - return year, month, day - month = m.group('month') - day = 1 - if month is None: - month = 1 - else: - month = int(month) - day = m.group('day') - if day: - day = int(day) - else: - day = 1 - return year, month, day - - def __extract_time(m): - if not m: - return 0, 0, 0 - hours = m.group('hours') - if not hours: - return 0, 0, 0 - hours = int(hours) - minutes = int(m.group('minutes')) - seconds = m.group('seconds') - if seconds: - seconds = int(seconds) - else: - seconds = 0 - return hours, minutes, seconds - - def __extract_tzd(m): - '''Return the Time Zone Designator as an offset in seconds from UTC.''' - if not m: - return 0 - tzd = m.group('tzd') - if not tzd: - return 0 - if tzd == 'Z': - return 0 - hours = int(m.group('tzdhours')) - minutes = m.group('tzdminutes') - if minutes: - minutes = int(minutes) - else: - minutes = 0 - offset = (hours*60 + minutes) * 60 - if tzd[0] == '+': - return 
-offset - return offset - - __date_re = ('(?P\d\d\d\d)' - '(?:(?P-|)' - '(?:(?P\d\d)(?:(?P=dsep)(?P\d\d))?' - '|(?P\d\d\d)))?') - __tzd_re = '(?P[-+](?P\d\d)(?::?(?P\d\d))|Z)' - __tzd_rx = re.compile(__tzd_re) - __time_re = ('(?P\d\d)(?P:|)(?P\d\d)' - '(?:(?P=tsep)(?P\d\d)(?:[.,]\d+)?)?' - + __tzd_re) - __datetime_re = '%s(?:T%s)?' % (__date_re, __time_re) - __datetime_rx = re.compile(__datetime_re) - m = __datetime_rx.match(dateString) - if (m is None) or (m.group() != dateString): return - gmt = __extract_date(m) + __extract_time(m) + (0, 0, 0) - if gmt[0] == 0: return - return time.gmtime(time.mktime(gmt) + __extract_tzd(m) - time.timezone) -registerDateHandler(_parse_date_w3dtf) - -def _parse_date_rfc822(dateString): - '''Parse an RFC822, RFC1123, RFC2822, or asctime-style date''' - data = dateString.split() - if data[0][-1] in (',', '.') or data[0].lower() in rfc822._daynames: - del data[0] - if len(data) == 4: - s = data[3] - i = s.find('+') - if i > 0: - data[3:] = [s[:i], s[i+1:]] - else: - data.append('') - dateString = " ".join(data) - # Account for the Etc/GMT timezone by stripping 'Etc/' - elif len(data) == 5 and data[4].lower().startswith('etc/'): - data[4] = data[4][4:] - dateString = " ".join(data) - if len(data) < 5: - dateString += ' 00:00:00 GMT' - tm = rfc822.parsedate_tz(dateString) - if tm: - return time.gmtime(rfc822.mktime_tz(tm)) -# rfc822.py defines several time zones, but we define some extra ones. -# 'ET' is equivalent to 'EST', etc. 
-_additional_timezones = {'AT': -400, 'ET': -500, 'CT': -600, 'MT': -700, 'PT': -800} -rfc822._timezones.update(_additional_timezones) -registerDateHandler(_parse_date_rfc822) - -def _parse_date_perforce(aDateString): - """parse a date in yyyy/mm/dd hh:mm:ss TTT format""" - # Fri, 2006/09/15 08:19:53 EDT - _my_date_pattern = re.compile( \ - r'(\w{,3}), (\d{,4})/(\d{,2})/(\d{2}) (\d{,2}):(\d{2}):(\d{2}) (\w{,3})') - - dow, year, month, day, hour, minute, second, tz = \ - _my_date_pattern.search(aDateString).groups() - months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - dateString = "%s, %s %s %s %s:%s:%s %s" % (dow, day, months[int(month) - 1], year, hour, minute, second, tz) - tm = rfc822.parsedate_tz(dateString) - if tm: - return time.gmtime(rfc822.mktime_tz(tm)) -registerDateHandler(_parse_date_perforce) - -def _parse_date(dateString): - '''Parses a variety of date formats into a 9-tuple in GMT''' - for handler in _date_handlers: - try: - date9tuple = handler(dateString) - if not date9tuple: continue - if len(date9tuple) != 9: - if _debug: sys.stderr.write('date handler function must return 9-tuple\n') - raise ValueError - map(int, date9tuple) - return date9tuple - except Exception, e: - if _debug: sys.stderr.write('%s raised %s\n' % (handler.__name__, repr(e))) - pass - return None - -def _getCharacterEncoding(http_headers, xml_data): - '''Get the character encoding of the XML document - - http_headers is a dictionary - xml_data is a raw string (not Unicode) - - This is so much trickier than it sounds, it's not even funny. - According to RFC 3023 ('XML Media Types'), if the HTTP Content-Type - is application/xml, application/*+xml, - application/xml-external-parsed-entity, or application/xml-dtd, - the encoding given in the charset parameter of the HTTP Content-Type - takes precedence over the encoding given in the XML prefix within the - document, and defaults to 'utf-8' if neither are specified. 
But, if - the HTTP Content-Type is text/xml, text/*+xml, or - text/xml-external-parsed-entity, the encoding given in the XML prefix - within the document is ALWAYS IGNORED and only the encoding given in - the charset parameter of the HTTP Content-Type header should be - respected, and it defaults to 'us-ascii' if not specified. - - Furthermore, discussion on the atom-syntax mailing list with the - author of RFC 3023 leads me to the conclusion that any document - served with a Content-Type of text/* and no charset parameter - must be treated as us-ascii. (We now do this.) And also that it - must always be flagged as non-well-formed. (We now do this too.) - - If Content-Type is unspecified (input was local file or non-HTTP source) - or unrecognized (server just got it totally wrong), then go by the - encoding given in the XML prefix of the document and default to - 'iso-8859-1' as per the HTTP specification (RFC 2616). - - Then, assuming we didn't find a character encoding in the HTTP headers - (and the HTTP Content-type allowed us to look in the body), we need - to sniff the first few bytes of the XML data and try to determine - whether the encoding is ASCII-compatible. Section F of the XML - specification shows the way here: - http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info - - If the sniffed encoding is not ASCII-compatible, we need to make it - ASCII compatible so that we can sniff further into the XML declaration - to find the encoding attribute, which will tell us the true encoding. - - Of course, none of this guarantees that we will be able to parse the - feed in the declared character encoding (assuming it was declared - correctly, which many are not). CJKCodecs and iconv_codec help a lot; - you should definitely install them if you can. 
- http://cjkpython.i18n.org/ - ''' - - def _parseHTTPContentType(content_type): - '''takes HTTP Content-Type header and returns (content type, charset) - - If no charset is specified, returns (content type, '') - If no content type is specified, returns ('', '') - Both return parameters are guaranteed to be lowercase strings - ''' - content_type = content_type or '' - content_type, params = cgi.parse_header(content_type) - return content_type, params.get('charset', '').replace("'", '') - - sniffed_xml_encoding = '' - xml_encoding = '' - true_encoding = '' - http_content_type, http_encoding = _parseHTTPContentType(http_headers.get('content-type', http_headers.get('Content-type'))) - # Must sniff for non-ASCII-compatible character encodings before - # searching for XML declaration. This heuristic is defined in - # section F of the XML specification: - # http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info - try: - if xml_data[:4] == _l2bytes([0x4c, 0x6f, 0xa7, 0x94]): - # EBCDIC - xml_data = _ebcdic_to_ascii(xml_data) - elif xml_data[:4] == _l2bytes([0x00, 0x3c, 0x00, 0x3f]): - # UTF-16BE - sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xfe, 0xff])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])): - # UTF-16BE with BOM - sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') - elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x3f, 0x00]): - # UTF-16LE - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xff, 0xfe])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])): - # UTF-16LE with BOM - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') - elif xml_data[:4] == _l2bytes([0x00, 0x00, 0x00, 0x3c]): - # UTF-32BE - sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data, 
'utf-32be').encode('utf-8') - elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x00, 0x00]): - # UTF-32LE - sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') - elif xml_data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]): - # UTF-32BE with BOM - sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') - elif xml_data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]): - # UTF-32LE with BOM - sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') - elif xml_data[:3] == _l2bytes([0xef, 0xbb, 0xbf]): - # UTF-8 with BOM - sniffed_xml_encoding = 'utf-8' - xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') - else: - # ASCII-compatible - pass - xml_encoding_match = re.compile(_s2bytes('^<\?.*encoding=[\'"](.*?)[\'"].*\?>')).match(xml_data) - except: - xml_encoding_match = None - if xml_encoding_match: - xml_encoding = xml_encoding_match.groups()[0].decode('utf-8').lower() - if sniffed_xml_encoding and (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', 'iso-10646-ucs-4', 'ucs-4', 'csucs4', 'utf-16', 'utf-32', 'utf_16', 'utf_32', 'utf16', 'u16')): - xml_encoding = sniffed_xml_encoding - acceptable_content_type = 0 - application_content_types = ('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity') - text_content_types = ('text/xml', 'text/xml-external-parsed-entity') - if (http_content_type in application_content_types) or \ - (http_content_type.startswith('application/') and http_content_type.endswith('+xml')): - acceptable_content_type = 1 - true_encoding = http_encoding or xml_encoding or 'utf-8' - elif (http_content_type in text_content_types) or \ - (http_content_type.startswith('text/')) and http_content_type.endswith('+xml'): - acceptable_content_type = 1 - true_encoding = http_encoding or 'us-ascii' - elif http_content_type.startswith('text/'): - true_encoding = http_encoding or 'us-ascii' - elif http_headers and (not 
(http_headers.has_key('content-type') or http_headers.has_key('Content-type'))): - true_encoding = xml_encoding or 'iso-8859-1' - else: - true_encoding = xml_encoding or 'utf-8' - # some feeds claim to be gb2312 but are actually gb18030. - # apparently MSIE and Firefox both do the following switch: - if true_encoding.lower() == 'gb2312': - true_encoding = 'gb18030' - return true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type - -def _toUTF8(data, encoding): - '''Changes an XML data stream on the fly to specify a new encoding - - data is a raw sequence of bytes (not Unicode) that is presumed to be in %encoding already - encoding is a string recognized by encodings.aliases - ''' - if _debug: sys.stderr.write('entering _toUTF8, trying encoding %s\n' % encoding) - # strip Byte Order Mark (if present) - if (len(data) >= 4) and (data[:2] == _l2bytes([0xfe, 0xff])) and (data[2:4] != _l2bytes([0x00, 0x00])): - if _debug: - sys.stderr.write('stripping BOM\n') - if encoding != 'utf-16be': - sys.stderr.write('trying utf-16be instead\n') - encoding = 'utf-16be' - data = data[2:] - elif (len(data) >= 4) and (data[:2] == _l2bytes([0xff, 0xfe])) and (data[2:4] != _l2bytes([0x00, 0x00])): - if _debug: - sys.stderr.write('stripping BOM\n') - if encoding != 'utf-16le': - sys.stderr.write('trying utf-16le instead\n') - encoding = 'utf-16le' - data = data[2:] - elif data[:3] == _l2bytes([0xef, 0xbb, 0xbf]): - if _debug: - sys.stderr.write('stripping BOM\n') - if encoding != 'utf-8': - sys.stderr.write('trying utf-8 instead\n') - encoding = 'utf-8' - data = data[3:] - elif data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]): - if _debug: - sys.stderr.write('stripping BOM\n') - if encoding != 'utf-32be': - sys.stderr.write('trying utf-32be instead\n') - encoding = 'utf-32be' - data = data[4:] - elif data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]): - if _debug: - sys.stderr.write('stripping BOM\n') - if encoding != 'utf-32le': - sys.stderr.write('trying 
utf-32le instead\n') - encoding = 'utf-32le' - data = data[4:] - newdata = unicode(data, encoding) - if _debug: sys.stderr.write('successfully converted %s data to unicode\n' % encoding) - declmatch = re.compile('^<\?xml[^>]*?>') - newdecl = '''''' - if declmatch.search(newdata): - newdata = declmatch.sub(newdecl, newdata) - else: - newdata = newdecl + u'\n' + newdata - return newdata.encode('utf-8') - -def _stripDoctype(data): - '''Strips DOCTYPE from XML document, returns (rss_version, stripped_data) - - rss_version may be 'rss091n' or None - stripped_data is the same XML document, minus the DOCTYPE - ''' - start = re.search(_s2bytes('<\w'), data) - start = start and start.start() or -1 - head,data = data[:start+1], data[start+1:] - - entity_pattern = re.compile(_s2bytes(r'^\s*]*?)>'), re.MULTILINE) - entity_results=entity_pattern.findall(head) - head = entity_pattern.sub(_s2bytes(''), head) - doctype_pattern = re.compile(_s2bytes(r'^\s*]*?)>'), re.MULTILINE) - doctype_results = doctype_pattern.findall(head) - doctype = doctype_results and doctype_results[0] or _s2bytes('') - if doctype.lower().count(_s2bytes('netscape')): - version = 'rss091n' - else: - version = None - - # only allow in 'safe' inline entity definitions - replacement=_s2bytes('') - if len(doctype_results)==1 and entity_results: - safe_pattern=re.compile(_s2bytes('\s+(\w+)\s+"(&#\w+;|[^&"]*)"')) - safe_entities=filter(lambda e: safe_pattern.match(e),entity_results) - if safe_entities: - replacement=_s2bytes('\n \n]>') - data = doctype_pattern.sub(replacement, head) + data - - return version, data, dict(replacement and [(k.decode('utf-8'), v.decode('utf-8')) for k, v in safe_pattern.findall(replacement)]) - -def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={}): - '''Parse a feed from a URL, file, stream, or string. 
- - request_headers, if given, is a dict from http header name to value to add - to the request; this overrides internally generated values. - ''' - result = FeedParserDict() - result['feed'] = FeedParserDict() - result['entries'] = [] - if _XML_AVAILABLE: - result['bozo'] = 0 - if not isinstance(handlers, list): - handlers = [handlers] - try: - f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers) - data = f.read() - except Exception, e: - result['bozo'] = 1 - result['bozo_exception'] = e - data = None - f = None - - if hasattr(f, 'headers'): - result['headers'] = dict(f.headers) - # overwrite existing headers using response_headers - if 'headers' in result: - result['headers'].update(response_headers) - elif response_headers: - result['headers'] = copy.deepcopy(response_headers) - - # if feed is gzip-compressed, decompress it - if f and data and 'headers' in result: - if gzip and result['headers'].get('content-encoding') == 'gzip': - try: - data = gzip.GzipFile(fileobj=_StringIO(data)).read() - except Exception, e: - # Some feeds claim to be gzipped but they're not, so - # we get garbage. Ideally, we should re-request the - # feed without the 'Accept-encoding: gzip' header, - # but we don't. 
- result['bozo'] = 1 - result['bozo_exception'] = e - data = '' - elif zlib and result['headers'].get('content-encoding') == 'deflate': - try: - data = zlib.decompress(data, -zlib.MAX_WBITS) - except Exception, e: - result['bozo'] = 1 - result['bozo_exception'] = e - data = '' - - # save HTTP headers - if 'headers' in result: - if 'etag' in result['headers'] or 'ETag' in result['headers']: - etag = result['headers'].get('etag', result['headers'].get('ETag')) - if etag: - result['etag'] = etag - if 'last-modified' in result['headers'] or 'Last-Modified' in result['headers']: - modified = result['headers'].get('last-modified', result['headers'].get('Last-Modified')) - if modified: - result['modified'] = _parse_date(modified) - if hasattr(f, 'url'): - result['href'] = f.url - result['status'] = 200 - if hasattr(f, 'status'): - result['status'] = f.status - if hasattr(f, 'close'): - f.close() - - # there are four encodings to keep track of: - # - http_encoding is the encoding declared in the Content-Type HTTP header - # - xml_encoding is the encoding declared in the latestrelease[0][1]: - logger.log(results.title + u" is an upcoming album. Setting its status to 'Wanted'...") - c.execute('UPDATE albums SET Status = "Wanted" WHERE AlbumID="%s"' % u.extractUuid(results.id)) - else: - pass - - for track in results.tracks: - c.execute('INSERT INTO tracks VALUES( ?, ?, ?, ?, ?, ?, ?, ?)', (artistid, results.artist.name, results.title, results.asin, u.extractUuid(results.id), track.title, track.duration, u.extractUuid(track.id))) - time.sleep(1) - time.sleep(1) - - conn.commit() - c.close() \ No newline at end of file diff --git a/logger.py b/logger.py deleted file mode 100644 index 251bfca6..00000000 --- a/logger.py +++ /dev/null @@ -1,177 +0,0 @@ -# Author: Nic Wolfe -# URL: http://code.google.com/p/sickbeard/ -# -# This file is part of Sick Beard. 
-# -# Sick Beard is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Sick Beard is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Sick Beard. If not, see . - -from __future__ import with_statement - -import os -import threading - -import headphones - -import logging - - -# number of log files to keep -NUM_LOGS = 3 - -# log size in bytes -LOG_SIZE = 10000000 # 10 megs - -ERROR = logging.ERROR -WARNING = logging.WARNING -MESSAGE = logging.INFO -DEBUG = logging.DEBUG - -reverseNames = {u'ERROR': ERROR, - u'WARNING': WARNING, - u'INFO': MESSAGE, - u'DEBUG': DEBUG} - -class SBRotatingLogHandler(object): - - def __init__(self, log_file, num_files, num_bytes): - self.num_files = num_files - self.num_bytes = num_bytes - - self.log_file = log_file - self.cur_handler = None - - self.writes_since_check = 0 - - self.log_lock = threading.Lock() - - def initLogging(self, consoleLogging=True): - - self.log_file = os.path.join(headphones.LOG_DIR, self.log_file) - - self.cur_handler = self._config_handler() - - logging.getLogger('headphones').addHandler(self.cur_handler) - - # define a Handler which writes INFO messages or higher to the sys.stderr - if consoleLogging: - console = logging.StreamHandler() - - console.setLevel(logging.INFO) - - # set a format which is simpler for console use - console.setFormatter(logging.Formatter('%(asctime)s %(levelname)s::%(message)s', '%H:%M:%S')) - - # add the handler to the root logger - logging.getLogger('headphones').addHandler(console) - - logging.getLogger('headphones').setLevel(logging.DEBUG) 
- - def _config_handler(self): - """ - Configure a file handler to log at file_name and return it. - """ - - file_handler = logging.FileHandler(self.log_file) - file_handler.setLevel(logging.DEBUG) - file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)-8s %(message)s', '%b-%d %H:%M:%S')) - return file_handler - - def _log_file_name(self, i): - """ - Returns a numbered log file name depending on i. If i==0 it just uses logName, if not it appends - it to the extension (blah.log.3 for i == 3) - - i: Log number to ues - """ - return self.log_file + ('.' + str(i) if i else '') - - def _num_logs(self): - """ - Scans the log folder and figures out how many log files there are already on disk - - Returns: The number of the last used file (eg. mylog.log.3 would return 3). If there are no logs it returns -1 - """ - cur_log = 0 - while os.path.isfile(self._log_file_name(cur_log)): - cur_log += 1 - return cur_log - 1 - - def _rotate_logs(self): - - sb_logger = logging.getLogger('headphones') - - # delete the old handler - if self.cur_handler: - self.cur_handler.flush() - self.cur_handler.close() - sb_logger.removeHandler(self.cur_handler) - - # rename or delete all the old log files - for i in range(self._num_logs(), -1, -1): - cur_file_name = self._log_file_name(i) - try: - if i >= NUM_LOGS: - os.remove(cur_file_name) - else: - os.rename(cur_file_name, self._log_file_name(i+1)) - except WindowsError: - pass - - # the new log handler will always be on the un-numbered .log file - new_file_handler = self._config_handler() - - self.cur_handler = new_file_handler - - sb_logger.addHandler(new_file_handler) - - def log(self, toLog, logLevel=MESSAGE): - - with self.log_lock: - - # check the size and see if we need to rotate - if self.writes_since_check >= 10: - if os.path.isfile(self.log_file) and os.path.getsize(self.log_file) >= LOG_SIZE: - self._rotate_logs() - self.writes_since_check = 0 - else: - self.writes_since_check += 1 - - meThread = 
threading.currentThread().getName() - message = meThread + u" :: " + toLog - - out_line = message.encode('utf-8') - - sb_logger = logging.getLogger('headphones') - - try: - if logLevel == DEBUG: - sb_logger.debug(out_line) - elif logLevel == MESSAGE: - sb_logger.info(out_line) - elif logLevel == WARNING: - sb_logger.warning(out_line) - elif logLevel == ERROR: - sb_logger.error(out_line) - - # add errors to the UI logger - #classes.ErrorViewer.add(classes.UIError(message)) - else: - sb_logger.log(logLevel, out_line) - except ValueError: - pass - -sb_log_instance = SBRotatingLogHandler('headphones.log', NUM_LOGS, LOG_SIZE) - -def log(toLog, logLevel=MESSAGE): - sb_log_instance.log(toLog, logLevel) \ No newline at end of file diff --git a/mb.py b/mb.py deleted file mode 100644 index 73d5edc2..00000000 --- a/mb.py +++ /dev/null @@ -1,84 +0,0 @@ -import time - -import musicbrainz2.webservice as ws -import musicbrainz2.model as m -import musicbrainz2.utils as u - -from musicbrainz2.webservice import WebServiceError - -from helpers import multikeysort - -q = ws.Query() - - -def findArtist(name, limit=1): - - artistlist = [] - - artistResults = q.getArtists(ws.ArtistFilter(name=name, limit=limit)) - - for result in artistResults: - - artistid = u.extractUuid(result.artist.id) - artistlist.append([result.artist.name, artistid]) - - return artistlist - -def getArtist(artistid): - - - rglist = [] - - #Get all official release groups - inc = ws.ArtistIncludes(releases=(m.Release.TYPE_OFFICIAL, m.Release.TYPE_ALBUM), ratings=False, releaseGroups=True) - artist = q.getArtistById(artistid, inc) - - for rg in artist.getReleaseGroups(): - - rgid = u.extractUuid(rg.id) - rglist.append([rg.title, rgid]) - - return rglist - -def getReleaseGroup(rgid): - - releaselist = [] - - inc = ws.ReleaseGroupIncludes(releases=True) - releaseGroup = q.getReleaseGroupById(rgid, inc) - - # I think for now we have to make separate queries for each release, in order - # to get more detailed release 
info (ASIN, track count, etc.) - for release in releaseGroup.releases: - - releaseid = u.extractUuid(release.id) - inc = ws.ReleaseIncludes(tracks=True) - - releaseResult = q.getReleaseById(releaseid, inc) - - release_dict = { - 'asin': bool(releaseResult.asin), - 'tracks': len(releaseResult.getTracks()), - 'releaseid': u.extractUuid(releaseResult.id) - } - - releaselist.append(release_dict) - time.sleep(1) - - a = multikeysort(releaselist, ['-asin', '-tracks']) - - releaseid = a[0]['releaseid'] - - return releaseid - -def getRelease(releaseid): - """ - Given a release id, gather all the info and return it as a list - """ - inc = ws.ReleaseIncludes(artist=True, tracks=True, releaseGroup=True) - release = q.getReleaseById(releaseid, inc) - - releasedetail = [] - - releasedetail.append(release.id) - diff --git a/mover.py b/mover.py deleted file mode 100644 index 4bc4858c..00000000 --- a/mover.py +++ /dev/null @@ -1,21 +0,0 @@ -import glob, os, shutil -from configobj import ConfigObj -from headphones import config_file - -config = ConfigObj(config_file) - -General = config['General'] -move_to_itunes = General['move_to_itunes'] -path_to_itunes = General['path_to_itunes'] -rename_mp3s = General['rename_mp3s'] -cleanup = General['cleanup'] -add_album_art = General['add_album_art'] -music_download_dir = General['music_download_dir'] - -def moveFiles(): - for root, dirs, files in os.walk(music_download_dir): - for file in files: - if file[-4:].lower() == '.mp3' and os.path.isfile(file): - print file - shutil.copy2(os.path.join(root, file), - os.path.join(path_to_itunes, file)) diff --git a/musicbrainz2/__init__.py b/musicbrainz2/__init__.py deleted file mode 100644 index f2edb508..00000000 --- a/musicbrainz2/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -"""A collection of classes for MusicBrainz. - -To get started quickly, have a look at L{webservice.Query} and the examples -there. The source distribution also contains example code you might find -interesting. 
- -This package contains the following modules: - - 1. L{model}: The MusicBrainz domain model, containing classes like - L{Artist }, L{Release }, or - L{Track } - - 2. L{webservice}: An interface to the MusicBrainz XML web service. - - 3. L{wsxml}: A parser for the web service XML format (MMD). - - 4. L{disc}: Functions for creating and submitting DiscIDs. - - 5. L{utils}: Utilities for working with URIs and other commonly needed tools. - -@author: Matthias Friedrich -""" -__revision__ = '$Id: __init__.py 12974 2011-05-01 08:43:54Z luks $' -__version__ = '0.7.3' - -# EOF diff --git a/musicbrainz2/data/__init__.py b/musicbrainz2/data/__init__.py deleted file mode 100644 index 3067fabc..00000000 --- a/musicbrainz2/data/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Support data for the musicbrainz2 package. - -This package is I{not} part of the public API, it has been added to work -around shortcomings in python and may thus be removed at any time. - -Please use the L{musicbrainz2.utils} module instead. 
-""" -__revision__ = '$Id: __init__.py 7386 2006-04-30 11:12:55Z matt $' - -# EOF diff --git a/musicbrainz2/data/countrynames.py b/musicbrainz2/data/countrynames.py deleted file mode 100644 index 7c4ab023..00000000 --- a/musicbrainz2/data/countrynames.py +++ /dev/null @@ -1,253 +0,0 @@ -# -*- coding: utf-8 -*- - -__revision__ = '$Id: countrynames.py 7386 2006-04-30 11:12:55Z matt $' - -countryNames = { - u'BD': u'Bangladesh', - u'BE': u'Belgium', - u'BF': u'Burkina Faso', - u'BG': u'Bulgaria', - u'BB': u'Barbados', - u'WF': u'Wallis and Futuna Islands', - u'BM': u'Bermuda', - u'BN': u'Brunei Darussalam', - u'BO': u'Bolivia', - u'BH': u'Bahrain', - u'BI': u'Burundi', - u'BJ': u'Benin', - u'BT': u'Bhutan', - u'JM': u'Jamaica', - u'BV': u'Bouvet Island', - u'BW': u'Botswana', - u'WS': u'Samoa', - u'BR': u'Brazil', - u'BS': u'Bahamas', - u'BY': u'Belarus', - u'BZ': u'Belize', - u'RU': u'Russian Federation', - u'RW': u'Rwanda', - u'RE': u'Reunion', - u'TM': u'Turkmenistan', - u'TJ': u'Tajikistan', - u'RO': u'Romania', - u'TK': u'Tokelau', - u'GW': u'Guinea-Bissau', - u'GU': u'Guam', - u'GT': u'Guatemala', - u'GR': u'Greece', - u'GQ': u'Equatorial Guinea', - u'GP': u'Guadeloupe', - u'JP': u'Japan', - u'GY': u'Guyana', - u'GF': u'French Guiana', - u'GE': u'Georgia', - u'GD': u'Grenada', - u'GB': u'United Kingdom', - u'GA': u'Gabon', - u'SV': u'El Salvador', - u'GN': u'Guinea', - u'GM': u'Gambia', - u'GL': u'Greenland', - u'GI': u'Gibraltar', - u'GH': u'Ghana', - u'OM': u'Oman', - u'TN': u'Tunisia', - u'JO': u'Jordan', - u'HT': u'Haiti', - u'HU': u'Hungary', - u'HK': u'Hong Kong', - u'HN': u'Honduras', - u'HM': u'Heard and Mc Donald Islands', - u'VE': u'Venezuela', - u'PR': u'Puerto Rico', - u'PW': u'Palau', - u'PT': u'Portugal', - u'SJ': u'Svalbard and Jan Mayen Islands', - u'PY': u'Paraguay', - u'IQ': u'Iraq', - u'PA': u'Panama', - u'PF': u'French Polynesia', - u'PG': u'Papua New Guinea', - u'PE': u'Peru', - u'PK': u'Pakistan', - u'PH': u'Philippines', - u'PN': 
u'Pitcairn', - u'PL': u'Poland', - u'PM': u'St. Pierre and Miquelon', - u'ZM': u'Zambia', - u'EH': u'Western Sahara', - u'EE': u'Estonia', - u'EG': u'Egypt', - u'ZA': u'South Africa', - u'EC': u'Ecuador', - u'IT': u'Italy', - u'VN': u'Viet Nam', - u'SB': u'Solomon Islands', - u'ET': u'Ethiopia', - u'SO': u'Somalia', - u'ZW': u'Zimbabwe', - u'SA': u'Saudi Arabia', - u'ES': u'Spain', - u'ER': u'Eritrea', - u'MD': u'Moldova, Republic of', - u'MG': u'Madagascar', - u'MA': u'Morocco', - u'MC': u'Monaco', - u'UZ': u'Uzbekistan', - u'MM': u'Myanmar', - u'ML': u'Mali', - u'MO': u'Macau', - u'MN': u'Mongolia', - u'MH': u'Marshall Islands', - u'MK': u'Macedonia, The Former Yugoslav Republic of', - u'MU': u'Mauritius', - u'MT': u'Malta', - u'MW': u'Malawi', - u'MV': u'Maldives', - u'MQ': u'Martinique', - u'MP': u'Northern Mariana Islands', - u'MS': u'Montserrat', - u'MR': u'Mauritania', - u'UG': u'Uganda', - u'MY': u'Malaysia', - u'MX': u'Mexico', - u'IL': u'Israel', - u'FR': u'France', - u'IO': u'British Indian Ocean Territory', - u'SH': u'St. 
Helena', - u'FI': u'Finland', - u'FJ': u'Fiji', - u'FK': u'Falkland Islands (Malvinas)', - u'FM': u'Micronesia, Federated States of', - u'FO': u'Faroe Islands', - u'NI': u'Nicaragua', - u'NL': u'Netherlands', - u'NO': u'Norway', - u'NA': u'Namibia', - u'VU': u'Vanuatu', - u'NC': u'New Caledonia', - u'NE': u'Niger', - u'NF': u'Norfolk Island', - u'NG': u'Nigeria', - u'NZ': u'New Zealand', - u'ZR': u'Zaire', - u'NP': u'Nepal', - u'NR': u'Nauru', - u'NU': u'Niue', - u'CK': u'Cook Islands', - u'CI': u'Cote d\'Ivoire', - u'CH': u'Switzerland', - u'CO': u'Colombia', - u'CN': u'China', - u'CM': u'Cameroon', - u'CL': u'Chile', - u'CC': u'Cocos (Keeling) Islands', - u'CA': u'Canada', - u'CG': u'Congo', - u'CF': u'Central African Republic', - u'CZ': u'Czech Republic', - u'CY': u'Cyprus', - u'CX': u'Christmas Island', - u'CR': u'Costa Rica', - u'CV': u'Cape Verde', - u'CU': u'Cuba', - u'SZ': u'Swaziland', - u'SY': u'Syrian Arab Republic', - u'KG': u'Kyrgyzstan', - u'KE': u'Kenya', - u'SR': u'Suriname', - u'KI': u'Kiribati', - u'KH': u'Cambodia', - u'KN': u'Saint Kitts and Nevis', - u'KM': u'Comoros', - u'ST': u'Sao Tome and Principe', - u'SI': u'Slovenia', - u'KW': u'Kuwait', - u'SN': u'Senegal', - u'SM': u'San Marino', - u'SL': u'Sierra Leone', - u'SC': u'Seychelles', - u'KZ': u'Kazakhstan', - u'KY': u'Cayman Islands', - u'SG': u'Singapore', - u'SE': u'Sweden', - u'SD': u'Sudan', - u'DO': u'Dominican Republic', - u'DM': u'Dominica', - u'DJ': u'Djibouti', - u'DK': u'Denmark', - u'VG': u'Virgin Islands (British)', - u'DE': u'Germany', - u'YE': u'Yemen', - u'DZ': u'Algeria', - u'US': u'United States', - u'UY': u'Uruguay', - u'YT': u'Mayotte', - u'UM': u'United States Minor Outlying Islands', - u'LB': u'Lebanon', - u'LC': u'Saint Lucia', - u'LA': u'Lao People\'s Democratic Republic', - u'TV': u'Tuvalu', - u'TW': u'Taiwan', - u'TT': u'Trinidad and Tobago', - u'TR': u'Turkey', - u'LK': u'Sri Lanka', - u'LI': u'Liechtenstein', - u'LV': u'Latvia', - u'TO': u'Tonga', - u'LT': 
u'Lithuania', - u'LU': u'Luxembourg', - u'LR': u'Liberia', - u'LS': u'Lesotho', - u'TH': u'Thailand', - u'TF': u'French Southern Territories', - u'TG': u'Togo', - u'TD': u'Chad', - u'TC': u'Turks and Caicos Islands', - u'LY': u'Libyan Arab Jamahiriya', - u'VA': u'Vatican City State (Holy See)', - u'VC': u'Saint Vincent and The Grenadines', - u'AE': u'United Arab Emirates', - u'AD': u'Andorra', - u'AG': u'Antigua and Barbuda', - u'AF': u'Afghanistan', - u'AI': u'Anguilla', - u'VI': u'Virgin Islands (U.S.)', - u'IS': u'Iceland', - u'IR': u'Iran (Islamic Republic of)', - u'AM': u'Armenia', - u'AL': u'Albania', - u'AO': u'Angola', - u'AN': u'Netherlands Antilles', - u'AQ': u'Antarctica', - u'AS': u'American Samoa', - u'AR': u'Argentina', - u'AU': u'Australia', - u'AT': u'Austria', - u'AW': u'Aruba', - u'IN': u'India', - u'TZ': u'Tanzania, United Republic of', - u'AZ': u'Azerbaijan', - u'IE': u'Ireland', - u'ID': u'Indonesia', - u'UA': u'Ukraine', - u'QA': u'Qatar', - u'MZ': u'Mozambique', - u'BA': u'Bosnia and Herzegovina', - u'CD': u'Congo, The Democratic Republic of the', - u'CS': u'Serbia and Montenegro', - u'HR': u'Croatia', - u'KP': u'Korea (North), Democratic People\'s Republic of', - u'KR': u'Korea (South), Republic of', - u'SK': u'Slovakia', - u'SU': u'Soviet Union (historical, 1922-1991)', - u'TL': u'East Timor', - u'XC': u'Czechoslovakia (historical, 1918-1992)', - u'XE': u'Europe', - u'XG': u'East Germany (historical, 1949-1990)', - u'XU': u'[Unknown Country]', - u'XW': u'[Worldwide]', - u'YU': u'Yugoslavia (historical, 1918-1992)', -} - -# EOF diff --git a/musicbrainz2/data/languagenames.py b/musicbrainz2/data/languagenames.py deleted file mode 100644 index 7f4252dc..00000000 --- a/musicbrainz2/data/languagenames.py +++ /dev/null @@ -1,400 +0,0 @@ -# -*- coding: utf-8 -*- - -__revision__ = '$Id: languagenames.py 8725 2006-12-17 22:39:07Z luks $' - -languageNames = { - u'ART': u'Artificial (Other)', - u'ROH': u'Raeto-Romance', - u'SCO': u'Scots', - u'SCN': 
u'Sicilian', - u'ROM': u'Romany', - u'RON': u'Romanian', - u'OSS': u'Ossetian; Ossetic', - u'ALE': u'Aleut', - u'MNI': u'Manipuri', - u'NWC': u'Classical Newari; Old Newari; Classical Nepal Bhasa', - u'OSA': u'Osage', - u'MNC': u'Manchu', - u'MWR': u'Marwari', - u'VEN': u'Venda', - u'MWL': u'Mirandese', - u'FAS': u'Persian', - u'FAT': u'Fanti', - u'FAN': u'Fang', - u'FAO': u'Faroese', - u'DIN': u'Dinka', - u'HYE': u'Armenian', - u'DSB': u'Lower Sorbian', - u'CAR': u'Carib', - u'DIV': u'Divehi', - u'TEL': u'Telugu', - u'TEM': u'Timne', - u'NBL': u'Ndebele, South; South Ndebele', - u'TER': u'Tereno', - u'TET': u'Tetum', - u'SUN': u'Sundanese', - u'KUT': u'Kutenai', - u'SUK': u'Sukuma', - u'KUR': u'Kurdish', - u'KUM': u'Kumyk', - u'SUS': u'Susu', - u'NEW': u'Newari; Nepal Bhasa', - u'KUA': u'Kuanyama; Kwanyama', - u'MEN': u'Mende', - u'LEZ': u'Lezghian', - u'GLA': u'Gaelic; Scottish Gaelic', - u'BOS': u'Bosnian', - u'GLE': u'Irish', - u'EKA': u'Ekajuk', - u'GLG': u'Gallegan', - u'AKA': u'Akan', - u'BOD': u'Tibetan', - u'GLV': u'Manx', - u'JRB': u'Judeo-Arabic', - u'VIE': u'Vietnamese', - u'IPK': u'Inupiaq', - u'UZB': u'Uzbek', - u'BRE': u'Breton', - u'BRA': u'Braj', - u'AYM': u'Aymara', - u'CHA': u'Chamorro', - u'CHB': u'Chibcha', - u'CHE': u'Chechen', - u'CHG': u'Chagatai', - u'CHK': u'Chuukese', - u'CHM': u'Mari', - u'CHN': u'Chinook jargon', - u'CHO': u'Choctaw', - u'CHP': u'Chipewyan', - u'CHR': u'Cherokee', - u'CHU': u'Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic', - u'CHV': u'Chuvash', - u'CHY': u'Cheyenne', - u'MSA': u'Malay', - u'III': u'Sichuan Yi', - u'ACE': u'Achinese', - u'IBO': u'Igbo', - u'IBA': u'Iban', - u'XHO': u'Xhosa', - u'DEU': u'German', - u'CAT': u'Catalan; Valencian', - u'DEL': u'Delaware', - u'DEN': u'Slave (Athapascan)', - u'CAD': u'Caddo', - u'TAT': u'Tatar', - u'RAJ': u'Rajasthani', - u'SPA': u'Spanish; Castilian', - u'TAM': u'Tamil', - u'TAH': u'Tahitian', - u'AFH': u'Afrihili', - u'ENG': u'English', - 
u'CSB': u'Kashubian', - u'NYN': u'Nyankole', - u'NYO': u'Nyoro', - u'SID': u'Sidamo', - u'NYA': u'Chichewa; Chewa; Nyanja', - u'SIN': u'Sinhala; Sinhalese', - u'AFR': u'Afrikaans', - u'LAM': u'Lamba', - u'SND': u'Sindhi', - u'MAR': u'Marathi', - u'LAH': u'Lahnda', - u'NYM': u'Nyamwezi', - u'SNA': u'Shona', - u'LAD': u'Ladino', - u'SNK': u'Soninke', - u'MAD': u'Madurese', - u'MAG': u'Magahi', - u'MAI': u'Maithili', - u'MAH': u'Marshallese', - u'LAV': u'Latvian', - u'MAL': u'Malayalam', - u'MAN': u'Mandingo', - u'ZND': u'Zande', - u'ZEN': u'Zenaga', - u'KBD': u'Kabardian', - u'ITA': u'Italian', - u'VAI': u'Vai', - u'TSN': u'Tswana', - u'TSO': u'Tsonga', - u'TSI': u'Tsimshian', - u'BYN': u'Blin; Bilin', - u'FIJ': u'Fijian', - u'FIN': u'Finnish', - u'EUS': u'Basque', - u'CEB': u'Cebuano', - u'DAN': u'Danish', - u'NOG': u'Nogai', - u'NOB': u'Norwegian BokmÃ¥l; BokmÃ¥l, Norwegian', - u'DAK': u'Dakota', - u'CES': u'Czech', - u'DAR': u'Dargwa', - u'DAY': u'Dayak', - u'NOR': u'Norwegian', - u'KPE': u'Kpelle', - u'GUJ': u'Gujarati', - u'MDF': u'Moksha', - u'MAS': u'Masai', - u'LAO': u'Lao', - u'MDR': u'Mandar', - u'GON': u'Gondi', - u'SMS': u'Skolt Sami', - u'SMO': u'Samoan', - u'SMN': u'Inari Sami', - u'SMJ': u'Lule Sami', - u'GOT': u'Gothic', - u'SME': u'Northern Sami', - u'BLA': u'Siksika', - u'SMA': u'Southern Sami', - u'GOR': u'Gorontalo', - u'AST': u'Asturian; Bable', - u'ORM': u'Oromo', - u'QUE': u'Quechua', - u'ORI': u'Oriya', - u'CRH': u'Crimean Tatar; Crimean Turkish', - u'ASM': u'Assamese', - u'PUS': u'Pushto', - u'DGR': u'Dogrib', - u'LTZ': u'Luxembourgish; Letzeburgesch', - u'NDO': u'Ndonga', - u'GEZ': u'Geez', - u'ISL': u'Icelandic', - u'LAT': u'Latin', - u'MAK': u'Makasar', - u'ZAP': u'Zapotec', - u'YID': u'Yiddish', - u'KOK': u'Konkani', - u'KOM': u'Komi', - u'KON': u'Kongo', - u'UKR': u'Ukrainian', - u'TON': u'Tonga (Tonga Islands)', - u'KOS': u'Kosraean', - u'KOR': u'Korean', - u'TOG': u'Tonga (Nyasa)', - u'HUN': u'Hungarian', - u'HUP': u'Hupa', - u'CYM': 
u'Welsh', - u'UDM': u'Udmurt', - u'BEJ': u'Beja', - u'BEN': u'Bengali', - u'BEL': u'Belarusian', - u'BEM': u'Bemba', - u'AAR': u'Afar', - u'NZI': u'Nzima', - u'SAH': u'Yakut', - u'SAN': u'Sanskrit', - u'SAM': u'Samaritan Aramaic', - u'SAG': u'Sango', - u'SAD': u'Sandawe', - u'RAR': u'Rarotongan', - u'RAP': u'Rapanui', - u'SAS': u'Sasak', - u'SAT': u'Santali', - u'MIN': u'Minangkabau', - u'LIM': u'Limburgan; Limburger; Limburgish', - u'LIN': u'Lingala', - u'LIT': u'Lithuanian', - u'EFI': u'Efik', - u'BTK': u'Batak (Indonesia)', - u'KAC': u'Kachin', - u'KAB': u'Kabyle', - u'KAA': u'Kara-Kalpak', - u'KAN': u'Kannada', - u'KAM': u'Kamba', - u'KAL': u'Kalaallisut; Greenlandic', - u'KAS': u'Kashmiri', - u'KAR': u'Karen', - u'KAU': u'Kanuri', - u'KAT': u'Georgian', - u'KAZ': u'Kazakh', - u'TYV': u'Tuvinian', - u'AWA': u'Awadhi', - u'URD': u'Urdu', - u'DOI': u'Dogri', - u'TPI': u'Tok Pisin', - u'MRI': u'Maori', - u'ABK': u'Abkhazian', - u'TKL': u'Tokelau', - u'NLD': u'Dutch; Flemish', - u'OJI': u'Ojibwa', - u'OCI': u'Occitan (post 1500); Provençal', - u'WOL': u'Wolof', - u'JAV': u'Javanese', - u'HRV': u'Croatian', - u'DYU': u'Dyula', - u'SSW': u'Swati', - u'MUL': u'Multiple languages', - u'HIL': u'Hiligaynon', - u'HIM': u'Himachali', - u'HIN': u'Hindi', - u'BAS': u'Basa', - u'GBA': u'Gbaya', - u'WLN': u'Walloon', - u'BAD': u'Banda', - u'NEP': u'Nepali', - u'CRE': u'Cree', - u'BAN': u'Balinese', - u'BAL': u'Baluchi', - u'BAM': u'Bambara', - u'BAK': u'Bashkir', - u'SHN': u'Shan', - u'ARP': u'Arapaho', - u'ARW': u'Arawak', - u'ARA': u'Arabic', - u'ARC': u'Aramaic', - u'ARG': u'Aragonese', - u'SEL': u'Selkup', - u'ARN': u'Araucanian', - u'LUS': u'Lushai', - u'MUS': u'Creek', - u'LUA': u'Luba-Lulua', - u'LUB': u'Luba-Katanga', - u'LUG': u'Ganda', - u'LUI': u'Luiseno', - u'LUN': u'Lunda', - u'LUO': u'Luo (Kenya and Tanzania)', - u'IKU': u'Inuktitut', - u'TUR': u'Turkish', - u'TUK': u'Turkmen', - u'TUM': u'Tumbuka', - u'COP': u'Coptic', - u'COS': u'Corsican', - u'COR': 
u'Cornish', - u'ILO': u'Iloko', - u'GWI': u'Gwich´in', - u'TLI': u'Tlingit', - u'TLH': u'Klingon; tlhIngan-Hol', - u'POR': u'Portuguese', - u'PON': u'Pohnpeian', - u'POL': u'Polish', - u'TGK': u'Tajik', - u'TGL': u'Tagalog', - u'FRA': u'French', - u'BHO': u'Bhojpuri', - u'SWA': u'Swahili', - u'DUA': u'Duala', - u'SWE': u'Swedish', - u'YAP': u'Yapese', - u'TIV': u'Tiv', - u'YAO': u'Yao', - u'XAL': u'Kalmyk', - u'FRY': u'Frisian', - u'GAY': u'Gayo', - u'OTA': u'Turkish, Ottoman (1500-1928)', - u'HMN': u'Hmong', - u'HMO': u'Hiri Motu', - u'GAA': u'Ga', - u'FUR': u'Friulian', - u'MLG': u'Malagasy', - u'SLV': u'Slovenian', - u'FIL': u'Filipino; Pilipino', - u'MLT': u'Maltese', - u'SLK': u'Slovak', - u'FUL': u'Fulah', - u'JPN': u'Japanese', - u'VOL': u'Volapük', - u'VOT': u'Votic', - u'IND': u'Indonesian', - u'AVE': u'Avestan', - u'JPR': u'Judeo-Persian', - u'AVA': u'Avaric', - u'PAP': u'Papiamento', - u'EWO': u'Ewondo', - u'PAU': u'Palauan', - u'EWE': u'Ewe', - u'PAG': u'Pangasinan', - u'PAM': u'Pampanga', - u'PAN': u'Panjabi; Punjabi', - u'KIR': u'Kirghiz', - u'NIA': u'Nias', - u'KIK': u'Kikuyu; Gikuyu', - u'SYR': u'Syriac', - u'KIN': u'Kinyarwanda', - u'NIU': u'Niuean', - u'EPO': u'Esperanto', - u'JBO': u'Lojban', - u'MIC': u'Mi\'kmaq; Micmac', - u'THA': u'Thai', - u'HAI': u'Haida', - u'ELL': u'Greek, Modern (1453-)', - u'ADY': u'Adyghe; Adygei', - u'ELX': u'Elamite', - u'ADA': u'Adangme', - u'GRB': u'Grebo', - u'HAT': u'Haitian; Haitian Creole', - u'HAU': u'Hausa', - u'HAW': u'Hawaiian', - u'BIN': u'Bini', - u'AMH': u'Amharic', - u'BIK': u'Bikol', - u'BIH': u'Bihari', - u'MOS': u'Mossi', - u'MOH': u'Mohawk', - u'MON': u'Mongolian', - u'MOL': u'Moldavian', - u'BIS': u'Bislama', - u'TVL': u'Tuvalu', - u'IJO': u'Ijo', - u'EST': u'Estonian', - u'KMB': u'Kimbundu', - u'UMB': u'Umbundu', - u'TMH': u'Tamashek', - u'FON': u'Fon', - u'HSB': u'Upper Sorbian', - u'RUN': u'Rundi', - u'RUS': u'Russian', - u'PLI': u'Pali', - u'SRD': u'Sardinian', - u'ACH': u'Acoli', - u'NDE': 
u'Ndebele, North; North Ndebele', - u'DZO': u'Dzongkha', - u'KRU': u'Kurukh', - u'SRR': u'Serer', - u'IDO': u'Ido', - u'SRP': u'Serbian', - u'KRO': u'Kru', - u'KRC': u'Karachay-Balkar', - u'NDS': u'Low German; Low Saxon; German, Low; Saxon, Low', - u'ZUN': u'Zuni', - u'ZUL': u'Zulu', - u'TWI': u'Twi', - u'NSO': u'Northern Sotho, Pedi; Sepedi', - u'SOM': u'Somali', - u'SON': u'Songhai', - u'SOT': u'Sotho, Southern', - u'MKD': u'Macedonian', - u'HER': u'Herero', - u'LOL': u'Mongo', - u'HEB': u'Hebrew', - u'LOZ': u'Lozi', - u'GIL': u'Gilbertese', - u'WAS': u'Washo', - u'WAR': u'Waray', - u'BUL': u'Bulgarian', - u'WAL': u'Walamo', - u'BUA': u'Buriat', - u'BUG': u'Buginese', - u'AZE': u'Azerbaijani', - u'ZHA': u'Zhuang; Chuang', - u'ZHO': u'Chinese', - u'NNO': u'Norwegian Nynorsk; Nynorsk, Norwegian', - u'UIG': u'Uighur; Uyghur', - u'MYV': u'Erzya', - u'INH': u'Ingush', - u'KHM': u'Khmer', - u'MYA': u'Burmese', - u'KHA': u'Khasi', - u'INA': u'Interlingua (International Auxiliary Language Association)', - u'NAH': u'Nahuatl', - u'TIR': u'Tigrinya', - u'NAP': u'Neapolitan', - u'NAV': u'Navajo; Navaho', - u'NAU': u'Nauru', - u'GRN': u'Guarani', - u'TIG': u'Tigre', - u'YOR': u'Yoruba', - u'ILE': u'Interlingue', - u'SQI': u'Albanian', -} - -# EOF diff --git a/musicbrainz2/data/releasetypenames.py b/musicbrainz2/data/releasetypenames.py deleted file mode 100644 index f16ed19e..00000000 --- a/musicbrainz2/data/releasetypenames.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- - -__revision__ = '$Id: releasetypenames.py 8728 2006-12-17 23:42:30Z luks $' - -releaseTypeNames = { - u'http://musicbrainz.org/ns/mmd-1.0#None': u'None', - u'http://musicbrainz.org/ns/mmd-1.0#Album': u'Album', - u'http://musicbrainz.org/ns/mmd-1.0#Single': u'Single', - u'http://musicbrainz.org/ns/mmd-1.0#EP': u'EP', - u'http://musicbrainz.org/ns/mmd-1.0#Compilation': u'Compilation', - u'http://musicbrainz.org/ns/mmd-1.0#Soundtrack': u'Soundtrack', - 
u'http://musicbrainz.org/ns/mmd-1.0#Spokenword': u'Spokenword', - u'http://musicbrainz.org/ns/mmd-1.0#Interview': u'Interview', - u'http://musicbrainz.org/ns/mmd-1.0#Audiobook': u'Audiobook', - u'http://musicbrainz.org/ns/mmd-1.0#Live': u'Live', - u'http://musicbrainz.org/ns/mmd-1.0#Remix': u'Remix', - u'http://musicbrainz.org/ns/mmd-1.0#Other': u'Other', - u'http://musicbrainz.org/ns/mmd-1.0#Official': u'Official', - u'http://musicbrainz.org/ns/mmd-1.0#Promotion': u'Promotion', - u'http://musicbrainz.org/ns/mmd-1.0#Bootleg': u'Bootleg', - u'http://musicbrainz.org/ns/mmd-1.0#Pseudo-Release': u'Pseudo-Release', -} - -# EOF diff --git a/musicbrainz2/data/scriptnames.py b/musicbrainz2/data/scriptnames.py deleted file mode 100644 index 30a55bd7..00000000 --- a/musicbrainz2/data/scriptnames.py +++ /dev/null @@ -1,59 +0,0 @@ -# -*- coding: utf-8 -*- - -__revision__ = '$Id: scriptnames.py 7386 2006-04-30 11:12:55Z matt $' - -scriptNames = { - u'Yiii': u'Yi', - u'Telu': u'Telugu', - u'Taml': u'Tamil', - u'Guru': u'Gurmukhi', - u'Hebr': u'Hebrew', - u'Geor': u'Georgian (Mkhedruli)', - u'Ugar': u'Ugaritic', - u'Cyrl': u'Cyrillic', - u'Hrkt': u'Kanji & Kana', - u'Armn': u'Armenian', - u'Runr': u'Runic', - u'Khmr': u'Khmer', - u'Latn': u'Latin', - u'Hani': u'Han (Hanzi, Kanji, Hanja)', - u'Ital': u'Old Italic (Etruscan, Oscan, etc.)', - u'Hano': u'Hanunoo (Hanunóo)', - u'Ethi': u'Ethiopic (Ge\'ez)', - u'Gujr': u'Gujarati', - u'Hang': u'Hangul', - u'Arab': u'Arabic', - u'Thaa': u'Thaana', - u'Buhd': u'Buhid', - u'Sinh': u'Sinhala', - u'Orya': u'Oriya', - u'Hans': u'Han (Simplified variant)', - u'Thai': u'Thai', - u'Cprt': u'Cypriot', - u'Linb': u'Linear B', - u'Hant': u'Han (Traditional variant)', - u'Osma': u'Osmanya', - u'Mong': u'Mongolian', - u'Deva': u'Devanagari (Nagari)', - u'Laoo': u'Lao', - u'Tagb': u'Tagbanwa', - u'Hira': u'Hiragana', - u'Bopo': u'Bopomofo', - u'Goth': u'Gothic', - u'Tale': u'Tai Le', - u'Mymr': u'Myanmar (Burmese)', - u'Tglg': u'Tagalog', - u'Grek': 
u'Greek', - u'Mlym': u'Malayalam', - u'Cher': u'Cherokee', - u'Tibt': u'Tibetan', - u'Kana': u'Katakana', - u'Syrc': u'Syriac', - u'Cans': u'Unified Canadian Aboriginal Syllabics', - u'Beng': u'Bengali', - u'Limb': u'Limbu', - u'Ogam': u'Ogham', - u'Knda': u'Kannada', -} - -# EOF diff --git a/musicbrainz2/disc.py b/musicbrainz2/disc.py deleted file mode 100644 index 8d283115..00000000 --- a/musicbrainz2/disc.py +++ /dev/null @@ -1,221 +0,0 @@ -"""Utilities for working with Audio CDs. - -This module contains utilities for working with Audio CDs. - -The functions in this module need both a working ctypes package (already -included in python-2.5) and an installed libdiscid. If you don't have -libdiscid, it can't be loaded, or your platform isn't supported by either -ctypes or this module, a C{NotImplementedError} is raised when using the -L{readDisc()} function. - -@author: Matthias Friedrich -""" -__revision__ = '$Id: disc.py 11987 2009-08-22 11:57:51Z matt $' - -import sys -import urllib -import urlparse -import ctypes -import ctypes.util -from musicbrainz2.model import Disc - -__all__ = [ 'DiscError', 'readDisc', 'getSubmissionUrl' ] - - -class DiscError(IOError): - """The Audio CD could not be read. - - This may be simply because no disc was in the drive, the device name - was wrong or the disc can't be read. Reading errors can occur in case - of a damaged disc or a copy protection mechanism, for example. - """ - pass - - -def _openLibrary(): - """Tries to open libdiscid. - - @return: a C{ctypes.CDLL} object, representing the opened library - - @raise NotImplementedError: if the library can't be opened - """ - # This only works for ctypes >= 0.9.9.3. Any libdiscid is found, - # no matter how it's called on this platform. 
- try: - if hasattr(ctypes.cdll, 'find'): - libDiscId = ctypes.cdll.find('discid') - _setPrototypes(libDiscId) - return libDiscId - except OSError, e: - raise NotImplementedError('Error opening library: ' + str(e)) - - # Try to find the library using ctypes.util - libName = ctypes.util.find_library('discid') - if libName != None: - try: - libDiscId = ctypes.cdll.LoadLibrary(libName) - _setPrototypes(libDiscId) - return libDiscId - except OSError, e: - raise NotImplementedError('Error opening library: ' + - str(e)) - - # For compatibility with ctypes < 0.9.9.3 try to figure out the library - # name without the help of ctypes. We use cdll.LoadLibrary() below, - # which isn't available for ctypes == 0.9.9.3. - # - if sys.platform == 'linux2': - libName = 'libdiscid.so.0' - elif sys.platform == 'darwin': - libName = 'libdiscid.0.dylib' - elif sys.platform == 'win32': - libName = 'discid.dll' - else: - # This should at least work for Un*x-style operating systems - libName = 'libdiscid.so.0' - - try: - libDiscId = ctypes.cdll.LoadLibrary(libName) - _setPrototypes(libDiscId) - return libDiscId - except OSError, e: - raise NotImplementedError('Error opening library: ' + str(e)) - - assert False # not reached - - -def _setPrototypes(libDiscId): - ct = ctypes - libDiscId.discid_new.argtypes = ( ) - libDiscId.discid_new.restype = ct.c_void_p - - libDiscId.discid_free.argtypes = (ct.c_void_p, ) - - libDiscId.discid_read.argtypes = (ct.c_void_p, ct.c_char_p) - - libDiscId.discid_get_error_msg.argtypes = (ct.c_void_p, ) - libDiscId.discid_get_error_msg.restype = ct.c_char_p - - libDiscId.discid_get_id.argtypes = (ct.c_void_p, ) - libDiscId.discid_get_id.restype = ct.c_char_p - - libDiscId.discid_get_first_track_num.argtypes = (ct.c_void_p, ) - libDiscId.discid_get_first_track_num.restype = ct.c_int - - libDiscId.discid_get_last_track_num.argtypes = (ct.c_void_p, ) - libDiscId.discid_get_last_track_num.restype = ct.c_int - - libDiscId.discid_get_sectors.argtypes = (ct.c_void_p, ) 
- libDiscId.discid_get_sectors.restype = ct.c_int - - libDiscId.discid_get_track_offset.argtypes = (ct.c_void_p, ct.c_int) - libDiscId.discid_get_track_offset.restype = ct.c_int - - libDiscId.discid_get_track_length.argtypes = (ct.c_void_p, ct.c_int) - libDiscId.discid_get_track_length.restype = ct.c_int - - -def getSubmissionUrl(disc, host='mm.musicbrainz.org', port=80): - """Returns a URL for adding a disc to the MusicBrainz database. - - A fully initialized L{musicbrainz2.model.Disc} object is needed, as - returned by L{readDisc}. A disc object returned by the web service - doesn't provide the necessary information. - - Note that the created URL is intended for interactive use and points - to the MusicBrainz disc submission wizard by default. This method - just returns a URL, no network connection is needed. The disc drive - isn't used. - - @param disc: a fully initialized L{musicbrainz2.model.Disc} object - @param host: a string containing a host name - @param port: an integer containing a port number - - @return: a string containing the submission URL - - @see: L{readDisc} - """ - assert isinstance(disc, Disc), 'musicbrainz2.model.Disc expected' - discid = disc.getId() - first = disc.getFirstTrackNum() - last = disc.getLastTrackNum() - sectors = disc.getSectors() - assert None not in (discid, first, last, sectors) - - tracks = last - first + 1 - toc = "%d %d %d " % (first, last, sectors) - toc = toc + ' '.join( map(lambda x: str(x[0]), disc.getTracks()) ) - - query = urllib.urlencode({ 'id': discid, 'toc': toc, 'tracks': tracks }) - - if port == 80: - netloc = host - else: - netloc = host + ':' + str(port) - - url = ('http', netloc, '/bare/cdlookup.html', '', query, '') - - return urlparse.urlunparse(url) - - -def readDisc(deviceName=None): - """Reads an Audio CD in the disc drive. - - This reads a CD's table of contents (TOC) and calculates the MusicBrainz - DiscID, which is a 28 character ASCII string. 
This DiscID can be used - to retrieve a list of matching releases from the web service (see - L{musicbrainz2.webservice.Query}). - - Note that an Audio CD has to be in drive for this to work. The - C{deviceName} argument may be used to set the device. The default - depends on the operating system (on linux, it's C{'/dev/cdrom'}). - No network connection is needed for this function. - - If the device doesn't exist or there's no valid Audio CD in the drive, - a L{DiscError} exception is raised. - - @param deviceName: a string containing the CD drive's device name - - @return: a L{musicbrainz2.model.Disc} object - - @raise DiscError: if there was a problem reading the disc - @raise NotImplementedError: if DiscID generation isn't supported - """ - libDiscId = _openLibrary() - - handle = libDiscId.discid_new() - assert handle != 0, "libdiscid: discid_new() returned NULL" - - # Access the CD drive. This also works if deviceName is None because - # ctypes passes a NULL pointer in this case. - # - res = libDiscId.discid_read(handle, deviceName) - if res == 0: - raise DiscError(libDiscId.discid_get_error_msg(handle)) - - - # Now extract the data from the result. - # - disc = Disc() - - disc.setId( libDiscId.discid_get_id(handle) ) - - firstTrackNum = libDiscId.discid_get_first_track_num(handle) - lastTrackNum = libDiscId.discid_get_last_track_num(handle) - - disc.setSectors(libDiscId.discid_get_sectors(handle)) - - for i in range(firstTrackNum, lastTrackNum+1): - trackOffset = libDiscId.discid_get_track_offset(handle, i) - trackSectors = libDiscId.discid_get_track_length(handle, i) - - disc.addTrack( (trackOffset, trackSectors) ) - - disc.setFirstTrackNum(firstTrackNum) - disc.setLastTrackNum(lastTrackNum) - - libDiscId.discid_free(handle) - - return disc - -# EOF diff --git a/musicbrainz2/model.py b/musicbrainz2/model.py deleted file mode 100644 index fe8f05df..00000000 --- a/musicbrainz2/model.py +++ /dev/null @@ -1,2488 +0,0 @@ -"""The MusicBrainz domain model. 
- -These classes are part of the MusicBrainz domain model. They may be used -by other modules and don't contain any network or other I/O code. If you -want to request data from the web service, please have a look at -L{musicbrainz2.webservice}. - -The most important classes, usually acting as entry points, are -L{Artist}, L{Release}, and L{Track}. - -@var VARIOUS_ARTISTS_ID: The ID of the special 'Various Artists' artist. - -@var NS_MMD_1: Default namespace prefix for all MusicBrainz metadata. -@var NS_REL_1: Namespace prefix for relations. -@var NS_EXT_1: Namespace prefix for MusicBrainz extensions. - -@see: L{musicbrainz2.webservice} - -@author: Matthias Friedrich -""" -try: - set -except NameError: - from sets import Set as set - -__revision__ = '$Id: model.py 12829 2010-09-15 12:00:11Z luks $' - -__all__ = [ - 'VARIOUS_ARTISTS_ID', 'NS_MMD_1', 'NS_REL_1', 'NS_EXT_1', - 'Entity', 'Artist', 'Release', 'Track', 'User', 'ReleaseGroup', - 'Relation', 'Disc', 'ReleaseEvent', 'Label', 'Tag', 'Rating', - 'AbstractAlias', 'ArtistAlias', 'LabelAlias', -] - - -VARIOUS_ARTISTS_ID = 'http://musicbrainz.org/artist/89ad4ac3-39f7-470e-963a-56509c546377' - -# Namespace URI prefixes -# -NS_MMD_1 = 'http://musicbrainz.org/ns/mmd-1.0#' -NS_REL_1 = 'http://musicbrainz.org/ns/rel-1.0#' -NS_EXT_1 = 'http://musicbrainz.org/ns/ext-1.0#' - - -class Entity(object): - """A first-level MusicBrainz class. - - All entities in MusicBrainz have unique IDs (which are absolute URIs) - as well as any number of L{relations } to other entities - and free text tags. This class is abstract and should not be - instantiated. - - Relations are differentiated by their I{target type}, that means, - where they link to. MusicBrainz currently supports four target types - (artists, releases, tracks, and URLs) each identified using a URI. 
- To get all relations with a specific target type, you can use - L{getRelations} and pass one of the following constants as the - parameter: - - - L{Relation.TO_ARTIST} - - L{Relation.TO_RELEASE} - - L{Relation.TO_TRACK} - - L{Relation.TO_URL} - - @see: L{Relation} - """ - - def __init__(self, id_=None): - """Constructor. - - This should only used by derived classes. - - @param id_: a string containing an absolute URI - """ - self._id = id_ - self._relations = { } - self._tags = { } - self._rating = Rating() - - def getId(self): - """Returns a MusicBrainz ID. - - @return: a string containing a URI, or None - """ - return self._id - - def setId(self, value): - """Sets a MusicBrainz ID. - - @param value: a string containing an absolute URI - """ - self._id = value - - id = property(getId, setId, doc='The MusicBrainz ID.') - - def getRelations(self, targetType=None, relationType=None, - requiredAttributes=(), direction=None): - """Returns a list of relations. - - If C{targetType} is given, only relations of that target - type are returned. For MusicBrainz, the following target - types are defined: - - L{Relation.TO_ARTIST} - - L{Relation.TO_RELEASE} - - L{Relation.TO_TRACK} - - L{Relation.TO_URL} - - If C{targetType} is L{Relation.TO_ARTIST}, for example, - this method returns all relations between this Entity and - artists. - - You may use the C{relationType} parameter to further restrict - the selection. If it is set, only relations with the given - relation type are returned. The C{requiredAttributes} sequence - lists attributes that have to be part of all returned relations. - - If C{direction} is set, only relations with the given reading - direction are returned. You can use the L{Relation.DIR_FORWARD}, - L{Relation.DIR_BACKWARD}, and L{Relation.DIR_NONE} constants - for this. 
- - @param targetType: a string containing an absolute URI, or None - @param relationType: a string containing an absolute URI, or None - @param requiredAttributes: a sequence containing absolute URIs - @param direction: one of L{Relation}'s direction constants - @return: a list of L{Relation} objects - - @see: L{Entity} - """ - allRels = [ ] - if targetType is not None: - allRels = self._relations.setdefault(targetType, [ ]) - else: - for (k, relList) in self._relations.items(): - for rel in relList: - allRels.append(rel) - - # Filter for direction. - # - if direction is not None: - allRels = [r for r in allRels if r.getDirection() == direction] - - # Filter for relation type. - # - if relationType is None: - return allRels - else: - allRels = [r for r in allRels if r.getType() == relationType] - - # Now filer for attribute type. - # - tmp = [] - required = set(iter(requiredAttributes)) - - for r in allRels: - attrs = set(iter(r.getAttributes())) - if required.issubset(attrs): - tmp.append(r) - return tmp - - - def getRelationTargets(self, targetType=None, relationType=None, - requiredAttributes=(), direction=None): - """Returns a list of relation targets. - - The arguments work exactly like in L{getRelations}, but - instead of L{Relation} objects, the matching relation - targets are returned. This can be L{Artist}, L{Release}, - or L{Track} objects, depending on the relations. - - As a special case, URL strings are returned if the target - is an URL. 
- - @param targetType: a string containing an absolute URI, or None - @param relationType: a string containing an absolute URI, or None - @param requiredAttributes: a sequence containing absolute URIs - @param direction: one of L{Relation}'s direction constants - @return: a list of objects, depending on the relation - - @see: L{getRelations} - """ - ret = [ ] - rels = self.getRelations(targetType, relationType, - requiredAttributes, direction) - - for r in rels: - if r.getTargetType() == Relation.TO_URL: - ret.append(r.getTargetId()) - else: - ret.append(r.getTarget()) - - return ret - - - def addRelation(self, relation): - """Adds a relation. - - This method adds C{relation} to the list of relations. The - given relation has to be initialized, at least the target - type has to be set. - - @param relation: the L{Relation} object to add - - @see: L{Entity} - """ - assert relation.getType is not None - assert relation.getTargetType is not None - assert relation.getTargetId is not None - l = self._relations.setdefault(relation.getTargetType(), [ ]) - l.append(relation) - - - def getRelationTargetTypes(self): - """Returns a list of target types available for this entity. - - Use this to find out to which types of targets this entity - has relations. If the entity only has relations to tracks and - artists, for example, then a list containg the strings - L{Relation.TO_TRACK} and L{Relation.TO_ARTIST} is returned. - - @return: a list of strings containing URIs - - @see: L{getRelations} - """ - return self._relations.keys() - - def getTag(self, value): - """Return the tag with the given value (aka the tag's name). - - @return: the L{Tag} with the given name or raises a KeyError - """ - return self._tags[value] - - def getTags(self): - """Return all tags attached to this Entity. - - @return: a list of L{Tag} objects - """ - return self._tags.values() - - tags = property(getTags, doc='The tags for this entity.') - - def addTag(self, tag): - """Add a new tag. 
- - This merges an existing tag with the same name. - - @param tag: the L{Tag} object to add - - @see: L{getTags} - """ - if self._tags.has_key(tag.value): - existing = self._tags[tag.value] - existing.count += tag.count - else: - self._tags[tag.value] = tag - - def getRating(self): - """Return the rating of this Entity. - 0 = Unrated - 1 - 5 = Rating - - @return: rating - """ - return self._rating - - rating = property(getRating, doc='The rating for this entity.') - - def setRating(self, value): - self._rating = value - - -class Artist(Entity): - """Represents an artist. - - Artists in MusicBrainz can have a type. Currently, this type can - be either Person or Group for which the following URIs are assigned: - - - C{http://musicbrainz.org/ns/mmd-1.0#Person} - - C{http://musicbrainz.org/ns/mmd-1.0#Group} - - Use the L{TYPE_PERSON} and L{TYPE_GROUP} constants for comparison. - """ - TYPE_PERSON = NS_MMD_1 + 'Person' - TYPE_GROUP = NS_MMD_1 + 'Group' - - def __init__(self, id_=None, type_=None, name=None, sortName=None): - """Constructor. - - @param id_: a string containing an absolute URI - @param type_: a string containing an absolute URI - @param name: a string containing the artist's name - @param sortName: a string containing the artist's sort name - """ - Entity.__init__(self, id_) - self._type = type_ - self._name = name - self._sortName = sortName - self._disambiguation = None - self._beginDate = None - self._endDate = None - self._aliases = [ ] - self._releases = [ ] - self._releasesCount = None - self._releasesOffset = None - self._releaseGroups = [ ] - self._releaseGroupsCount = None - self._releaseGroupsOffset = None - - def getType(self): - """Returns the artist's type. - - @return: a string containing an absolute URI, or None - """ - return self._type - - def setType(self, type_): - """Sets the artist's type. 
- - @param type_: a string containing an absolute URI - """ - self._type = type_ - - type = property(getType, setType, doc="The artist's type.") - - def getName(self): - """Returns the artist's name. - - @return: a string containing the artist's name, or None - """ - return self._name - - def setName(self, name): - """Sets the artist's name. - - @param name: a string containing the artist's name - """ - self._name = name - - name = property(getName, setName, doc="The artist's name.") - - def getSortName(self): - """Returns the artist's sort name. - - The sort name is the artist's name in a special format which - is better suited for lexicographic sorting. The MusicBrainz - style guide specifies this format. - - @see: U{The MusicBrainz Style Guidelines - } - """ - return self._sortName - - def setSortName(self, sortName): - """Sets the artist's sort name. - - @param sortName: a string containing the artist's sort name - - @see: L{getSortName} - """ - self._sortName = sortName - - sortName = property(getSortName, setSortName, - doc="The artist's sort name.") - - def getDisambiguation(self): - """Returns the disambiguation attribute. - - This attribute may be used if there is more than one artist - with the same name. In this case, disambiguation attributes - are added to the artists' names to keep them apart. - - For example, there are at least three bands named 'Vixen'. - Each band has a different disambiguation in the MusicBrainz - database, like 'Hip-hop' or 'all-female rock/glam band'. - - @return: a disambiguation string, or None - - @see: L{getUniqueName} - """ - return self._disambiguation - - def setDisambiguation(self, disambiguation): - """Sets the disambiguation attribute. 
- - @param disambiguation: a disambiguation string - - @see: L{getDisambiguation}, L{getUniqueName} - """ - self._disambiguation = disambiguation - - disambiguation = property(getDisambiguation, setDisambiguation, - doc="The disambiguation comment.") - - def getUniqueName(self): - """Returns a unique artist name (using disambiguation). - - This method returns the artist name together with the - disambiguation attribute in parenthesis if it exists. - Example: 'Vixen (Hip-hop)'. - - @return: a string containing the unique name - - @see: L{getDisambiguation} - """ - d = self.getDisambiguation() - if d is not None and d.strip() != '': - return '%s (%s)' % (self.getName(), d) - else: - return self.getName() - - def getBeginDate(self): - """Returns the birth/foundation date. - - The definition of the I{begin date} depends on the artist's - type. For persons, this is the day of birth, for groups it - is the day the group was founded. - - The returned date has the format 'YYYY', 'YYYY-MM', or - 'YYYY-MM-DD', depending on how much detail is known. - - @return: a string containing the date, or None - - @see: L{getType} - """ - return self._beginDate - - def setBeginDate(self, dateStr): - """Sets the begin/foundation date. - - @param dateStr: a date string - - @see: L{getBeginDate} - """ - self._beginDate = dateStr - - beginDate = property(getBeginDate, setBeginDate, - doc="The begin/foundation date.") - - def getEndDate(self): - """Returns the death/dissolving date. - - The definition of the I{end date} depends on the artist's - type. For persons, this is the day of death, for groups it - is the day the group was dissolved. - - @return: a string containing a date, or None - - @see: L{getBeginDate} - """ - return self._endDate - - def setEndDate(self, dateStr): - """Sets the death/dissolving date. 
- - @param dateStr: a string containing a date - - @see: L{setEndDate}, L{getBeginDate} - """ - self._endDate = dateStr - - endDate = property(getEndDate, setEndDate, - doc="The death/dissolving date.") - - def getAliases(self): - """Returns the list of aliases for this artist. - - @return: a list of L{ArtistAlias} objects - """ - return self._aliases - - aliases = property(getAliases, doc='The list of aliases.') - - def addAlias(self, alias): - """Adds an alias for this artist. - - @param alias: an L{ArtistAlias} object - """ - self._aliases.append(alias) - - def getReleases(self): - """Returns a list of releases from this artist. - - This may also include releases where this artist isn't the - I{main} artist but has just contributed one or more tracks - (aka VA-Releases). - - @return: a list of L{Release} objects - """ - return self._releases - - releases = property(getReleases, doc='The list of releases') - - def addRelease(self, release): - """Adds a release to this artist's list of releases. - - @param release: a L{Release} object - """ - self._releases.append(release) - - def getReleasesOffset(self): - """Returns the offset of the release list. - - This is used if the release list is incomplete (ie. the web - service only returned part of the release for this artist). - Note that the offset value is zero-based, which means release - C{0} is the first release. - - @return: an integer containing the offset, or None - - @see: L{getReleases}, L{getReleasesCount} - """ - return self._releasesOffset - - def setReleasesOffset(self, offset): - """Sets the offset of the release list. - - @param offset: an integer containing the offset, or None - - @see: L{getReleasesOffset} - """ - self._releasesOffset = offset - - releasesOffset = property(getReleasesOffset, setReleasesOffset, - doc='The offset of the release list.') - - def getReleasesCount(self): - """Returns the number of existing releases. 
- - This may or may not match with the number of elements that - L{getReleases} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setReleasesCount}, L{getReleasesOffset} - """ - return self._releasesCount - - def setReleasesCount(self, value): - """Sets the number of existing releases. - - @param value: an integer containing the count, or None - - @see: L{getReleasesCount}, L{setReleasesOffset} - """ - self._releasesCount = value - - releasesCount = property(getReleasesCount, setReleasesCount, - doc='The total number of releases') - - def getReleaseGroups(self): - """Returns a list of release groups from this artist. - - @return: a list of L{ReleaseGroup} objects - """ - return self._releaseGroups - - releaseGroups = property(getReleaseGroups, doc='The list of release groups') - - def addReleaseGroup(self, releaseGroup): - """Adds a release group to this artist's list of release groups. - - @param releaseGroup: a L{ReleaseGroup} object - """ - self._releaseGroups.append(releaseGroup) - - def getReleaseGroupsOffset(self): - """Returns the offset of the release group list. - - This is used if the release group list is incomplete (ie. the - web service only returned part of the result for this artist). - Note that the offset value is zero-based, which means release - group C{0} is the first release group. - - @return: an integer containing the offset, or None - - @see: L{getReleaseGroups}, L{getReleaseGroupsCount} - """ - return self._releaseGroupsOffset - - def setReleaseGroupsOffset(self, offset): - """Sets the offset of the release group list. 
- - @param offset: an integer containing the offset, or None - - @see: L{getReleaseGroupsOffset} - """ - self._releaseGroupsOffset = offset - - releaseGroupsOffset = property(getReleaseGroupsOffset, setReleaseGroupsOffset, - doc='The offset of the release group list.') - - def getReleaseGroupsCount(self): - """Returns the number of existing release groups. - - This may or may not match with the number of elements that - L{getReleaseGroups} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setReleaseGroupsCount}, L{getReleaseGroupsOffset} - """ - return self._releaseGroupsCount - - def setReleaseGroupsCount(self, value): - """Sets the number of existing release groups. - - @param value: an integer containing the count, or None - - @see: L{getReleaseGroupsCount}, L{setReleaseGroupsOffset} - """ - self._releaseGroupsCount = value - - releasesCount = property(getReleaseGroupsCount, setReleaseGroupsCount, - doc='The total number of release groups') - - -class Rating(object): - """The representation of a MusicBrain rating. - - The rating can have the following values: - - 0 = Unrated - [1..5] = Rating - """ - def __init__(self, value=None, count=None): - """Constructor. - - @param value: a string containing the tag's value - @param count: the number of users who added this tag - """ - self._value = value - self._count = count - - def getValue(self): - """Returns a string with the tag's value. - - @return: an integer containing the rating's value, or None - """ - return self._value - - def setValue(self, value): - """ Set the value of this rating. - - 0 or None = Clear your rating - 1 - 5 = Rating - - @param value: the rating to apply - - @raise ValueError: if value is not a double or not in the - range 0 - 5 or None. 
- """ - if value == None: - value = 0 - try: - value = float(value) - except ValueError, e: - raise ValueError("Value for rating needs to be an" \ - "float.") - if value < 0.0 or value > 5.0: - raise ValueError("Value needs to be in the range [0..5]") - self._value = value - - value = property(getValue, setValue, doc='The value of the rating.') - - def getCount(self): - """Returns an integer containing the rating's frequency count. - - @return: an integer containing the rating's frequency count, - or None - """ - return self._count - - def setCount(self, count): - """Sets the frequency count of this rating. - - @param count: an integer containing the tag's frequency count - """ - self._count = count - - count = property(getCount, setCount, doc="This tag's frequency count.") - - def __str__(self): - return str(self._value) - - def __unicode__(self): - return unicode(self._value) - - -class Tag(object): - """The representation of a MusicBrainz folksonomy tag. - - The tag's value is the text that's displayed in the tag cloud. - The count attribute keeps track of how many users added the tag - to its owning entity. - """ - def __init__(self, value=None, count=None): - """Constructor. - - @param value: a string containing the tag's value - @param count: the number of users who added this tag - """ - self._value = value - self._count = count - - def getValue(self): - """Returns a string with the tag's value. - - @return: a string containing the tags's value, or None - """ - return self._value - - def setValue(self, value): - """Sets the value of this tag. - - @param value: A string containing the value of the tag - """ - self._value = value - - value = property(getValue, setValue, doc='The value of the text.') - - def getCount(self): - """Returns an integer containing the tag's frequency count. - - @return: an integer containing the tags's frequency count, or None - """ - return self._count - - def setCount(self, count): - """Sets the frequency count of this tag. 
- - @param count: an integer containing the tag's frequency count - """ - self._count = count - - count = property(getCount, setCount, doc="This tag's frequency count.") - - def __str__(self): - return str(self._value) - - def __unicode__(self): - return unicode(self._value) - - -class Label(Entity): - """Represents a record label. - - A label within MusicBrainz is an L{Entity}. It contains information - about the label like when it was established, its name, label code and - other relationships. All release events may be assigned a label. - """ - TYPE_UNKNOWN = NS_MMD_1 + 'Unknown' - - TYPE_DISTRIBUTOR = NS_MMD_1 + 'Distributor' - TYPE_HOLDING = NS_MMD_1 + 'Holding' - TYPE_PRODUCTION = NS_MMD_1 + 'Production' - - TYPE_ORIGINAL = NS_MMD_1 + 'OriginalProduction' - TYPE_BOOTLEG = NS_MMD_1 + 'BootlegProduction' - TYPE_REISSUE = NS_MMD_1 + 'ReissueProduction' - - def __init__(self, id_=None): - """Constructor. - - @param id_: a string containing an absolute URI - """ - Entity.__init__(self, id_) - self._type = None - self._name = None - self._sortName = None - self._disambiguation = None - self._countryId = None - self._code = None - self._beginDate = None - self._endDate = None - self._aliases = [ ] - - def getType(self): - """Returns the type of this label. - - @return: a string containing an absolute URI - """ - return self._type - - def setType(self, type_): - """Sets the type of this label. - - @param type_: A string containing the absolute URI of the type of label. - """ - self._type = type_ - - type = property(getType, setType, doc='The type of label') - - def getName(self): - """Returns a string with the name of the label. - - @return: a string containing the label's name, or None - """ - return self._name - - def setName(self, name): - """Sets the name of this label. 
- - @param name: A string containing the name of the label - """ - self._name = name - - name = property(getName, setName, doc='The name of the label.') - - def getSortName(self): - """Returns the label's sort name. - - The sort name is the label's name in a special format which - is better suited for lexicographic sorting. The MusicBrainz - style guide specifies this format. - - @see: U{The MusicBrainz Style Guidelines - } - """ - return self._sortName - - def setSortName(self, sortName): - """Sets the label's sort name. - - @param sortName: a string containing the label's sort name - - @see: L{getSortName} - """ - self._sortName = sortName - - sortName = property(getSortName, setSortName, - doc="The label's sort name.") - - def getDisambiguation(self): - """Returns the disambiguation attribute. - - This attribute may be used if there is more than one label - with the same name. In this case, disambiguation attributes - are added to the labels' names to keep them apart. - - @return: a disambiguation string, or None - - @see: L{getUniqueName} - """ - return self._disambiguation - - def setDisambiguation(self, disambiguation): - """Sets the disambiguation attribute. - - @param disambiguation: a disambiguation string - - @see: L{getDisambiguation}, L{getUniqueName} - """ - self._disambiguation = disambiguation - - disambiguation = property(getDisambiguation, setDisambiguation, - doc="The disambiguation comment.") - - def getUniqueName(self): - """Returns a unique label name (using disambiguation). - - This method returns the label's name together with the - disambiguation attribute in parenthesis if it exists. - - @return: a string containing the unique name - - @see: L{getDisambiguation} - """ - d = self.getDisambiguation() - if d is not None and d.strip() != '': - return '%s (%s)' % (self.getName(), d) - else: - return self.getName() - - def getBeginDate(self): - """Returns the date this label was established. 
@return: A string containing the start date, or None
- - @param code: a string containing the label code - """ - self._code = code - - code = property(getCode, setCode, - doc='The label code.') - - def getAliases(self): - """Returns the list of aliases for this label. - - @return: a list of L{LabelAlias} objects - """ - return self._aliases - - aliases = property(getAliases, doc='The list of aliases.') - - def addAlias(self, alias): - """Adds an alias for this label. - - @param alias: a L{LabelAlias} object - """ - self._aliases.append(alias) - - -class Release(Entity): - """Represents a Release. - - A release within MusicBrainz is an L{Entity} which contains L{Track} - objects. Releases may be of more than one type: There can be albums, - singles, compilations, live recordings, official releases, bootlegs - etc. - - @note: The current MusicBrainz server implementation supports only a - limited set of types. - """ - TYPE_NONE = NS_MMD_1 + 'None' - TYPE_NON_ALBUM_TRACKS = NS_MMD_1 + "NonAlbum Track" - - TYPE_ALBUM = NS_MMD_1 + 'Album' - TYPE_SINGLE = NS_MMD_1 + 'Single' - TYPE_EP = NS_MMD_1 + 'EP' - TYPE_COMPILATION = NS_MMD_1 + 'Compilation' - TYPE_SOUNDTRACK = NS_MMD_1 + 'Soundtrack' - TYPE_SPOKENWORD = NS_MMD_1 + 'Spokenword' - TYPE_INTERVIEW = NS_MMD_1 + 'Interview' - TYPE_AUDIOBOOK = NS_MMD_1 + 'Audiobook' - TYPE_LIVE = NS_MMD_1 + 'Live' - TYPE_REMIX = NS_MMD_1 + 'Remix' - TYPE_OTHER = NS_MMD_1 + 'Other' - - TYPE_OFFICIAL = NS_MMD_1 + 'Official' - TYPE_PROMOTION = NS_MMD_1 + 'Promotion' - TYPE_BOOTLEG = NS_MMD_1 + 'Bootleg' - TYPE_PSEUDO_RELEASE = NS_MMD_1 + 'Pseudo-Release' - - def __init__(self, id_=None, title=None): - """Constructor. 
- - @param id_: a string containing an absolute URI - @param title: a string containing the title - """ - Entity.__init__(self, id_) - self._types = [ ] - self._title = title - self._textLanguage = None - self._textScript = None - self._asin = None - self._artist = None - self._releaseEvents = [ ] - #self._releaseEventsCount = None - self._releaseGroup = None - self._discs = [ ] - #self._discIdsCount = None - self._tracks = [ ] - self._tracksOffset = None - self._tracksCount = None - - - def getTypes(self): - """Returns the types of this release. - - To test for release types, you can use the constants - L{TYPE_ALBUM}, L{TYPE_SINGLE}, etc. - - @return: a list of strings containing absolute URIs - - @see: L{musicbrainz2.utils.getReleaseTypeName} - """ - return self._types - - types = property(getTypes, doc='The list of types for this release.') - - def addType(self, type_): - """Add a type to the list of types. - - @param type_: a string containing absolute URIs - - @see: L{getTypes} - """ - self._types.append(type_) - - def getTitle(self): - """Returns the release's title. - - @return: a string containing the release's title - """ - return self._title - - def setTitle(self, title): - """Sets the release's title. - - @param title: a string containing the release's title, or None - """ - self._title = title - - title = property(getTitle, setTitle, doc='The title of this release.') - - def getTextLanguage(self): - """Returns the language used in release and track titles. - - To represent the language, the ISO-639-2/T standard is used, - which provides three-letter terminological language codes like - 'ENG', 'DEU', 'JPN', 'KOR', 'ZHO' or 'YID'. - - Note that this refers to release and track I{titles}, not - lyrics. - - @return: a string containing the language code, or None - - @see: L{musicbrainz2.utils.getLanguageName} - """ - return self._textLanguage - - def setTextLanguage(self, language): - """Sets the language used in releaes and track titles. 
- - @param language: a string containing a language code - - @see: L{getTextLanguage} - """ - self._textLanguage = language - - textLanguage = property(getTextLanguage, setTextLanguage, - doc='The language used in release and track titles.') - - def getTextScript(self): - """Returns the script used in release and track titles. - - To represent the script, ISO-15924 script codes are used. - Valid codes are, among others: 'Latn', 'Cyrl', 'Hans', 'Hebr' - - Note that this refers to release and track I{titles}, not - lyrics. - - @return: a string containing the script code, or None - - @see: L{musicbrainz2.utils.getScriptName} - """ - return self._textScript - - def setTextScript(self, script): - """Sets the script used in releaes and track titles. - - @param script: a string containing a script code - - @see: L{getTextScript} - """ - self._textScript = script - - textScript = property(getTextScript, setTextScript, - doc='The script used in release and track titles.') - - def getAsin(self): - """Returns the amazon shop identifier (ASIN). - - The ASIN is a 10-letter code (except for books) assigned - by Amazon, which looks like 'B000002IT2' or 'B00006I4YD'. - - @return: a string containing the ASIN, or None - """ - return self._asin - - def setAsin(self, asin): - """Sets the amazon shop identifier (ASIN). - - @param asin: a string containing the ASIN - - @see: L{getAsin} - """ - self._asin = asin - - asin = property(getAsin, setAsin, doc='The amazon shop identifier.') - - def getArtist(self): - """Returns the main artist of this release. - - @return: an L{Artist} object, or None - """ - return self._artist - - def setArtist(self, artist): - """Sets this release's main artist. - - @param artist: an L{Artist} object - """ - self._artist = artist - - artist = property(getArtist, setArtist, - doc='The main artist of this release.') - - def getReleaseGroup(self): - """Returns the release group to which this release belongs. - - @return: a L{ReleaseGroup} object, or None. 
- """ - return self._releaseGroup - - def setReleaseGroup(self, releaseGroup): - """Sets the release's release group. - - @param releaseGroup: a L{ReleaseGroup} object, or None. - """ - self._releaseGroup = releaseGroup - - releaseGroup = property(getReleaseGroup, setReleaseGroup, - doc='The release group this release belongs to.') - - def isSingleArtistRelease(self): - """Checks if this is a single artist's release. - - Returns C{True} if the release's main artist (L{getArtist}) is - also the main artist for all of the tracks. This is checked by - comparing the artist IDs. - - Note that the release's artist has to be set (see L{setArtist}) - for this. The track artists may be unset. - - @return: True, if this is a single artist's release - """ - releaseArtist = self.getArtist() - assert releaseArtist is not None, 'Release Artist may not be None!' - for track in self.getTracks(): - if track.getArtist() is None: - continue - if track.getArtist().getId() != releaseArtist.getId(): - return False - - return True - - def getTracks(self): - """Returns the tracks this release contains. - - @return: a list containing L{Track} objects - - @see: L{getTracksOffset}, L{getTracksCount} - """ - return self._tracks - - tracks = property(getTracks, doc='The list of tracks.') - - def addTrack(self, track): - """Adds a track to this release. - - This appends a track at the end of this release's track list. - - @param track: a L{Track} object - """ - self._tracks.append(track) - - def getTracksOffset(self): - """Returns the offset of the track list. - - This is used if the track list is incomplete (ie. the web - service only returned part of the tracks on this release). - Note that the offset value is zero-based, which means track - C{0} is the first track. - - @return: an integer containing the offset, or None - - @see: L{getTracks}, L{getTracksCount} - """ - return self._tracksOffset - - def setTracksOffset(self, offset): - """Sets the offset of the track list. 
- - @param offset: an integer containing the offset, or None - - @see: L{getTracksOffset}, L{setTracksCount} - """ - self._tracksOffset = offset - - tracksOffset = property(getTracksOffset, setTracksOffset, - doc='The offset of the track list.') - - def getTracksCount(self): - """Returns the number of tracks on this release. - - This may or may not match with the number of elements that - L{getTracks} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setTracksCount}, L{getTracks}, L{getTracksOffset} - """ - return self._tracksCount - - def setTracksCount(self, value): - """Sets the number of tracks on this release. - - @param value: an integer containing the count, or None - - @see: L{getTracksCount}, L{setTracksOffset} - """ - self._tracksCount = value - - tracksCount = property(getTracksCount, setTracksCount, - doc='The total number of releases') - - - def getReleaseEvents(self): - """Returns the list of release events. - - A L{Release} may contain a list of so-called release events, - each represented using a L{ReleaseEvent} object. Release - evens specify where and when this release was, well, released. - - @return: a list of L{ReleaseEvent} objects - - @see: L{getReleaseEventsAsDict} - """ - return self._releaseEvents - - releaseEvents = property(getReleaseEvents, - doc='The list of release events.') - - def addReleaseEvent(self, event): - """Adds a release event to this release. - - @param event: a L{ReleaseEvent} object - - @see: L{getReleaseEvents} - """ - self._releaseEvents.append(event) - - def getReleaseEventsAsDict(self): - """Returns the release events represented as a dict. - - Keys are ISO-3166 country codes like 'DE', 'UK', 'FR' etc. - Values are dates in 'YYYY', 'YYYY-MM' or 'YYYY-MM-DD' format. 
- - @return: a dict containing (countryCode, date) entries - - @see: L{getReleaseEvents}, L{musicbrainz2.utils.getCountryName} - """ - d = { } - for event in self.getReleaseEvents(): - d[event.getCountry()] = event.getDate() - return d - - def getEarliestReleaseDate(self): - """Returns the earliest release date. - - This favours complete dates. For example, '2006-09' is - returned if there is '2000', too. If there is no release - event associated with this release, None is returned. - - @return: a string containing the date, or None - - @see: L{getReleaseEvents}, L{getReleaseEventsAsDict} - """ - event = self.getEarliestReleaseEvent() - - if event is None: - return None - else: - return event.getDate() - - def getEarliestReleaseEvent(self): - """Returns the earliest release event. - - This works like L{getEarliestReleaseDate}, but instead of - just the date, this returns a L{ReleaseEvent} object. - - @return: a L{ReleaseEvent} object, or None - - @see: L{getReleaseEvents}, L{getEarliestReleaseDate} - """ - dates = [ ] - for event in self.getReleaseEvents(): - date = event.getDate() - if len(date) == 10: # 'YYYY-MM-DD' - dates.append( (date, event) ) - elif len(date) == 7: # 'YYYY-MM' - dates.append( (date + '-99', event) ) - else: - dates.append( (date + '-99-99', event) ) - - dates.sort(lambda x, y: cmp(x[0], y[0])) - - if len(dates) > 0: - return dates[0][1] - else: - return None - - - #def getReleaseEventsCount(self): - # """Returns the number of release events. - # - # This may or may not match with the number of elements that - # getReleaseEvents() returns. If the count is higher than - # the list, it indicates that the list is incomplete. - # """ - # return self._releaseEventsCount - - #def setReleaseEventsCount(self, value): - # self._releaseEventsCount = value - - def getDiscs(self): - """Returns the discs associated with this release. - - Discs are currently containers for MusicBrainz DiscIDs. 
- Note that under rare circumstances (identical TOCs), a - DiscID could be associated with more than one release. - - @return: a list of L{Disc} objects - """ - return self._discs - - discs = property(getDiscs, doc='The list of associated discs.') - - def addDisc(self, disc): - """Adds a disc to this release. - - @param disc: a L{Disc} object - """ - self._discs.append(disc) - - #def getDiscIdsCount(self): - # return self._discIdsCount - - #def setDiscIdsCount(self, value): - # self._discIdsCount = value - - -class ReleaseGroup(Entity): - """Represents a ReleaseGroup. - - A ReleaseGroup in MusicBrainz is an L{Entity} which groups several different - versions of L{Release} objects (e.g., different editions of the same album). - - @see: L{Release} - @see: L{Entity} - """ - - def __init__(self, id_=None, title=None): - """Constructor. - - @param id_: a string containing an absolute URI - @param title: a string containing the title - """ - Entity.__init__(self, id_) - self._title = title - self._id = id_ - self._type = None - self._releases = [ ] - self._artist = None - self._releasesOffset = 0 - self._releasesCount = 0 - - def getType(self): - """Returns the type of this release group. - - To test for release types, you can use the constants - L{Release.TYPE_ALBUM}, L{Release.TYPE_SINGLE}, etc. - - @return: a string containing an absolute URI, or None. - - @see: L{musicbrainz2.utils.getReleaseTypeName} - """ - return self._type - - def setType(self, type_): - """Sets the type of this release group. - - Use a constant from the L{Release} class, such as - L{Release.TYPE_ALBUM} or L{Release.TYPE_SINGLE} to - set the value. - - @param type_: a string containing an absolute URI, or None. - - @see: L{musicbrainz2.utils.getReleaseTypeName} - """ - self._type = type_ - - type = property(getType, setType, - doc = 'The type of this release group.') - - def getReleases(self): - """Gets the releases in this release group. 
- - @return: a list of L{Release} objects - @see: L{Release} - """ - return self._releases - - releases = property(getReleases, - doc = 'The list of releases in this release group.') - - def addRelease(self, release): - """Adds a L{Release} to this release group. - - @param release: a L{Release} object - """ - self._releases.append(release) - - def getReleasesOffset(self): - """Returns the offset of the release list. - - This is used if the release list is incomplete (i.e., the web - service only returned a portion of the releases in this release - group). - - @return: an integer containing the offset, or None. - @see: L{getReleases}, L{getReleasesCount} - """ - return self._releasesOffset - - def setReleasesOffset(self, offset): - """Sets the offset of the release list. - - @param offset: an integer containing the offset, or None. - @see: L{getReleases}, L{getReleasesOffset} - """ - self._releasesOffset = offset - - releasesOffset = property(getReleasesOffset, setReleasesOffset, - doc='The offset of the release list.') - - def getReleasesCount(self): - """Returns the number of releases in this release group. - - This may or may not match the number of elements returned by - L{getReleases}. If the count is higher than the length of that - list, then the list is incomplete. - - @return: an integer containing the count, or None - @see: L{getReleases}, L{setReleasesCount}, L{getReleasesOffset} - """ - return self._releasesCount - - def setReleasesCount(self, value): - """Sets the number of releases in this release group. - - @param value: an integer containing the count, or None. - @see: L{getReleases}, L{getReleasesCount}, L{getReleasesOffset} - """ - self._releasesCount = value - - releasesCount = property(getReleasesCount, setReleasesCount, - doc = 'The total number of releases') - - def getTitle(self): - """Returns this release group's title. 
- - @return: a string containing the release group's title - """ - return self._title - - def setTitle(self, title): - """Sets the release group's title. - - @param title: a string containing the release group's title. - """ - self._title = title - - title = property(getTitle, setTitle, - doc = 'The title of this release group.') - - def getArtist(self): - """Returns the main artist of this release group. - - @return: an L{Artist} object, or None - """ - return self._artist - - def setArtist(self, artist): - """Sets the release group's main artist. - - @param artist: an L{Artist} object - """ - self._artist = artist - - artist = property(getArtist, setArtist, - doc = 'The main artist of this release group') - - -class Track(Entity): - """Represents a track. - - This class represents a track which may appear on one or more releases. - A track may be associated with exactly one artist (the I{main} artist). - - Using L{getReleases}, you can find out on which releases this track - appears. To get the track number, too, use the - L{Release.getTracksOffset} method. - - @note: Currently, the MusicBrainz server doesn't support tracks to - be on more than one release. - - @see: L{Release}, L{Artist} - """ - def __init__(self, id_=None, title=None): - """Constructor. - - @param id_: a string containing an absolute URI - @param title: a string containing the title - """ - Entity.__init__(self, id_) - self._title = title - self._artist = None - self._duration = None - self._puids = [ ] - self._releases = [ ] - self._isrcs = [ ] - - def getTitle(self): - """Returns the track's title. - - The style and format of this attribute is specified by the - style guide. - - @return: a string containing the title, or None - - @see: U{The MusicBrainz Style Guidelines - } - """ - return self._title - - def setTitle(self, title): - """Sets the track's title. 
- - @param title: a string containing the title - - @see: L{getTitle} - """ - self._title = title - - title = property(getTitle, setTitle, doc="The track's title.") - - def getArtist(self): - """Returns the main artist of this track. - - @return: an L{Artist} object, or None - """ - return self._artist - - def setArtist(self, artist): - """Sets this track's main artist. - - @param artist: an L{Artist} object - """ - self._artist = artist - - artist = property(getArtist, setArtist, doc="The track's main artist.") - - def getDuration(self): - """Returns the duration of this track in milliseconds. - - @return: an int containing the duration in milliseconds, or None - """ - return self._duration - - def setDuration(self, duration): - """Sets the duration of this track in milliseconds. - - @param duration: an int containing the duration in milliseconds - """ - self._duration = duration - - duration = property(getDuration, setDuration, - doc='The duration in milliseconds.') - - def getDurationSplit(self): - """Returns the duration as a (minutes, seconds) tuple. - - If no duration is set, (0, 0) is returned. Seconds are - rounded towards the ceiling if at least 500 milliseconds - are left. - - @return: a (minutes, seconds) tuple, both entries being ints - """ - duration = self.getDuration() - if duration is None: - return (0, 0) - else: - seconds = int( round(duration / 1000.0) ) - return (seconds / 60, seconds % 60) - - def getPuids(self): - """Returns the PUIDs associated with this track. - - Please note that a PUID may be associated with more than one - track. - - @return: a list of strings, each containing one PUID - """ - return self._puids - - puids = property(getPuids, doc='The list of associated PUIDs.') - - def addPuid(self, puid): - """Add a PUID to this track. - - @param puid: a string containing a PUID - """ - self._puids.append(puid) - - def getISRCs(self): - """Returns the ISRCs associated with this track. 
- - @return: a list of strings, each containing one ISRC - """ - return self._isrcs - - isrcs = property(getISRCs, doc='The list of associated ISRCs') - - def addISRC(self, isrc): - """Add a ISRC to this track. - - @param isrc: a string containing an ISRC - """ - self._isrcs.append(isrc) - - def getReleases(self): - """Returns the list of releases this track appears on. - - @return: a list of L{Release} objects - """ - return self._releases - - releases = property(getReleases, - doc='The releases on which this track appears.') - - def addRelease(self, release): - """Add a release on which this track appears. - - @param release: a L{Release} object - """ - self._releases.append(release) - - -class Relation(object): - """Represents a relation between two Entities. - - There may be an arbitrary number of relations between all first - class objects in MusicBrainz. The Relation itself has multiple - attributes, which may or may not be used for a given relation - type. - - Note that a L{Relation} object only contains the target but not - the source end of the relation. - - @todo: Add some examples. - - @cvar TO_ARTIST: Identifies relations linking to an artist. - @cvar TO_RELEASE: Identifies relations linking to a release. - @cvar TO_TRACK: Identifies relations linking to a track. - @cvar TO_URL: Identifies relations linking to an URL. - - @cvar DIR_NONE: Relation reading direction doesn't matter. - @cvar DIR_FORWARD: Relation reading direction is from source to target. - @cvar DIR_BACKWARD: Relation reading direction is from target to source. - @cvar DIR_BOTH: Relation reading direction doesn't matter (no longer used!). 
- """ - # Relation target types - # - TO_ARTIST = NS_REL_1 + 'Artist' - TO_RELEASE = NS_REL_1 + 'Release' - TO_TRACK = NS_REL_1 + 'Track' - TO_URL = NS_REL_1 + 'Url' - - # Relation reading directions - # - DIR_BOTH = 'both' - DIR_FORWARD = 'forward' - DIR_BACKWARD = 'backward' - DIR_NONE = 'none' - - def __init__(self, relationType=None, targetType=None, targetId=None, - direction=DIR_NONE, attributes=None, - beginDate=None, endDate=None, target=None): - """Constructor. - - @param relationType: a string containing an absolute URI - @param targetType: a string containing an absolute URI - @param targetId: a string containing an absolute URI - @param direction: one of C{Relation.DIR_FORWARD}, - C{Relation.DIR_BACKWARD}, or C{Relation.DIR_NONE} - @param attributes: a list of strings containing absolute URIs - @param beginDate: a string containing a date - @param endDate: a string containing a date - @param target: an instance of a subclass of L{Entity} - """ - self._relationType = relationType - self._targetType = targetType - self._targetId = targetId - self._direction = direction - self._beginDate = beginDate - self._endDate = endDate - self._target = target - self._attributes = attributes - if self._attributes is None: - self._attributes = [ ] - - def getType(self): - """Returns this relation's type. - - @return: a string containing an absolute URI, or None - """ - return self._relationType - - def setType(self, type_): - """Sets this relation's type. - - @param type_: a string containing an absolute URI - """ - self._relationType = type_ - - type = property(getType, setType, doc="The relation's type.") - - def getTargetId(self): - """Returns the target's ID. - - This is the ID the relation points to. It is an absolute - URI, and in case of an URL relation, it is a URL. - - @return: a string containing an absolute URI - """ - return self._targetId - - def setTargetId(self, targetId): - """Sets the target's ID. 
- - @param targetId: a string containing an absolute URI - - @see: L{getTargetId} - """ - self._targetId = targetId - - targetId = property(getTargetId, setTargetId, doc="The target's ID.") - - def getTargetType(self): - """Returns the target's type. - - For MusicBrainz data, the following target types are defined: - - artists: L{Relation.TO_ARTIST} - - releases: L{Relation.TO_RELEASE} - - tracks: L{Relation.TO_TRACK} - - urls: L{Relation.TO_URL} - - @return: a string containing an absolute URI - """ - return self._targetType - - def setTargetType(self, targetType): - """Sets the target's type. - - @param targetType: a string containing an absolute URI - - @see: L{getTargetType} - """ - self._targetType = targetType - - targetId = property(getTargetId, setTargetId, - doc="The type of target this relation points to.") - - def getAttributes(self): - """Returns a list of attributes describing this relation. - - The attributes permitted depend on the relation type. - - @return: a list of strings containing absolute URIs - """ - return self._attributes - - attributes = property(getAttributes, - doc='The list of attributes describing this relation.') - - def addAttribute(self, attribute): - """Adds an attribute to the list. - - @param attribute: a string containing an absolute URI - """ - self._attributes.append(attribute) - - def getBeginDate(self): - """Returns the begin date. - - The definition depends on the relation's type. It may for - example be the day of a marriage or the year an artist - joined a band. For other relation types this may be - undefined. - - @return: a string containing a date - """ - return self._beginDate - - def setBeginDate(self, dateStr): - """Sets the begin date. - - @param dateStr: a string containing a date - - @see: L{getBeginDate} - """ - self._beginDate = dateStr - - beginDate = property(getBeginDate, setBeginDate, doc="The begin date.") - - def getEndDate(self): - """Returns the end date. 
- - As with the begin date, the definition depends on the - relation's type. Depending on the relation type, this may - or may not be defined. - - @return: a string containing a date - - @see: L{getBeginDate} - """ - return self._endDate - - def setEndDate(self, dateStr): - """Sets the end date. - - @param dateStr: a string containing a date - - @see: L{getBeginDate} - """ - self._endDate = dateStr - - endDate = property(getEndDate, setEndDate, doc="The end date.") - - def getDirection(self): - """Returns the reading direction. - - The direction may be one of L{Relation.DIR_FORWARD}, - L{Relation.DIR_BACKWARD}, or L{Relation.DIR_NONE}, - depending on how the relation should be read. For example, - if direction is L{Relation.DIR_FORWARD} for a cover relation, - it is read as "X is a cover of Y". For some relations there is - no reading direction (like marriages) and the web service doesn't - send a direction. In these cases, the direction is set to - L{Relation.DIR_NONE}. - - @return: L{Relation.DIR_FORWARD}, L{Relation.DIR_BACKWARD}, - or L{Relation.DIR_NONE} - """ - return self._direction - - def setDirection(self, direction): - """Sets the reading direction. - - @param direction: L{Relation.DIR_FORWARD}, - L{Relation.DIR_BACKWARD}, or L{Relation.DIR_NONE} - - @see: L{getDirection} - """ - self._direction = direction - - direction = property(getDirection, setDirection, - doc="The reading direction.") - - def getTarget(self): - """Returns this relation's target object. - - Note that URL relations never have a target object. Use the - L{getTargetId} method to get the URL. - - @return: a subclass of L{Entity}, or None - """ - return self._target - - def setTarget(self, target): - """Sets this relation's target object. - - Note that URL relations never have a target object, they - are set using L{setTargetId}. 
- - @param target: a subclass of L{Entity} - """ - self._target = target - - target = property(getTarget, setTarget, - doc="The relation's target object.") - - -class ReleaseEvent(object): - """A release event, indicating where and when a release took place. - - All country codes used must be valid ISO-3166 country codes (i.e. 'DE', - 'UK' or 'FR'). The dates are strings and must have the format 'YYYY', - 'YYYY-MM' or 'YYYY-MM-DD'. - - The format of the release medium is a URI that can be compared to the - constants on this class (L{FORMAT_CD}, L{FORMAT_DVD} and others). - """ - FORMAT_CD = NS_MMD_1 + 'CD' - FORMAT_DVD = NS_MMD_1 + 'DVD' - FORMAT_SACD = NS_MMD_1 + 'SACD' - FORMAT_DUALDISC = NS_MMD_1 + 'DualDisc' - FORMAT_LASERDISC = NS_MMD_1 + 'LaserDisc' - FORMAT_MINIDISC = NS_MMD_1 + 'MiniDisc' - FORMAT_VINYL = NS_MMD_1 + 'Vinyl' - FORMAT_CASSETTE = NS_MMD_1 + 'Cassette' - FORMAT_CARTRIDGE = NS_MMD_1 + 'Cartridge' - FORMAT_REEL_TO_REEL = NS_MMD_1 + 'ReelToReel' - FORMAT_DAT = NS_MMD_1 + 'DAT' - FORMAT_DIGITAL = NS_MMD_1 + 'Digital' - FORMAT_WAX_CYLINDER = NS_MMD_1 + 'WaxCylinder' - FORMAT_PIANO_ROLL = NS_MMD_1 + 'PianoRoll' - FORMAT_OTHER = NS_MMD_1 + 'Other' - - def __init__(self, country=None, dateStr=None): - """Constructor. - - @param country: a string containing an ISO-3166 country code - @param dateStr: a string containing a date string - """ - self._countryId = country - self._dateStr = dateStr - self._catalogNumber = None - self._barcode = None - self._label = None - self._format = None - - def getCountry(self): - """Returns the country a release took place. - - @note: Due to a server limitation, the web service does not - return country IDs for release collection queries. This only - affects the L{musicbrainz2.webservice.Query.getReleases} query. 
- - @return: a string containing an ISO-3166 country code, or None - - @see: L{musicbrainz2.utils.getCountryName} - """ - return self._countryId - - def setCountry(self, country): - """Sets the country a release took place. - - @param country: a string containing an ISO-3166 country code - """ - self._countryId = country - - country = property(getCountry, setCountry, - doc='The country a release took place.') - - def getCatalogNumber(self): - """Returns the catalog number of this release event. - - @return: A string containing the catalog number, or None - """ - return self._catalogNumber - - def setCatalogNumber(self, catalogNumber): - """Sets the catalog number of this release event. - - @param catalogNumber: A string containing the catalog number - """ - self._catalogNumber = catalogNumber - - catalogNumber = property(getCatalogNumber, setCatalogNumber, - doc='The catalog number of the release event') - - def getBarcode(self): - """Returns the barcode of this release event. - - @return: A string containing the barcode, or None - """ - return self._barcode - - def setBarcode(self, barcode): - """Sets the barcode of this release event. - - @param barcode: A string containing the barcode - """ - self._barcode = barcode - - barcode = property(getBarcode, setBarcode, - doc='The barcode of the release event') - - def getLabel(self): - """Returns a L{Label} object for the label associated with this release. - - @return: a L{Label} object, or None - """ - return self._label - - def setLabel(self, label): - """Sets the label of this release event. - - @param label: A L{Label} object - """ - self._label = label - - label = property(getLabel, setLabel, doc='The label of the release') - - def getDate(self): - """Returns the date a release took place. - - @return: a string containing a date - """ - return self._dateStr - - def setDate(self, dateStr): - """Sets the date a release took place. 
- - @param dateStr: a string containing a date - """ - self._dateStr = dateStr - - date = property(getDate, setDate, doc='The date a release took place.') - - def getFormat(self): - """Returns the format of the release medium. - - @return: a string containing a URI, or None - """ - return self._format - - def setFormat(self, format): - """Sets the format of the release medium. - - @param format: a string containing a URI - """ - self._format = format - - format = property(getFormat, setFormat, - doc='The format of the release medium.') - - -class CDStub(object): - """Represents a CD Stub""" - - def __init__(self, disc): - """Constructor. - - @param disc: a L{Disc} object to create this CD Stub from - """ - assert isinstance(disc, Disc), 'musicbrainz2.model.Disc expected' - self._disc = disc - self._tracks = [ ] - self._title = "" - self._artist = "" - self._barcode = "" - self._comment = "" - - def setTitle(self, title): - """Sets the title of this release. - - @param title: a string containing the title - """ - self._title = title - - def getTitle(self): - """Returns the title of this release. - - @return: a string containing the title - """ - return self._title - - title = property(getTitle, setTitle, - doc='The title of the release') - - def setArtist(self, artist): - """Sets the artist of this release. - - @param artist: a string containing the artist - """ - self._artist = artist - - def getArtist(self): - """Returns the artist of this release. - - @return: a string containing the artist - """ - return self._artist - - artist = property(getArtist, setArtist, - doc='The artist of the release') - - def setComment(self, comment): - """Sets the comment for this release. - - @param comment: a string containing the comment - """ - self._comment = comment - - def getComment(self): - """Returns the comment for this release. 
- - @return: a string containing the comment - """ - return self._comment - - comment = property(getComment, setComment, - doc='Comment for the release (optional)') - - def setBarcode(self, barcode): - """Sets the barcode of this release. - - @param barcode: a string containing the barcode - """ - self._barcode = barcode - - def getBarcode(self): - """Returns the barcode of this release. - - @return: a string containing the barcode - """ - return self._barcode - - barcode = property(getBarcode, setBarcode, - doc='Barcode for the release (optional)') - - def addTrack(self, title, artist=''): - """Add a track to this release - - @param title: a string containing the title of the track - @param artist: a string containing the artist of the track, - if different to the album artist - """ - self._tracks.append((title, artist)) - - def getTracks(self): - """Return all the tracks on the release. - - @return: a list of tuples containing (title, artist) pairs - for each track - """ - return self._tracks - - tracks = property(getTracks, doc='The tracks of the release.') - -class Disc(object): - """Represents an Audio CD. - - This class represents an Audio CD. A disc can have an ID (the - MusicBrainz DiscID), which is calculated from the CD's table of - contents (TOC). There may also be data from the TOC like the length - of the disc in sectors, as well as position and length of the tracks. - - Note that different TOCs, maybe due to different pressings, lead to - different DiscIDs. Conversely, if two different discs have the same - TOC, they also have the same DiscID (which is unlikely but not - impossible). DiscIDs are always 28 characters long and look like this: - C{'J68I_CDcUFdCRCIbHSEbTBCbooA-'}. Sometimes they are also referred - to as CDIndex IDs. - - The L{MusicBrainz web service } only returns - the DiscID and the number of sectors. 
The DiscID calculation function - L{musicbrainz2.disc.readDisc}, however, can retrieve the other - attributes of L{Disc} from an Audio CD in the disc drive. - """ - def __init__(self, id_=None): - """Constructor. - - @param id_: a string containing a 28-character DiscID - """ - self._id = id_ - self._sectors = None - self._firstTrackNum = None - self._lastTrackNum = None - self._tracks = [ ] - - def getId(self): - """Returns the MusicBrainz DiscID. - - @return: a string containing a 28-character DiscID - """ - return self._id - - def setId(self, id_): - """Sets the MusicBrainz DiscId. - - @param id_: a string containing a 28-character DiscID - """ - self._id = id_ - - id = property(getId, setId, doc="The MusicBrainz DiscID.") - - def getSectors(self): - """Returns the length of the disc in sectors. - - @return: the length in sectors as an integer, or None - """ - return self._sectors - - def setSectors(self, sectors): - """Sets the length of the disc in sectors. - - @param sectors: the length in sectors as an integer - """ - self._sectors = sectors - - sectors = property(getSectors, setSectors, - doc="The length of the disc in sectors.") - - def getFirstTrackNum(self): - """Returns the number of the first track on this disc. - - @return: an int containing the track number, or None - """ - return self._firstTrackNum - - def setFirstTrackNum(self, trackNum): - """Sets the number of the first track on this disc. - - @param trackNum: an int containing the track number, or None - """ - self._firstTrackNum = trackNum - - firstTrackNum = property(getFirstTrackNum, setFirstTrackNum, - doc="The number of the first track on this disc.") - - def getLastTrackNum(self): - """Returns the number of the last track on this disc. - - @return: an int containing the track number, or None - """ - return self._lastTrackNum - - def setLastTrackNum(self, trackNum): - """Sets the number of the last track on this disc. 
- - @param trackNum: an int containing the track number, or None - """ - self._lastTrackNum = trackNum - - lastTrackNum = property(getLastTrackNum, setLastTrackNum, - doc="The number of the last track on this disc.") - - def getTracks(self): - """Returns the sector offset and length of this disc. - - This method returns a list of tuples containing the track - offset and length in sectors for all tracks on this disc. - The track offset is measured from the beginning of the disc, - the length is relative to the track's offset. Note that the - leadout track is I{not} included. - - @return: a list of (offset, length) tuples (values are ints) - """ - return self._tracks - - tracks = property(getTracks, - doc='Sector offset and length of all tracks.') - - def addTrack(self, track): - """Adds a track to the list. - - This method adds an (offset, length) tuple to the list of - tracks. The leadout track must I{not} be added. The total - length of the disc can be set using L{setSectors}. - - @param track: an (offset, length) tuple (values are ints) - - @see: L{getTracks} - """ - self._tracks.append(track) - - -class AbstractAlias(object): - """An abstract super class for all alias classes.""" - def __init__(self, value=None, type_=None, script=None): - """Constructor. - - @param value: a string containing the alias - @param type_: a string containing an absolute URI - @param script: a string containing an ISO-15924 script code - """ - self._value = value - self._type = type_ - self._script = script - - def getValue(self): - """Returns the alias. - - @return: a string containing the alias - """ - return self._value - - def setValue(self, value): - """Sets the alias. - - @param value: a string containing the alias - """ - self._value = value - - value = property(getValue, setValue, doc='The alias value.') - - def getType(self): - """Returns the alias type. 
- - @return: a string containing an absolute URI, or None - """ - return self._type - - def setType(self, type_): - """Sets the alias type. - - @param type_: a string containing an absolute URI, or None - """ - self._type = type_ - - type = property(getType, setType, doc='The alias type.') - - def getScript(self): - """Returns the alias script. - - @return: a string containing an ISO-15924 script code - """ - return self._script - - def setScript(self, script): - """Sets the alias script. - - @param script: a string containing an ISO-15924 script code - """ - self._script = script - - script = property(getScript, setScript, doc='The alias script.') - - -class ArtistAlias(AbstractAlias): - """Represents an artist alias. - - An alias (the I{alias value}) is a different representation of an - artist's name. This may be a common misspelling or a transliteration - (the I{alias type}). - - The I{alias script} is interesting mostly for transliterations and - indicates which script is used for the alias value. To represent the - script, ISO-15924 script codes like 'Latn', 'Cyrl', or 'Hebr' are used. - """ - pass - - -class LabelAlias(AbstractAlias): - """Represents a label alias. - - An alias (the I{alias value}) is a different representation of a - label's name. This may be a common misspelling or a transliteration - (the I{alias type}). - - The I{alias script} is interesting mostly for transliterations and - indicates which script is used for the alias value. To represent the - script, ISO-15924 script codes like 'Latn', 'Cyrl', or 'Hebr' are used. - """ - pass - - -class User(object): - """Represents a MusicBrainz user.""" - - def __init__(self): - """Constructor.""" - self._name = None - self._types = [ ] - self._showNag = None - - def getName(self): - """Returns the user name. - - @return: a string containing the user name - """ - return self._name - - def setName(self, name): - """Sets the user name. 
- - @param name: a string containing the user name - """ - self._name = name - - name = property(getName, setName, doc='The MusicBrainz user name.') - - def getTypes(self): - """Returns the types of this user. - - Most users' type list is empty. Currently, the following types - are defined: - - - 'http://musicbrainz.org/ns/ext-1.0#AutoEditor' - - 'http://musicbrainz.org/ns/ext-1.0#RelationshipEditor' - - 'http://musicbrainz.org/ns/ext-1.0#Bot' - - 'http://musicbrainz.org/ns/ext-1.0#NotNaggable' - - @return: a list of strings containing absolute URIs - """ - return self._types - - types = property(getTypes, doc="The user's types.") - - def addType(self, type_): - """Add a type to the list of types. - - @param type_: a string containing absolute URIs - - @see: L{getTypes} - """ - self._types.append(type_) - - def getShowNag(self): - """Returns true if a nag screen should be displayed to the user. - - @return: C{True}, C{False}, or None - """ - return self._showNag - - def setShowNag(self, value): - """Sets the value of the nag screen flag. - - If set to C{True}, - - @param value: C{True} or C{False} - - @see: L{getShowNag} - """ - self._showNag = value - - showNag = property(getShowNag, setShowNag, - doc='The value of the nag screen flag.') - -# EOF diff --git a/musicbrainz2/utils.py b/musicbrainz2/utils.py deleted file mode 100644 index 0eff7be8..00000000 --- a/musicbrainz2/utils.py +++ /dev/null @@ -1,204 +0,0 @@ -"""Various utilities to simplify common tasks. - -This module contains helper functions to make common tasks easier. - -@author: Matthias Friedrich -""" -__revision__ = '$Id: utils.py 11853 2009-07-21 09:26:50Z luks $' - -import re -import urlparse -import os.path - -__all__ = [ - 'extractUuid', 'extractFragment', 'extractEntityType', - 'getReleaseTypeName', 'getCountryName', 'getLanguageName', - 'getScriptName', -] - - -# A pattern to split the path part of an absolute MB URI. 
-PATH_PATTERN = '^/(artist|release|track|label|release-group)/([^/]*)$' - - -def extractUuid(uriStr, resType=None): - """Extract the UUID part from a MusicBrainz identifier. - - This function takes a MusicBrainz ID (an absolute URI) as the input - and returns the UUID part of the URI, thus turning it into a relative - URI. If C{uriStr} is None or a relative URI, then it is returned - unchanged. - - The C{resType} parameter can be used for error checking. Set it to - 'artist', 'release', or 'track' to make sure C{uriStr} is a - syntactically valid MusicBrainz identifier of the given resource - type. If it isn't, a C{ValueError} exception is raised. - This error checking only works if C{uriStr} is an absolute URI, of - course. - - Example: - - >>> from musicbrainz2.utils import extractUuid - >>> extractUuid('http://musicbrainz.org/artist/c0b2500e-0cef-4130-869d-732b23ed9df5', 'artist') - 'c0b2500e-0cef-4130-869d-732b23ed9df5' - >>> - - @param uriStr: a string containing a MusicBrainz ID (an URI), or None - @param resType: a string containing a resource type - - @return: a string containing a relative URI, or None - - @raise ValueError: the given URI is no valid MusicBrainz ID - """ - if uriStr is None: - return None - - (scheme, netloc, path) = urlparse.urlparse(uriStr)[:3] - - if scheme == '': - return uriStr # no URI, probably already the UUID - - if scheme != 'http' or netloc != 'musicbrainz.org': - raise ValueError('%s is no MB ID.' % uriStr) - - m = re.match(PATH_PATTERN, path) - - if m: - if resType is None: - return m.group(2) - else: - if m.group(1) == resType: - return m.group(2) - else: - raise ValueError('expected "%s" Id' % resType) - else: - raise ValueError('%s is no valid MB ID.' % uriStr) - - -def extractFragment(uriStr, uriPrefix=None): - """Extract the fragment part from a URI. - - If C{uriStr} is None or no absolute URI, then it is returned unchanged. - - The C{uriPrefix} parameter can be used for error checking. 
If C{uriStr} - is an absolute URI, then the function checks if it starts with - C{uriPrefix}. If it doesn't, a C{ValueError} exception is raised. - - @param uriStr: a string containing an absolute URI - @param uriPrefix: a string containing an URI prefix - - @return: a string containing the fragment, or None - - @raise ValueError: the given URI doesn't start with C{uriPrefix} - """ - if uriStr is None: - return None - - (scheme, netloc, path, params, query, frag) = urlparse.urlparse(uriStr) - if scheme == '': - return uriStr # this is no URI - - if uriPrefix is None or uriStr.startswith(uriPrefix): - return frag - else: - raise ValueError("prefix doesn't match URI %s" % uriStr) - - -def extractEntityType(uriStr): - """Returns the entity type an entity URI is referring to. - - @param uriStr: a string containing an absolute entity URI - - @return: a string containing 'artist', 'release', 'track', or 'label' - - @raise ValueError: if the given URI is no valid MusicBrainz ID - """ - if uriStr is None: - raise ValueError('None is no valid entity URI') - - (scheme, netloc, path) = urlparse.urlparse(uriStr)[:3] - - if scheme == '': - raise ValueError('%s is no absolute MB ID.' % uriStr) - - if scheme != 'http' or netloc != 'musicbrainz.org': - raise ValueError('%s is no MB ID.' % uriStr) - - m = re.match(PATH_PATTERN, path) - - if m: - return m.group(1) - else: - raise ValueError('%s is no valid MB ID.' % uriStr) - - -def getReleaseTypeName(releaseType): - """Returns the name of a release type URI. - - @param releaseType: a string containing a release type URI - - @return: a string containing a printable name for the release type - - @see: L{musicbrainz2.model.Release} - """ - from musicbrainz2.data.releasetypenames import releaseTypeNames - return releaseTypeNames.get(releaseType) - - -def getCountryName(id_): - """Returns a country's name based on an ISO-3166 country code. 
- - The country table this function is based on has been modified for - MusicBrainz purposes by using the extension mechanism defined in - ISO-3166. All IDs are still valid ISO-3166 country codes, but some - IDs have been added to include historic countries and some of the - country names have been modified to make them better suited for - display purposes. - - If the country ID is not found, None is returned. This may happen - for example, when new countries are added to the MusicBrainz web - service which aren't known to this library yet. - - @param id_: a two-letter upper case string containing an ISO-3166 code - - @return: a string containing the country's name, or None - - @see: L{musicbrainz2.model} - """ - from musicbrainz2.data.countrynames import countryNames - return countryNames.get(id_) - - -def getLanguageName(id_): - """Returns a language name based on an ISO-639-2/T code. - - This function uses a subset of the ISO-639-2/T code table to map - language IDs (terminologic, not bibliographic ones!) to names. - - @param id_: a three-letter upper case string containing an ISO-639-2/T code - - @return: a string containing the language's name, or None - - @see: L{musicbrainz2.model} - """ - from musicbrainz2.data.languagenames import languageNames - return languageNames.get(id_) - - -def getScriptName(id_): - """Returns a script name based on an ISO-15924 code. - - This function uses a subset of the ISO-15924 code table to map - script IDs to names. - - @param id_: a four-letter string containing an ISO-15924 script code - - @return: a string containing the script's name, or None - - @see: L{musicbrainz2.model} - """ - from musicbrainz2.data.scriptnames import scriptNames - return scriptNames.get(id_) - - -# EOF diff --git a/musicbrainz2/webservice.py b/musicbrainz2/webservice.py deleted file mode 100644 index a869530d..00000000 --- a/musicbrainz2/webservice.py +++ /dev/null @@ -1,1519 +0,0 @@ -"""Classes for interacting with the MusicBrainz XML web service. 
- -The L{WebService} class talks to a server implementing the MusicBrainz XML -web service. It mainly handles URL generation and network I/O. Use this -if maximum control is needed. - -The L{Query} class provides a convenient interface to the most commonly -used features of the web service. By default it uses L{WebService} to -retrieve data and the L{XML parser } to parse the -responses. The results are object trees using the L{MusicBrainz domain -model }. - -@author: Matthias Friedrich -""" -__revision__ = '$Id: webservice.py 12973 2011-04-29 11:49:31Z luks $' - -import re -import urllib -import urllib2 -import urlparse -import logging -import os.path -from StringIO import StringIO -import musicbrainz2 -from musicbrainz2.model import Artist, Release, Track -from musicbrainz2.wsxml import MbXmlParser, ParseError -import musicbrainz2.utils as mbutils - -__all__ = [ - 'WebServiceError', 'AuthenticationError', 'ConnectionError', - 'RequestError', 'ResourceNotFoundError', 'ResponseError', - 'IIncludes', 'ArtistIncludes', 'ReleaseIncludes', 'TrackIncludes', - 'LabelIncludes', 'ReleaseGroupIncludes', - 'IFilter', 'ArtistFilter', 'ReleaseFilter', 'TrackFilter', - 'UserFilter', 'LabelFilter', 'ReleaseGroupFilter', - 'IWebService', 'WebService', 'Query', -] - - -class IWebService(object): - """An interface all concrete web service classes have to implement. - - All web service classes have to implement this and follow the - method specifications. - """ - - def get(self, entity, id_, include, filter, version): - """Query the web service. - - Using this method, you can either get a resource by id (using - the C{id_} parameter, or perform a query on all resources of - a type. - - The C{filter} and the C{id_} parameter exclude each other. If - you are using a filter, you may not set C{id_} and vice versa. - - Returns a file-like object containing the result or raises a - L{WebServiceError} or one of its subclasses in case of an - error. 
Which one is used depends on the implementing class. - - @param entity: a string containing the entity's name - @param id_: a string containing a UUID, or the empty string - @param include: a tuple containing values for the 'inc' parameter - @param filter: parameters, depending on the entity - @param version: a string containing the web service version to use - - @return: a file-like object - - @raise WebServiceError: in case of errors - """ - raise NotImplementedError() - - - def post(self, entity, id_, data, version): - """Submit data to the web service. - - @param entity: a string containing the entity's name - @param id_: a string containing a UUID, or the empty string - @param data: A string containing the data to post - @param version: a string containing the web service version to use - - @return: a file-like object - - @raise WebServiceError: in case of errors - """ - raise NotImplementedError() - - -class WebServiceError(Exception): - """A web service error has occurred. - - This is the base class for several other web service related - exceptions. - """ - - def __init__(self, msg='Webservice Error', reason=None): - """Constructor. - - Set C{msg} to an error message which explains why this - exception was raised. The C{reason} parameter should be the - original exception which caused this L{WebService} exception - to be raised. If given, it has to be an instance of - C{Exception} or one of its child classes. - - @param msg: a string containing an error message - @param reason: another exception instance, or None - """ - Exception.__init__(self) - self.msg = msg - self.reason = reason - - def __str__(self): - """Makes this class printable. - - @return: a string containing an error message - """ - return self.msg - - -class ConnectionError(WebServiceError): - """Getting a server connection failed. - - This exception is mostly used if the client couldn't connect to - the server because of an invalid host name or port. 
It doesn't - make sense if the web service in question doesn't use the network. - """ - pass - - -class RequestError(WebServiceError): - """An invalid request was made. - - This exception is raised if the client made an invalid request. - That could be syntactically invalid identifiers or unknown or - invalid parameter values. - """ - pass - - -class ResourceNotFoundError(WebServiceError): - """No resource with the given ID exists. - - This is usually a wrapper around IOError (which is superclass of - HTTPError). - """ - pass - - -class AuthenticationError(WebServiceError): - """Authentication failed. - - This is thrown if user name, password or realm were invalid while - trying to access a protected resource. - """ - pass - - -class ResponseError(WebServiceError): - """The returned resource was invalid. - - This may be due to a malformed XML document or if the requested - data wasn't part of the response. It can only occur in case of - bugs in the web service itself. - """ - pass - -class DigestAuthHandler(urllib2.HTTPDigestAuthHandler): - """Patched DigestAuthHandler to correctly handle Digest Auth according to RFC 2617. - - This will allow multiple qop values in the WWW-Authenticate header (e.g. "auth,auth-int"). - The only supported qop value is still auth, though. - See http://bugs.python.org/issue9714 - - @author Kuno Woudt - """ - def get_authorization(self, req, chal): - qop = chal.get('qop') - if qop and ',' in qop and 'auth' in qop.split(','): - chal['qop'] = 'auth' - - return urllib2.HTTPDigestAuthHandler.get_authorization(self, req, chal) - -class WebService(IWebService): - """An interface to the MusicBrainz XML web service via HTTP. - - By default, this class uses the MusicBrainz server but may be - configured for accessing other servers as well using the - L{constructor <__init__>}. This implements L{IWebService}, so - additional documentation on method parameters can be found there. 
- """ - - def __init__(self, host='musicbrainz.org', port=80, pathPrefix='/ws', - username=None, password=None, realm='musicbrainz.org', - opener=None): - """Constructor. - - This can be used without parameters. In this case, the - MusicBrainz server will be used. - - @param host: a string containing a host name - @param port: an integer containing a port number - @param pathPrefix: a string prepended to all URLs - @param username: a string containing a MusicBrainz user name - @param password: a string containing the user's password - @param realm: a string containing the realm used for authentication - @param opener: an C{urllib2.OpenerDirector} object used for queries - """ - self._host = host - self._port = port - self._username = username - self._password = password - self._realm = realm - self._pathPrefix = pathPrefix - self._log = logging.getLogger(str(self.__class__)) - - if opener is None: - self._opener = urllib2.build_opener() - else: - self._opener = opener - - passwordMgr = self._RedirectPasswordMgr() - authHandler = DigestAuthHandler(passwordMgr) - authHandler.add_password(self._realm, (), # no host set - self._username, self._password) - self._opener.add_handler(authHandler) - - - def _makeUrl(self, entity, id_, include=( ), filter={ }, - version='1', type_='xml'): - params = dict(filter) - if type_ is not None: - params['type'] = type_ - if len(include) > 0: - params['inc'] = ' '.join(include) - - netloc = self._host - if self._port != 80: - netloc += ':' + str(self._port) - path = '/'.join((self._pathPrefix, version, entity, id_)) - - query = urllib.urlencode(params) - - url = urlparse.urlunparse(('http', netloc, path, '', query,'')) - - return url - - - def _openUrl(self, url, data=None): - userAgent = 'python-musicbrainz/' + musicbrainz2.__version__ - req = urllib2.Request(url) - req.add_header('User-Agent', userAgent) - return self._opener.open(req, data) - - - def get(self, entity, id_, include=( ), filter={ }, version='1'): - """Query the web 
service via HTTP-GET. - - Returns a file-like object containing the result or raises a - L{WebServiceError}. Conditions leading to errors may be - invalid entities, IDs, C{include} or C{filter} parameters - and unsupported version numbers. - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid IDs or parameters - @raise AuthenticationError: invalid user name and/or password - @raise ResourceNotFoundError: resource doesn't exist - - @see: L{IWebService.get} - """ - url = self._makeUrl(entity, id_, include, filter, version) - - self._log.debug('GET ' + url) - - try: - return self._openUrl(url) - except urllib2.HTTPError, e: - self._log.debug("GET failed: " + str(e)) - if e.code == 400: # in python 2.4: httplib.BAD_REQUEST - raise RequestError(str(e), e) - elif e.code == 401: # httplib.UNAUTHORIZED - raise AuthenticationError(str(e), e) - elif e.code == 404: # httplib.NOT_FOUND - raise ResourceNotFoundError(str(e), e) - else: - raise WebServiceError(str(e), e) - except urllib2.URLError, e: - self._log.debug("GET failed: " + str(e)) - raise ConnectionError(str(e), e) - - - def post(self, entity, id_, data, version='1'): - """Send data to the web service via HTTP-POST. - - Note that this may require authentication. You can set - user name, password and realm in the L{constructor <__init__>}. 
- - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid IDs or parameters - @raise AuthenticationError: invalid user name and/or password - @raise ResourceNotFoundError: resource doesn't exist - - @see: L{IWebService.post} - """ - url = self._makeUrl(entity, id_, version=version, type_=None) - - self._log.debug('POST ' + url) - self._log.debug('POST-BODY: ' + data) - - try: - return self._openUrl(url, data) - except urllib2.HTTPError, e: - self._log.debug("POST failed: " + str(e)) - if e.code == 400: # in python 2.4: httplib.BAD_REQUEST - raise RequestError(str(e), e) - elif e.code == 401: # httplib.UNAUTHORIZED - raise AuthenticationError(str(e), e) - elif e.code == 404: # httplib.NOT_FOUND - raise ResourceNotFoundError(str(e), e) - else: - raise WebServiceError(str(e), e) - except urllib2.URLError, e: - self._log.debug("POST failed: " + str(e)) - raise ConnectionError(str(e), e) - - - # Special password manager which also works with redirects by simply - # ignoring the URI. As a consequence, only *ONE* (username, password) - # tuple per realm can be used for all URIs. - # - class _RedirectPasswordMgr(urllib2.HTTPPasswordMgr): - def __init__(self): - self._realms = { } - - def find_user_password(self, realm, uri): - # ignoring the uri parameter intentionally - try: - return self._realms[realm] - except KeyError: - return (None, None) - - def add_password(self, realm, uri, username, password): - # ignoring the uri parameter intentionally - self._realms[realm] = (username, password) - - -class IFilter(object): - """A filter for collections. - - This is the interface all filters have to implement. Filter classes - are initialized with a set of criteria and are then applied to - collections of items. The criteria are usually strings or integer - values, depending on the filter. - - Note that all strings passed to filters should be unicode strings - (python type C{unicode}). 
Standard strings are converted to unicode - internally, but have a limitation: Only 7 Bit pure ASCII characters - may be used, otherwise a C{UnicodeDecodeError} is raised. - """ - def createParameters(self): - """Create a list of query parameters. - - This method creates a list of (C{parameter}, C{value}) tuples, - based on the contents of the implementing subclass. - C{parameter} is a string containing a parameter name - and C{value} an arbitrary string. No escaping of those strings - is required. - - @return: a sequence of (key, value) pairs - """ - raise NotImplementedError() - - -class ArtistFilter(IFilter): - """A filter for the artist collection.""" - - def __init__(self, name=None, limit=None, offset=None, query=None): - """Constructor. - - The C{query} parameter may contain a query in U{Lucene syntax - }. - Note that the C{name} and C{query} may not be used together. - - @param name: a unicode string containing the artist's name - @param limit: the maximum number of artists to return - @param offset: start results at this zero-based offset - @param query: a string containing a query in Lucene syntax - """ - self._params = [ - ('name', name), - ('limit', limit), - ('offset', offset), - ('query', query), - ] - - if not _paramsValid(self._params): - raise ValueError('invalid combination of parameters') - - def createParameters(self): - return _createParameters(self._params) - - -class LabelFilter(IFilter): - """A filter for the label collection.""" - - def __init__(self, name=None, limit=None, offset=None, query=None): - """Constructor. - - The C{query} parameter may contain a query in U{Lucene syntax - }. - Note that the C{name} and C{query} may not be used together. 
- - @param name: a unicode string containing the label's name - @param limit: the maximum number of labels to return - @param offset: start results at this zero-based offset - @param query: a string containing a query in Lucene syntax - """ - self._params = [ - ('name', name), - ('limit', limit), - ('offset', offset), - ('query', query), - ] - - if not _paramsValid(self._params): - raise ValueError('invalid combination of parameters') - - def createParameters(self): - return _createParameters(self._params) - -class ReleaseGroupFilter(IFilter): - """A filter for the release group collection.""" - - def __init__(self, title=None, releaseTypes=None, artistName=None, - artistId=None, limit=None, offset=None, query=None): - """Constructor. - - If C{artistId} is set, only releases matching those IDs are - returned. The C{releaseTypes} parameter allows you to limit - the types of the release groups returned. You can set it to - C{(Release.TYPE_ALBUM, Release.TYPE_OFFICIAL)}, for example, - to only get officially released albums. Note that those values - are connected using the I{AND} operator. MusicBrainz' support - is currently very limited, so C{Release.TYPE_LIVE} and - C{Release.TYPE_COMPILATION} exclude each other (see U{the - documentation on release attributes - } for more - information and all valid values). - - If both the C{artistName} and the C{artistId} parameter are - given, the server will ignore C{artistName}. - - The C{query} parameter may contain a query in U{Lucene syntax - }. - Note that C{query} may not be used together with the other - parameters except for C{limit} and C{offset}. 
- - @param title: a unicode string containing the release group's title - @param releaseTypes: a sequence of release type URIs - @param artistName: a unicode string containing the artist's name - @param artistId: a unicode string containing the artist's ID - @param limit: the maximum number of release groups to return - @param offset: start results at this zero-based offset - @param query: a string containing a query in Lucene syntax - - @see: the constants in L{musicbrainz2.model.Release} - """ - if releaseTypes is None or len(releaseTypes) == 0: - releaseTypesStr = None - else: - releaseTypesStr = ' '.join(map(mbutils.extractFragment, releaseTypes)) - - self._params = [ - ('title', title), - ('releasetypes', releaseTypesStr), - ('artist', artistName), - ('artistid', mbutils.extractUuid(artistId)), - ('limit', limit), - ('offset', offset), - ('query', query), - ] - - if not _paramsValid(self._params): - raise ValueError('invalid combination of parameters') - - def createParameters(self): - return _createParameters(self._params) - - -class ReleaseFilter(IFilter): - """A filter for the release collection.""" - - def __init__(self, title=None, discId=None, releaseTypes=None, - artistName=None, artistId=None, limit=None, - offset=None, query=None, trackCount=None): - """Constructor. - - If C{discId} or C{artistId} are set, only releases matching - those IDs are returned. The C{releaseTypes} parameter allows - to limit the types of the releases returned. You can set it to - C{(Release.TYPE_ALBUM, Release.TYPE_OFFICIAL)}, for example, - to only get officially released albums. Note that those values - are connected using the I{AND} operator. MusicBrainz' support - is currently very limited, so C{Release.TYPE_LIVE} and - C{Release.TYPE_COMPILATION} exclude each other (see U{the - documentation on release attributes - } for more - information and all valid values). 
- - If both the C{artistName} and the C{artistId} parameter are - given, the server will ignore C{artistName}. - - The C{query} parameter may contain a query in U{Lucene syntax - }. - Note that C{query} may not be used together with the other - parameters except for C{limit} and C{offset}. - - @param title: a unicode string containing the release's title - @param discId: a unicode string containing the DiscID - @param releaseTypes: a sequence of release type URIs - @param artistName: a unicode string containing the artist's name - @param artistId: a unicode string containing the artist's ID - @param limit: the maximum number of releases to return - @param offset: start results at this zero-based offset - @param query: a string containing a query in Lucene syntax - @param trackCount: the number of tracks in the release - - @see: the constants in L{musicbrainz2.model.Release} - """ - if releaseTypes is None or len(releaseTypes) == 0: - releaseTypesStr = None - else: - tmp = [ mbutils.extractFragment(x) for x in releaseTypes ] - releaseTypesStr = ' '.join(tmp) - - self._params = [ - ('title', title), - ('discid', discId), - ('releasetypes', releaseTypesStr), - ('artist', artistName), - ('artistid', mbutils.extractUuid(artistId)), - ('limit', limit), - ('offset', offset), - ('query', query), - ('count', trackCount), - ] - - if not _paramsValid(self._params): - raise ValueError('invalid combination of parameters') - - def createParameters(self): - return _createParameters(self._params) - - -class TrackFilter(IFilter): - """A filter for the track collection.""" - - def __init__(self, title=None, artistName=None, artistId=None, - releaseTitle=None, releaseId=None, - duration=None, puid=None, limit=None, offset=None, - query=None): - """Constructor. - - If C{artistId}, C{releaseId} or C{puid} are set, only tracks - matching those IDs are returned. - - The server will ignore C{artistName} and C{releaseTitle} if - C{artistId} or ${releaseId} are set respectively. 
- - The C{query} parameter may contain a query in U{Lucene syntax - }. - Note that C{query} may not be used together with the other - parameters except for C{limit} and C{offset}. - - @param title: a unicode string containing the track's title - @param artistName: a unicode string containing the artist's name - @param artistId: a string containing the artist's ID - @param releaseTitle: a unicode string containing the release's title - @param releaseId: a string containing the release's title - @param duration: the track's length in milliseconds - @param puid: a string containing a PUID - @param limit: the maximum number of releases to return - @param offset: start results at this zero-based offset - @param query: a string containing a query in Lucene syntax - """ - self._params = [ - ('title', title), - ('artist', artistName), - ('artistid', mbutils.extractUuid(artistId)), - ('release', releaseTitle), - ('releaseid', mbutils.extractUuid(releaseId)), - ('duration', duration), - ('puid', puid), - ('limit', limit), - ('offset', offset), - ('query', query), - ] - - if not _paramsValid(self._params): - raise ValueError('invalid combination of parameters') - - def createParameters(self): - return _createParameters(self._params) - - -class UserFilter(IFilter): - """A filter for the user collection.""" - - def __init__(self, name=None): - """Constructor. - - @param name: a unicode string containing a MusicBrainz user name - """ - self._name = name - - def createParameters(self): - if self._name is not None: - return [ ('name', self._name.encode('utf-8')) ] - else: - return [ ] - - -class IIncludes(object): - """An interface implemented by include tag generators.""" - def createIncludeTags(self): - raise NotImplementedError() - - -class ArtistIncludes(IIncludes): - """A specification on how much data to return with an artist. 
- - Example: - - >>> from musicbrainz2.model import Release - >>> from musicbrainz2.webservice import ArtistIncludes - >>> inc = ArtistIncludes(artistRelations=True, releaseRelations=True, - ... releases=(Release.TYPE_ALBUM, Release.TYPE_OFFICIAL)) - >>> - - The MusicBrainz server only supports some combinations of release - types for the C{releases} and C{vaReleases} include tags. At the - moment, not more than two release types should be selected, while - one of them has to be C{Release.TYPE_OFFICIAL}, - C{Release.TYPE_PROMOTION} or C{Release.TYPE_BOOTLEG}. - - @note: Only one of C{releases} and C{vaReleases} may be given. - """ - def __init__(self, aliases=False, releases=(), vaReleases=(), - artistRelations=False, releaseRelations=False, - trackRelations=False, urlRelations=False, tags=False, - ratings=False, releaseGroups=False): - - assert not isinstance(releases, basestring) - assert not isinstance(vaReleases, basestring) - assert len(releases) == 0 or len(vaReleases) == 0 - - self._includes = { - 'aliases': aliases, - 'artist-rels': artistRelations, - 'release-groups': releaseGroups, - 'release-rels': releaseRelations, - 'track-rels': trackRelations, - 'url-rels': urlRelations, - 'tags': tags, - 'ratings': ratings, - } - - for elem in releases: - self._includes['sa-' + mbutils.extractFragment(elem)] = True - - for elem in vaReleases: - self._includes['va-' + mbutils.extractFragment(elem)] = True - - def createIncludeTags(self): - return _createIncludes(self._includes) - - -class ReleaseIncludes(IIncludes): - """A specification on how much data to return with a release.""" - def __init__(self, artist=False, counts=False, releaseEvents=False, - discs=False, tracks=False, - artistRelations=False, releaseRelations=False, - trackRelations=False, urlRelations=False, - labels=False, tags=False, ratings=False, isrcs=False, - releaseGroup=False): - self._includes = { - 'artist': artist, - 'counts': counts, - 'labels': labels, - 'release-groups': releaseGroup, - 
'release-events': releaseEvents, - 'discs': discs, - 'tracks': tracks, - 'artist-rels': artistRelations, - 'release-rels': releaseRelations, - 'track-rels': trackRelations, - 'url-rels': urlRelations, - 'tags': tags, - 'ratings': ratings, - 'isrcs': isrcs, - } - - # Requesting labels without releaseEvents makes no sense, - # so we pull in releaseEvents, if necessary. - if labels and not releaseEvents: - self._includes['release-events'] = True - # Ditto for isrcs with no tracks - if isrcs and not tracks: - self._includes['tracks'] = True - - def createIncludeTags(self): - return _createIncludes(self._includes) - - -class ReleaseGroupIncludes(IIncludes): - """A specification on how much data to return with a release group.""" - - def __init__(self, artist=False, releases=False, tags=False): - """Constructor. - - @param artist: Whether to include the release group's main artist info. - @param releases: Whether to include the release group's releases. - """ - self._includes = { - 'artist': artist, - 'releases': releases, - } - - def createIncludeTags(self): - return _createIncludes(self._includes) - - -class TrackIncludes(IIncludes): - """A specification on how much data to return with a track.""" - def __init__(self, artist=False, releases=False, puids=False, - artistRelations=False, releaseRelations=False, - trackRelations=False, urlRelations=False, tags=False, - ratings=False, isrcs=False): - self._includes = { - 'artist': artist, - 'releases': releases, - 'puids': puids, - 'artist-rels': artistRelations, - 'release-rels': releaseRelations, - 'track-rels': trackRelations, - 'url-rels': urlRelations, - 'tags': tags, - 'ratings': ratings, - 'isrcs': isrcs, - } - - def createIncludeTags(self): - return _createIncludes(self._includes) - - -class LabelIncludes(IIncludes): - """A specification on how much data to return with a label.""" - def __init__(self, aliases=False, tags=False, ratings=False): - self._includes = { - 'aliases': aliases, - 'tags': tags, - 'ratings': 
ratings, - } - - def createIncludeTags(self): - return _createIncludes(self._includes) - - -class Query(object): - """A simple interface to the MusicBrainz web service. - - This is a facade which provides a simple interface to the MusicBrainz - web service. It hides all the details like fetching data from a server, - parsing the XML and creating an object tree. Using this class, you can - request data by ID or search the I{collection} of all resources - (artists, releases, or tracks) to retrieve those matching given - criteria. This document contains examples to get you started. - - - Working with Identifiers - ======================== - - MusicBrainz uses absolute URIs as identifiers. For example, the artist - 'Tori Amos' is identified using the following URI:: - http://musicbrainz.org/artist/c0b2500e-0cef-4130-869d-732b23ed9df5 - - In some situations it is obvious from the context what type of - resource an ID refers to. In these cases, abbreviated identifiers may - be used, which are just the I{UUID} part of the URI. Thus the ID above - may also be written like this:: - c0b2500e-0cef-4130-869d-732b23ed9df5 - - All methods in this class which require IDs accept both the absolute - URI and the abbreviated form (aka the relative URI). - - - Creating a Query Object - ======================= - - In most cases, creating a L{Query} object is as simple as this: - - >>> import musicbrainz2.webservice as ws - >>> q = ws.Query() - >>> - - The instantiated object uses the standard L{WebService} class to - access the MusicBrainz web service. If you want to use a different - server or you have to pass user name and password because one of - your queries requires authentication, you have to create the - L{WebService} object yourself and configure it appropriately. - This example uses the MusicBrainz test server and also sets - authentication data: - - >>> import musicbrainz2.webservice as ws - >>> service = ws.WebService(host='test.musicbrainz.org', - ... 
username='whatever', password='secret') - >>> q = ws.Query(service) - >>> - - - Querying for Individual Resources - ================================= - - If the MusicBrainz ID of a resource is known, then the L{getArtistById}, - L{getReleaseById}, or L{getTrackById} method can be used to retrieve - it. Example: - - >>> import musicbrainz2.webservice as ws - >>> q = ws.Query() - >>> artist = q.getArtistById('c0b2500e-0cef-4130-869d-732b23ed9df5') - >>> artist.name - u'Tori Amos' - >>> artist.sortName - u'Amos, Tori' - >>> print artist.type - http://musicbrainz.org/ns/mmd-1.0#Person - >>> - - This returned just the basic artist data, however. To get more detail - about a resource, the C{include} parameters may be used which expect - an L{ArtistIncludes}, L{ReleaseIncludes}, or L{TrackIncludes} object, - depending on the resource type. - - To get data about a release which also includes the main artist - and all tracks, for example, the following query can be used: - - >>> import musicbrainz2.webservice as ws - >>> q = ws.Query() - >>> releaseId = '33dbcf02-25b9-4a35-bdb7-729455f33ad7' - >>> include = ws.ReleaseIncludes(artist=True, tracks=True) - >>> release = q.getReleaseById(releaseId, include) - >>> release.title - u'Tales of a Librarian' - >>> release.artist.name - u'Tori Amos' - >>> release.tracks[0].title - u'Precious Things' - >>> - - Note that the query gets more expensive for the server the more - data you request, so please be nice. - - - Searching in Collections - ======================== - - For each resource type (artist, release, and track), there is one - collection which contains all resources of a type. You can search - these collections using the L{getArtists}, L{getReleases}, and - L{getTracks} methods. The collections are huge, so you have to - use filters (L{ArtistFilter}, L{ReleaseFilter}, or L{TrackFilter}) - to retrieve only resources matching given criteria. 
- - For example, If you want to search the release collection for - releases with a specified DiscID, you would use L{getReleases} - and a L{ReleaseFilter} object: - - >>> import musicbrainz2.webservice as ws - >>> q = ws.Query() - >>> filter = ws.ReleaseFilter(discId='8jJklE258v6GofIqDIrE.c5ejBE-') - >>> results = q.getReleases(filter=filter) - >>> results[0].score - 100 - >>> results[0].release.title - u'Under the Pink' - >>> - - The query returns a list of results (L{wsxml.ReleaseResult} objects - in this case), which are ordered by score, with a higher score - indicating a better match. Note that those results don't contain - all the data about a resource. If you need more detail, you can then - use the L{getArtistById}, L{getReleaseById}, or L{getTrackById} - methods to request the resource. - - All filters support the C{limit} argument to limit the number of - results returned. This defaults to 25, but the server won't send - more than 100 results to save bandwidth and processing power. Using - C{limit} and the C{offset} parameter, you can page through the - results. - - - Error Handling - ============== - - All methods in this class raise a L{WebServiceError} exception in case - of errors. Depending on the method, a subclass of L{WebServiceError} may - be raised which allows an application to handle errors more precisely. - The following example handles connection errors (invalid host name - etc.) separately and all other web service errors in a combined - catch clause: - - >>> try: - ... artist = q.getArtistById('c0b2500e-0cef-4130-869d-732b23ed9df5') - ... except ws.ConnectionError, e: - ... pass # implement your error handling here - ... except ws.WebServiceError, e: - ... pass # catches all other web service errors - ... - >>> - """ - - def __init__(self, ws=None, wsFactory=WebService, clientId=None): - """Constructor. - - The C{ws} parameter has to be a subclass of L{IWebService}. 
- If it isn't given, the C{wsFactory} parameter is used to - create an L{IWebService} subclass. - - If the constructor is called without arguments, an instance - of L{WebService} is used, preconfigured to use the MusicBrainz - server. This should be enough for most users. - - If you want to use queries which require authentication you - have to pass a L{WebService} instance where user name and - password have been set. - - The C{clientId} parameter is required for data submission. - The format is C{'application-version'}, where C{application} - is your application's name and C{version} is a version - number which may not include a '-' character. - - @param ws: a subclass instance of L{IWebService}, or None - @param wsFactory: a callable object which creates an object - @param clientId: a unicode string containing the application's ID - """ - if ws is None: - self._ws = wsFactory() - else: - self._ws = ws - - self._clientId = clientId - self._log = logging.getLogger(str(self.__class__)) - - - def getArtistById(self, id_, include=None): - """Returns an artist. - - If no artist with that ID can be found, C{include} contains - invalid tags or there's a server problem, an exception is - raised. - - @param id_: a string containing the artist's ID - @param include: an L{ArtistIncludes} object, or None - - @return: an L{Artist } object, or None - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResourceNotFoundError: artist doesn't exist - @raise ResponseError: server returned invalid data - """ - uuid = mbutils.extractUuid(id_, 'artist') - result = self._getFromWebService('artist', uuid, include) - artist = result.getArtist() - if artist is not None: - return artist - else: - raise ResponseError("server didn't return artist") - - - def getArtists(self, filter): - """Returns artists matching given criteria. 
- - @param filter: an L{ArtistFilter} object - - @return: a list of L{musicbrainz2.wsxml.ArtistResult} objects - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResponseError: server returned invalid data - """ - result = self._getFromWebService('artist', '', filter=filter) - return result.getArtistResults() - - def getLabelById(self, id_, include=None): - """Returns a L{model.Label} - - If no label with that ID can be found, or there is a server problem, - an exception is raised. - - @param id_: a string containing the label's ID. - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResourceNotFoundError: release doesn't exist - @raise ResponseError: server returned invalid data - """ - uuid = mbutils.extractUuid(id_, 'label') - result = self._getFromWebService('label', uuid, include) - label = result.getLabel() - if label is not None: - return label - else: - raise ResponseError("server didn't return a label") - - def getLabels(self, filter): - result = self._getFromWebService('label', '', filter=filter) - return result.getLabelResults() - - def getReleaseById(self, id_, include=None): - """Returns a release. - - If no release with that ID can be found, C{include} contains - invalid tags or there's a server problem, and exception is - raised. 
- - @param id_: a string containing the release's ID - @param include: a L{ReleaseIncludes} object, or None - - @return: a L{Release } object, or None - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResourceNotFoundError: release doesn't exist - @raise ResponseError: server returned invalid data - """ - uuid = mbutils.extractUuid(id_, 'release') - result = self._getFromWebService('release', uuid, include) - release = result.getRelease() - if release is not None: - return release - else: - raise ResponseError("server didn't return release") - - - def getReleases(self, filter): - """Returns releases matching given criteria. - - @param filter: a L{ReleaseFilter} object - - @return: a list of L{musicbrainz2.wsxml.ReleaseResult} objects - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResponseError: server returned invalid data - """ - result = self._getFromWebService('release', '', filter=filter) - return result.getReleaseResults() - - def getReleaseGroupById(self, id_, include=None): - """Returns a release group. - - If no release group with that ID can be found, C{include} - contains invalid tags, or there's a server problem, an - exception is raised. 
- - @param id_: a string containing the release group's ID - @param include: a L{ReleaseGroupIncludes} object, or None - - @return: a L{ReleaseGroup } object, or None - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResourceNotFoundError: release doesn't exist - @raise ResponseError: server returned invalid data - """ - uuid = mbutils.extractUuid(id_, 'release-group') - result = self._getFromWebService('release-group', uuid, include) - releaseGroup = result.getReleaseGroup() - if releaseGroup is not None: - return releaseGroup - else: - raise ResponseError("server didn't return releaseGroup") - - def getReleaseGroups(self, filter): - """Returns release groups matching the given criteria. - - @param filter: a L{ReleaseGroupFilter} object - - @return: a list of L{musicbrainz2.wsxml.ReleaseGroupResult} objects - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResponseError: server returned invalid data - """ - result = self._getFromWebService('release-group', '', filter=filter) - return result.getReleaseGroupResults() - - def getTrackById(self, id_, include=None): - """Returns a track. - - If no track with that ID can be found, C{include} contains - invalid tags or there's a server problem, an exception is - raised. 
- - @param id_: a string containing the track's ID - @param include: a L{TrackIncludes} object, or None - - @return: a L{Track } object, or None - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResourceNotFoundError: track doesn't exist - @raise ResponseError: server returned invalid data - """ - uuid = mbutils.extractUuid(id_, 'track') - result = self._getFromWebService('track', uuid, include) - track = result.getTrack() - if track is not None: - return track - else: - raise ResponseError("server didn't return track") - - - def getTracks(self, filter): - """Returns tracks matching given criteria. - - @param filter: a L{TrackFilter} object - - @return: a list of L{musicbrainz2.wsxml.TrackResult} objects - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise ResponseError: server returned invalid data - """ - result = self._getFromWebService('track', '', filter=filter) - return result.getTrackResults() - - - def getUserByName(self, name): - """Returns information about a MusicBrainz user. - - You can only request user data if you know the user name and - password for that account. If username and/or password are - incorrect, an L{AuthenticationError} is raised. - - See the example in L{Query} on how to supply user name and - password. 
- - @param name: a unicode string containing the user's name - - @return: a L{User } object - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or include tags - @raise AuthenticationError: invalid user name and/or password - @raise ResourceNotFoundError: track doesn't exist - @raise ResponseError: server returned invalid data - """ - filter = UserFilter(name=name) - result = self._getFromWebService('user', '', None, filter) - - if len(result.getUserList()) > 0: - return result.getUserList()[0] - else: - raise ResponseError("response didn't contain user data") - - - def _getFromWebService(self, entity, id_, include=None, filter=None): - if filter is None: - filterParams = [ ] - else: - filterParams = filter.createParameters() - - if include is None: - includeParams = [ ] - else: - includeParams = include.createIncludeTags() - - stream = self._ws.get(entity, id_, includeParams, filterParams) - try: - parser = MbXmlParser() - return parser.parse(stream) - except ParseError, e: - raise ResponseError(str(e), e) - - - def submitPuids(self, tracks2puids): - """Submit track to PUID mappings. - - The C{tracks2puids} parameter has to be a dictionary, with the - keys being MusicBrainz track IDs (either as absolute URIs or - in their 36 character ASCII representation) and the values - being PUIDs (ASCII, 36 characters). - - Note that this method only works if a valid user name and - password have been set. See the example in L{Query} on how - to supply authentication data. 
- - @param tracks2puids: a dictionary mapping track IDs to PUIDs - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid track or PUIDs - @raise AuthenticationError: invalid user name and/or password - """ - assert self._clientId is not None, 'Please supply a client ID' - params = [ ] - params.append( ('client', self._clientId.encode('utf-8')) ) - - for (trackId, puid) in tracks2puids.iteritems(): - trackId = mbutils.extractUuid(trackId, 'track') - params.append( ('puid', trackId + ' ' + puid) ) - - encodedStr = urllib.urlencode(params, True) - - self._ws.post('track', '', encodedStr) - - def submitISRCs(self, tracks2isrcs): - """Submit track to ISRC mappings. - - The C{tracks2isrcs} parameter has to be a dictionary, with the - keys being MusicBrainz track IDs (either as absolute URIs or - in their 36 character ASCII representation) and the values - being ISRCs (ASCII, 12 characters). - - Note that this method only works if a valid user name and - password have been set. See the example in L{Query} on how - to supply authentication data. - - @param tracks2isrcs: a dictionary mapping track IDs to ISRCs - - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid track or ISRCs - @raise AuthenticationError: invalid user name and/or password - """ - params = [ ] - - for (trackId, isrc) in tracks2isrcs.iteritems(): - trackId = mbutils.extractUuid(trackId, 'track') - params.append( ('isrc', trackId + ' ' + isrc) ) - - encodedStr = urllib.urlencode(params, True) - - self._ws.post('track', '', encodedStr) - - def addToUserCollection(self, releases): - """Add releases to a user's collection. - - The releases parameter must be a list. It can contain either L{Release} - objects or a string representing a MusicBrainz release ID (either as - absolute URIs or in their 36 character ASCII representation). - - Adding a release that is already in the collection has no effect. 
- - @param releases: a list of releases to add to the user collection - - @raise ConnectionError: couldn't connect to server - @raise AuthenticationError: invalid user name and/or password - """ - rels = [] - for release in releases: - if isinstance(release, Release): - rels.append(mbutils.extractUuid(release.id)) - else: - rels.append(mbutils.extractUuid(release)) - encodedStr = urllib.urlencode({'add': ",".join(rels)}, True) - self._ws.post('collection', '', encodedStr) - - def removeFromUserCollection(self, releases): - """Remove releases from a user's collection. - - The releases parameter must be a list. It can contain either L{Release} - objects or a string representing a MusicBrainz release ID (either as - absolute URIs or in their 36 character ASCII representation). - - Removing a release that is not in the collection has no effect. - - @param releases: a list of releases to remove from the user collection - - @raise ConnectionError: couldn't connect to server - @raise AuthenticationError: invalid user name and/or password - """ - rels = [] - for release in releases: - if isinstance(release, Release): - rels.append(mbutils.extractUuid(release.id)) - else: - rels.append(mbutils.extractUuid(release)) - encodedStr = urllib.urlencode({'remove': ",".join(rels)}, True) - self._ws.post('collection', '', encodedStr) - - def getUserCollection(self, offset=0, maxitems=100): - """Get the releases that are in a user's collection - - A maximum of 100 items will be returned for any one call - to this method. To fetch more than 100 items, use the offset - parameter. 
- - @param offset: the offset to start fetching results from - @param maxitems: the upper limit on items to return - - @return: a list of L{musicbrainz2.wsxml.ReleaseResult} objects - - @raise ConnectionError: couldn't connect to server - @raise AuthenticationError: invalid user name and/or password - """ - params = { 'offset': offset, 'maxitems': maxitems } - - stream = self._ws.get('collection', '', filter=params) - print stream - try: - parser = MbXmlParser() - result = parser.parse(stream) - except ParseError, e: - raise ResponseError(str(e), e) - - return result.getReleaseResults() - - def submitUserTags(self, entityUri, tags): - """Submit folksonomy tags for an entity. - - Note that all previously existing tags from the authenticated - user are replaced with the ones given to this method. Other - users' tags are not affected. - - @param entityUri: a string containing an absolute MB ID - @param tags: A list of either L{Tag } objects - or strings - - @raise ValueError: invalid entityUri - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID, entity or tags - @raise AuthenticationError: invalid user name and/or password - """ - entity = mbutils.extractEntityType(entityUri) - uuid = mbutils.extractUuid(entityUri, entity) - params = ( - ('type', 'xml'), - ('entity', entity), - ('id', uuid), - ('tags', ','.join([unicode(tag).encode('utf-8') for tag in tags])) - ) - - encodedStr = urllib.urlencode(params) - - self._ws.post('tag', '', encodedStr) - - - def getUserTags(self, entityUri): - """Returns a list of folksonomy tags a user has applied to an entity. - - The given parameter has to be a fully qualified MusicBrainz ID, as - returned by other library functions. - - Note that this method only works if a valid user name and - password have been set. Only the tags the authenticated user - applied to the entity will be returned. If username and/or - password are incorrect, an AuthenticationError is raised. 
- - This method will return a list of L{Tag } - objects. - - @param entityUri: a string containing an absolute MB ID - - @raise ValueError: invalid entityUri - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or entity - @raise AuthenticationError: invalid user name and/or password - """ - entity = mbutils.extractEntityType(entityUri) - uuid = mbutils.extractUuid(entityUri, entity) - params = { 'entity': entity, 'id': uuid } - - stream = self._ws.get('tag', '', filter=params) - try: - parser = MbXmlParser() - result = parser.parse(stream) - except ParseError, e: - raise ResponseError(str(e), e) - - return result.getTagList() - - def submitUserRating(self, entityUri, rating): - """Submit rating for an entity. - - Note that all previously existing rating from the authenticated - user are replaced with the one given to this method. Other - users' ratings are not affected. - - @param entityUri: a string containing an absolute MB ID - @param rating: A L{Rating } object - or integer - - @raise ValueError: invalid entityUri - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID, entity or tags - @raise AuthenticationError: invalid user name and/or password - """ - entity = mbutils.extractEntityType(entityUri) - uuid = mbutils.extractUuid(entityUri, entity) - params = ( - ('type', 'xml'), - ('entity', entity), - ('id', uuid), - ('rating', unicode(rating).encode('utf-8')) - ) - - encodedStr = urllib.urlencode(params) - - self._ws.post('rating', '', encodedStr) - - - def getUserRating(self, entityUri): - """Return the rating a user has applied to an entity. - - The given parameter has to be a fully qualified MusicBrainz - ID, as returned by other library functions. - - Note that this method only works if a valid user name and - password have been set. Only the rating the authenticated user - applied to the entity will be returned. If username and/or - password are incorrect, an AuthenticationError is raised. 
- - This method will return a L{Rating } - object. - - @param entityUri: a string containing an absolute MB ID - - @raise ValueError: invalid entityUri - @raise ConnectionError: couldn't connect to server - @raise RequestError: invalid ID or entity - @raise AuthenticationError: invalid user name and/or password - """ - entity = mbutils.extractEntityType(entityUri) - uuid = mbutils.extractUuid(entityUri, entity) - params = { 'entity': entity, 'id': uuid } - - stream = self._ws.get('rating', '', filter=params) - try: - parser = MbXmlParser() - result = parser.parse(stream) - except ParseError, e: - raise ResponseError(str(e), e) - - return result.getRating() - - def submitCDStub(self, cdstub): - """Submit a CD Stub to the database. - - The number of tracks added to the CD Stub must match the TOC and DiscID - otherwise the submission wil fail. The submission will also fail if - the Disc ID is already in the MusicBrainz database. - - This method will only work if no user name and password are set. 
- - @param cdstub: a L{CDStub} object to submit - - @raise RequestError: Missmatching TOC/Track information or the - the CD Stub already exists or the Disc ID already exists - """ - assert self._clientId is not None, 'Please supply a client ID' - disc = cdstub._disc - params = [ ] - params.append( ('client', self._clientId.encode('utf-8')) ) - params.append( ('discid', disc.id) ) - params.append( ('title', cdstub.title) ) - params.append( ('artist', cdstub.artist) ) - if cdstub.barcode != "": - params.append( ('barcode', cdstub.barcode) ) - if cdstub.comment != "": - params.append( ('comment', cdstub.comment) ) - - trackind = 0 - for track,artist in cdstub.tracks: - params.append( ('track%d' % trackind, track) ) - if artist != "": - params.append( ('artist%d' % trackind, artist) ) - - trackind += 1 - - toc = "%d %d %d " % (disc.firstTrackNum, disc.lastTrackNum, disc.sectors) - toc = toc + ' '.join( map(lambda x: str(x[0]), disc.getTracks()) ) - - params.append( ('toc', toc) ) - - encodedStr = urllib.urlencode(params) - self._ws.post('release', '', encodedStr) - -def _createIncludes(tagMap): - selected = filter(lambda x: x[1] == True, tagMap.items()) - return map(lambda x: x[0], selected) - -def _createParameters(params): - """Remove (x, None) tuples and encode (x, str/unicode) to utf-8.""" - ret = [ ] - for p in params: - if isinstance(p[1], (str, unicode)): - ret.append( (p[0], p[1].encode('utf-8')) ) - elif p[1] is not None: - ret.append(p) - - return ret - -def _paramsValid(params): - """Check if the query parameter collides with other parameters.""" - tmp = [ ] - for name, value in params: - if value is not None and name not in ('offset', 'limit'): - tmp.append(name) - - if 'query' in tmp and len(tmp) > 1: - return False - else: - return True - -if __name__ == '__main__': - import doctest - doctest.testmod() - -# EOF diff --git a/musicbrainz2/wsxml.py b/musicbrainz2/wsxml.py deleted file mode 100644 index 7fd2a166..00000000 --- a/musicbrainz2/wsxml.py +++ 
/dev/null @@ -1,1675 +0,0 @@ -"""A parser for the Music Metadata XML Format (MMD). - -This module contains L{MbXmlParser}, which parses the U{Music Metadata XML -Format (MMD) } returned by the -MusicBrainz webservice. - -There are also DOM helper functions in this module used by the parser which -probably aren't useful to users. -""" -__revision__ = '$Id: wsxml.py 12028 2009-09-01 13:15:50Z matt $' - -import re -import logging -import urlparse -import xml.dom.minidom -import xml.sax.saxutils as saxutils -from xml.parsers.expat import ExpatError -from xml.dom import DOMException - -import musicbrainz2.utils as mbutils -import musicbrainz2.model as model -from musicbrainz2.model import NS_MMD_1, NS_REL_1, NS_EXT_1 - -__all__ = [ - 'DefaultFactory', 'Metadata', 'ParseError', - 'MbXmlParser', 'MbXmlWriter', - 'AbstractResult', - 'ArtistResult', 'ReleaseResult', 'TrackResult', 'LabelResult', - 'ReleaseGroupResult' -] - - -class DefaultFactory(object): - """A factory to instantiate classes from the domain model. - - This factory may be used to create objects from L{musicbrainz2.model}. - """ - def newArtist(self): return model.Artist() - def newRelease(self): return model.Release() - def newReleaseGroup(self): return model.ReleaseGroup() - def newTrack(self): return model.Track() - def newRelation(self): return model.Relation() - def newReleaseEvent(self): return model.ReleaseEvent() - def newDisc(self): return model.Disc() - def newArtistAlias(self): return model.ArtistAlias() - def newUser(self): return model.User() - def newLabel(self): return model.Label() - def newLabelAlias(self): return model.LabelAlias() - def newTag(self): return model.Tag() - def newRating(self): return model.Rating() - - -class ParseError(Exception): - """Exception to be thrown if a parse error occurs. - - The C{'msg'} attribute contains a printable error message, C{'reason'} - is the lower level exception that was raised. 
- """ - - def __init__(self, msg='Parse Error', reason=None): - Exception.__init__(self) - self.msg = msg - self.reason = reason - - def __str__(self): - return self.msg - - -class Metadata(object): - """Represents a parsed Music Metadata XML document. - - The Music Metadata XML format is very flexible and may contain a - diverse set of data (e.g. an artist, a release and a list of tracks), - but usually only a small subset is used (either an artist, a release - or a track, or a lists of objects from one class). - - @see: L{MbXmlParser} for reading, and L{MbXmlWriter} for writing - Metadata objects - """ - def __init__(self): - self._artist = None - self._release = None - self._track = None - self._label = None - self._releaseGroup = None - self._artistResults = [ ] - self._artistResultsOffset = None - self._artistResultsCount = None - self._releaseResults = [ ] - self._releaseResultsOffset = None - self._releaseResultsCount = None - self._releaseGroupResults = [ ] - self._releaseGroupResultsOffset = None - self._releaseGroupResultsCount = None - self._trackResults = [ ] - self._trackResultsOffset = None - self._trackResultsCount = None - self._labelResults = [ ] - self._labelResultsOffset = None - self._labelResultsCount = None - self._tagList = [ ] - self._rating = None - self._userList = [ ] - - def getArtist(self): - return self._artist - - def setArtist(self, artist): - self._artist = artist - - artist = property(getArtist, setArtist, doc='An Artist object.') - - def getLabel(self): - return self._label - - def setLabel(self, label): - self._label = label - - label = property(getLabel, setLabel, doc='A Label object.') - - def getRelease(self): - return self._release - - def setRelease(self, release): - self._release = release - - release = property(getRelease, setRelease, doc='A Release object.') - - def getReleaseGroup(self): - return self._releaseGroup - - def setReleaseGroup(self, releaseGroup): - self._releaseGroup = releaseGroup - - releaseGroup = 
property(getReleaseGroup, setReleaseGroup) - - def getTrack(self): - return self._track - - def setTrack(self, track): - self._track = track - - track = property(getTrack, setTrack, doc='A Track object.') - - def getArtistResults(self): - """Returns an artist result list. - - @return: a list of L{ArtistResult} objects. - """ - return self._artistResults - - artistResults = property(getArtistResults, - doc='A list of ArtistResult objects.') - - def getArtistResultsOffset(self): - """Returns the offset of the artist result list. - - The offset is used for paging through the result list. It - is zero-based. - - @return: an integer containing the offset, or None - - @see: L{getArtistResults}, L{getArtistResultsCount} - """ - return self._artistResultsOffset - - def setArtistResultsOffset(self, value): - """Sets the offset of the artist result list. - - @param value: an integer containing the offset, or None - - @see: L{getArtistResultsOffset} - """ - self._artistResultsOffset = value - - artistResultsOffset = property( - getArtistResultsOffset, setArtistResultsOffset, - doc='The offset of the artist results.') - - def getArtistResultsCount(self): - """Returns the total number of results available. - - This may or may not match with the number of elements that - L{getArtistResults} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setArtistResultsCount}, L{getArtistResultsOffset} - """ - return self._artistResultsCount - - def setArtistResultsCount(self, value): - """Sets the total number of available results. - - @param value: an integer containing the count, or None - - @see: L{getArtistResults}, L{setArtistResultsOffset} - """ - self._artistResultsCount = value - - artistResultsCount = property( - getArtistResultsCount, setArtistResultsCount, - doc='The total number of artists results.') - - def getLabelResults(self): - """Returns a label result list. 
- - @return: a list of L{LabelResult} objects. - """ - return self._labelResults - - labelResults = property(getLabelResults, - doc='A list of LabelResult objects') - - def getLabelResultsOffset(self): - """Returns the offset of the label result list. - - The offset is used for paging through the result list. It - is zero-based. - - @return: an integer containing the offset, or None - - @see: L{getLabelResults}, L{getLabelResultsCount} - """ - return self._labelResultsOffset - - def setLabelResultsOffset(self, value): - """Sets the offset of the label result list. - - @param value: an integer containing the offset, or None - - @see: L{getLabelResultsOffset} - """ - self._labelResultsOffset = value - - labelResultsOffset = property( - getLabelResultsOffset, setLabelResultsOffset, - doc='The offset of the label results.') - - def getLabelResultsCount(self): - """Returns the total number of results available. - - This may or may not match with the number of elements that - L{getLabelResults} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setLabelResultsCount}, L{getLabelResultsOffset} - """ - return self._labelResultsCount - - def setLabelResultsCount(self, value): - """Sets the total number of available results. - - @param value: an integer containing the count, or None - - @see: L{getLabelResults}, L{setLabelResultsOffset} - """ - self._labelResultsCount = value - - labelResultsCount = property( - getLabelResultsCount, setLabelResultsCount, - doc='The total number of label results.') - - def getReleaseResults(self): - """Returns a release result list. - - @return: a list of L{ReleaseResult} objects. - """ - return self._releaseResults - - releaseResults = property(getReleaseResults, - doc='A list of ReleaseResult objects.') - - def getReleaseResultsOffset(self): - """Returns the offset of the release result list. 
- - The offset is used for paging through the result list. It - is zero-based. - - @return: an integer containing the offset, or None - - @see: L{getReleaseResults}, L{getReleaseResultsCount} - """ - return self._releaseResultsOffset - - def setReleaseResultsOffset(self, value): - """Sets the offset of the release result list. - - @param value: an integer containing the offset, or None - - @see: L{getReleaseResultsOffset} - """ - self._releaseResultsOffset = value - - releaseResultsOffset = property( - getReleaseResultsOffset, setReleaseResultsOffset, - doc='The offset of the release results.') - - def getReleaseResultsCount(self): - """Returns the total number of results available. - - This may or may not match with the number of elements that - L{getReleaseResults} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setReleaseResultsCount}, L{getReleaseResultsOffset} - """ - return self._releaseResultsCount - - def setReleaseResultsCount(self, value): - """Sets the total number of available results. - - @param value: an integer containing the count, or None - - @see: L{getReleaseResults}, L{setReleaseResultsOffset} - """ - self._releaseResultsCount = value - - releaseResultsCount = property( - getReleaseResultsCount, setReleaseResultsCount, - doc='The total number of release results.') - - def getReleaseGroupResults(self): - """Returns a release group result list. - - @return: a list of L{ReleaseGroupResult} objects. - """ - return self._releaseGroupResults - - releaseGroupResults = property(getReleaseGroupResults, - doc = 'A list of ReleaseGroupResult objects.') - - def getReleaseGroupResultsOffset(self): - """Returns the offset of the release group result list. - - The offset is used for paging through the result list. It - is zero-based. - - @return: an integer containing the offset, or None. 
- - @see: L{getReleaseGroupResults}, L{getReleaseGroupResultsCount} - """ - return self._releaseGroupResultsOffset - - def setReleaseGroupResultsOffset(self, value): - """Sets the offset of the release group result list. - - @param value: an integer containing the offset, or None - - @see: L{getReleaseGroupResultsOffset} - """ - self._releaseGroupResultsOffset = value - - releaseGroupResultsOffset = property( - getReleaseGroupResultsOffset, setReleaseGroupResultsOffset, - doc='The offset of the release group results.') - - def getReleaseGroupResultsCount(self): - """Returns the total number of results available. - - This may or may not match with the number of elements that - L{getReleaseGroupResults} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setReleaseGroupResultsCount}, L{getReleaseGroupResultsOffset} - """ - return self._releaseGroupResultsCount - - def setReleaseGroupResultsCount(self, value): - """Sets the total number of available results. - - @param value: an integer containing the count, or None - - @see: L{getReleaseGroupResults}, L{setReleaseGroupResultsOffset} - """ - self._releaseGroupResultsCount = value - - releaseGroupResultsCount = property( - getReleaseGroupResultsCount, setReleaseGroupResultsCount, - doc='The total number of release group results.') - - def getTrackResults(self): - """Returns a track result list. - - @return: a list of L{TrackResult} objects. - """ - return self._trackResults - - trackResults = property(getTrackResults, - doc='A list of TrackResult objects.') - - def getTrackResultsOffset(self): - """Returns the offset of the track result list. - - The offset is used for paging through the result list. It - is zero-based. 
- - @return: an integer containing the offset, or None - - @see: L{getTrackResults}, L{getTrackResultsCount} - """ - return self._trackResultsOffset - - def setTrackResultsOffset(self, value): - """Sets the offset of the track result list. - - @param value: an integer containing the offset, or None - - @see: L{getTrackResultsOffset} - """ - self._trackResultsOffset = value - - trackResultsOffset = property( - getTrackResultsOffset, setTrackResultsOffset, - doc='The offset of the track results.') - - def getTrackResultsCount(self): - """Returns the total number of results available. - - This may or may not match with the number of elements that - L{getTrackResults} returns. If the count is higher than - the list, it indicates that the list is incomplete. - - @return: an integer containing the count, or None - - @see: L{setTrackResultsCount}, L{getTrackResultsOffset} - """ - return self._trackResultsCount - - def setTrackResultsCount(self, value): - """Sets the total number of available results. - - @param value: an integer containing the count, or None - - @see: L{getTrackResults}, L{setTrackResultsOffset} - """ - self._trackResultsCount = value - - trackResultsCount = property( - getTrackResultsCount, setTrackResultsCount, - doc='The total number of track results.') - - - def getTagList(self): - """Returns a list of tags. - - @return: a list of L{model.Tag} objects - """ - return self._tagList - - tagResults = property(getTagList, - doc='A list of Tag objects.') - - def getRating(self): - """Returns the rating. - - @return: rating object - """ - return self._rating - - def setRating(self, value): - """Sets the rating. - - @param value: a L{model.Rating} object - """ - self._rating = value - - rating = property(getRating, setRating, doc='A Rating object.') - - - # MusicBrainz extension to the schema - def getUserList(self): - """Returns a list of users. - - @return: a list of L{model.User} objects - - @note: This is a MusicBrainz extension. 
- """ - return self._userList - - userResults = property(getUserList, - doc='A list of User objects.') - - -class AbstractResult(object): - """The abstract representation of a result. - - A result is an instance of some kind (Artist, Release, ...) - associated with a score. - """ - - def __init__(self, score): - self._score = score - - def getScore(self): - """Returns the result score. - - The score indicates how good this result matches the search - parameters. The higher the value, the better the match. - - @return: an int between 0 and 100 (both inclusive), or None - """ - return self._score - - def setScore(self, score): - self._score = score - - score = property(getScore, setScore, doc='The relevance score.') - - -class ArtistResult(AbstractResult): - """Represents an artist result. - - An ArtistResult consists of a I{score} and an artist. The score is a - number between 0 and 100, where a higher number indicates a better - match. - """ - def __init__(self, artist, score): - super(ArtistResult, self).__init__(score) - self._artist = artist - - def getArtist(self): - """Returns an Artist object. - - @return: a L{musicbrainz2.model.Artist} object - """ - return self._artist - - def setArtist(self, artist): - self._artist = artist - - artist = property(getArtist, setArtist, doc='An Artist object.') - - -class ReleaseResult(AbstractResult): - """Represents a release result. - - A ReleaseResult consists of a I{score} and a release. The score is a - number between 0 and 100, where a higher number indicates a better - match. - """ - def __init__(self, release, score): - super(ReleaseResult, self).__init__(score) - self._release = release - - def getRelease(self): - """Returns a Release object. 
- - @return: a L{musicbrainz2.model.Release} object - """ - return self._release - - def setRelease(self, release): - self._release = release - - release = property(getRelease, setRelease, doc='A Release object.') - -class ReleaseGroupResult(AbstractResult): - """Represents a release group result. - - A ReleaseGroupResult consists of a I{score} and a release group. The - score is a number between 0 and 100, where a higher number indicates - a better match. - """ - def __init__(self, releaseGroup, score): - super(ReleaseGroupResult, self).__init__(score) - self._releaseGroup = releaseGroup - - def getReleaseGroup(self): - """Returns a ReleaseGroup object. - - @return: a L{musicbrainz2.model.ReleaseGroup} object - """ - return self._releaseGroup - - def setReleaseGroup(self, value): - self._releaseGroup = value - - releaseGroup = property(getReleaseGroup, setReleaseGroup, doc='A ReleaseGroup object.') - -class TrackResult(AbstractResult): - """Represents a track result. - - A TrackResult consists of a I{score} and a track. The score is a - number between 0 and 100, where a higher number indicates a better - match. - """ - def __init__(self, track, score): - super(TrackResult, self).__init__(score) - self._track = track - - def getTrack(self): - """Returns a Track object. - - @return: a L{musicbrainz2.model.Track} object - """ - return self._track - - def setTrack(self, track): - self._track = track - - track = property(getTrack, setTrack, doc='A Track object.') - - -class LabelResult(AbstractResult): - """Represents a label result. - - An LabelResult consists of a I{score} and a label. The score is a - number between 0 and 100, where a higher number indicates a better - match. - """ - def __init__(self, label, score): - super(LabelResult, self).__init__(score) - self._label = label - - def getLabel(self): - """Returns a Label object. 
- - @return: a L{musicbrainz2.model.Label} object - """ - return self._label - - def setLabel(self, label): - self._label = label - - label = property(getLabel, setLabel, doc='A Label object.') - - -class MbXmlParser(object): - """A parser for the Music Metadata XML format. - - This parser supports all basic features and extensions defined by - MusicBrainz, including unlimited document nesting. By default it - reads an XML document from a file-like object (stream) and returns - an object tree representing the document using classes from - L{musicbrainz2.model}. - - The implementation tries to be as permissive as possible. Invalid - contents are skipped, but documents have to be well-formed and using - the correct namespace. In case of unrecoverable errors, a L{ParseError} - exception is raised. - - @see: U{The Music Metadata XML Format - } - """ - - def __init__(self, factory=DefaultFactory()): - """Constructor. - - The C{factory} parameter has be an instance of L{DefaultFactory} - or a subclass of it. It is used by L{parse} to obtain objects - from L{musicbrainz2.model} to build resulting object tree. - If you supply your own factory, you have to make sure all - returned objects have the same interface as their counterparts - from L{musicbrainz2.model}. - - @param factory: an object factory - """ - self._log = logging.getLogger(str(self.__class__)) - self._factory = factory - - def parse(self, inStream): - """Parses the MusicBrainz web service XML. - - Returns a L{Metadata} object representing the parsed XML or - raises a L{ParseError} exception if the data was malformed. - The parser tries to be liberal and skips invalid content if - possible. - - Note that an L{IOError} may be raised if there is a problem - reading C{inStream}. 
- - @param inStream: a file-like object - @return: a L{Metadata} object (never None) - @raise ParseError: if the document is not valid - @raise IOError: if reading from the stream failed - """ - - try: - doc = xml.dom.minidom.parse(inStream) - - # Try to find the root element. If this isn't an mmd - # XML file or the namespace is wrong, this will fail. - elems = doc.getElementsByTagNameNS(NS_MMD_1, 'metadata') - - if len(elems) != 0: - md = self._createMetadata(elems[0]) - else: - msg = 'cannot find root element mmd:metadata' - self._log.debug('ParseError: ' + msg) - raise ParseError(msg) - - doc.unlink() - - return md - except ExpatError, e: - self._log.debug('ExpatError: ' + str(e)) - raise ParseError(msg=str(e), reason=e) - except DOMException, e: - self._log.debug('DOMException: ' + str(e)) - raise ParseError(msg=str(e), reason=e) - - - def _createMetadata(self, metadata): - md = Metadata() - - for node in _getChildElements(metadata): - if _matches(node, 'artist'): - md.artist = self._createArtist(node) - elif _matches(node, 'release'): - md.release = self._createRelease(node) - elif _matches(node, 'release-group'): - md.releaseGroup = self._createReleaseGroup(node) - elif _matches(node, 'track'): - md.track = self._createTrack(node) - elif _matches(node, 'label'): - md.label = self._createLabel(node) - elif _matches(node, 'artist-list'): - (offset, count) = self._getListAttrs(node) - md.artistResultsOffset = offset - md.artistResultsCount = count - self._addArtistResults(node, md.getArtistResults()) - elif _matches(node, 'release-list'): - (offset, count) = self._getListAttrs(node) - md.releaseResultsOffset = offset - md.releaseResultsCount = count - self._addReleaseResults(node, md.getReleaseResults()) - elif _matches(node, 'release-group-list'): - (offset, count) = self._getListAttrs(node) - md.releaseGroupResultsOffset = offset - md.releaseGroupResultsCount = count - self._addReleaseGroupResults(node, md.getReleaseGroupResults()) - elif _matches(node, 
'track-list'): - (offset, count) = self._getListAttrs(node) - md.trackResultsOffset = offset - md.trackResultsCount = count - self._addTrackResults(node, md.getTrackResults()) - elif _matches(node, 'label-list'): - (offset, count) = self._getListAttrs(node) - md.labelResultsOffset = offset - md.labelResultsCount = count - self._addLabelResults(node, md.getLabelResults()) - elif _matches(node, 'tag-list'): - self._addTagsToList(node, md.getTagList()) - elif _matches(node, 'user-list', NS_EXT_1): - self._addUsersToList(node, md.getUserList()) - - return md - - - def _addArtistResults(self, listNode, resultList): - for c in _getChildElements(listNode): - artist = self._createArtist(c) - score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1) - if artist is not None: - resultList.append(ArtistResult(artist, score)) - - def _addReleaseResults(self, listNode, resultList): - for c in _getChildElements(listNode): - release = self._createRelease(c) - score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1) - if release is not None: - resultList.append(ReleaseResult(release, score)) - - def _addReleaseGroupResults(self, listNode, resultList): - for c in _getChildElements(listNode): - releaseGroup = self._createReleaseGroup(c) - score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1) - if releaseGroup is not None: - resultList.append(ReleaseGroupResult(releaseGroup, score)) - - def _addTrackResults(self, listNode, resultList): - for c in _getChildElements(listNode): - track = self._createTrack(c) - score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1) - if track is not None: - resultList.append(TrackResult(track, score)) - - def _addLabelResults(self, listNode, resultList): - for c in _getChildElements(listNode): - label = self._createLabel(c) - score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1) - if label is not None: - resultList.append(LabelResult(label, score)) - - def _addReleasesToList(self, listNode, resultList): - self._addToList(listNode, resultList, self._createRelease) - - 
def _addReleaseGroupsToList(self, listNode, resultList): - self._addToList(listNode, resultList, self._createReleaseGroup) - - def _addTracksToList(self, listNode, resultList): - self._addToList(listNode, resultList, self._createTrack) - - def _addUsersToList(self, listNode, resultList): - self._addToList(listNode, resultList, self._createUser) - - def _addTagsToList(self, listNode, resultList): - self._addToList(listNode, resultList, self._createTag) - - def _addTagsToEntity(self, listNode, entity): - for node in _getChildElements(listNode): - tag = self._createTag(node) - entity.addTag(tag) - - def _addRatingToEntity(self, attrNode, entity): - rating = self._createRating(attrNode) - entity.setRating(rating) - - def _addToList(self, listNode, resultList, creator): - for c in _getChildElements(listNode): - resultList.append(creator(c)) - - def _getListAttrs(self, listNode): - offset = _getIntAttr(listNode, 'offset') - count = _getIntAttr(listNode, 'count') - return (offset, count) - - - def _createArtist(self, artistNode): - artist = self._factory.newArtist() - artist.setId(_getIdAttr(artistNode, 'id', 'artist')) - artist.setType(_getUriAttr(artistNode, 'type')) - - for node in _getChildElements(artistNode): - if _matches(node, 'name'): - artist.setName(_getText(node)) - elif _matches(node, 'sort-name'): - artist.setSortName(_getText(node)) - elif _matches(node, 'disambiguation'): - artist.setDisambiguation(_getText(node)) - elif _matches(node, 'life-span'): - artist.setBeginDate(_getDateAttr(node, 'begin')) - artist.setEndDate(_getDateAttr(node, 'end')) - elif _matches(node, 'alias-list'): - self._addArtistAliases(node, artist) - elif _matches(node, 'release-list'): - (offset, count) = self._getListAttrs(node) - artist.setReleasesOffset(offset) - artist.setReleasesCount(count) - self._addReleasesToList(node, artist.getReleases()) - elif _matches(node, 'release-group-list'): - (offset, count) = self._getListAttrs(node) - artist.setReleaseGroupsOffset(offset) - 
artist.setReleaseGroupsCount(count) - self._addReleaseGroupsToList(node, artist.getReleaseGroups()) - elif _matches(node, 'relation-list'): - self._addRelationsToEntity(node, artist) - elif _matches(node, 'tag-list'): - self._addTagsToEntity(node, artist) - elif _matches(node, 'rating'): - self._addRatingToEntity(node, artist) - - return artist - - def _createLabel(self, labelNode): - label = self._factory.newLabel() - label.setId(_getIdAttr(labelNode, 'id', 'label')) - label.setType(_getUriAttr(labelNode, 'type')) - - for node in _getChildElements(labelNode): - if _matches(node, 'name'): - label.setName(_getText(node)) - if _matches(node, 'sort-name'): - label.setSortName(_getText(node)) - elif _matches(node, 'disambiguation'): - label.setDisambiguation(_getText(node)) - elif _matches(node, 'label-code'): - label.setCode(_getText(node)) - elif _matches(node, 'country'): - country = _getText(node, '^[A-Z]{2}$') - label.setCountry(country) - elif _matches(node, 'life-span'): - label.setBeginDate(_getDateAttr(node, 'begin')) - label.setEndDate(_getDateAttr(node, 'end')) - elif _matches(node, 'alias-list'): - self._addLabelAliases(node, label) - elif _matches(node, 'tag-list'): - self._addTagsToEntity(node, label) - elif _matches(node, 'rating'): - self._addRatingToEntity(node, label) - - return label - - def _createRelease(self, releaseNode): - release = self._factory.newRelease() - release.setId(_getIdAttr(releaseNode, 'id', 'release')) - for t in _getUriListAttr(releaseNode, 'type'): - release.addType(t) - - for node in _getChildElements(releaseNode): - if _matches(node, 'title'): - release.setTitle(_getText(node)) - elif _matches(node, 'text-representation'): - lang = _getAttr(node, 'language', '^[A-Z]{3}$') - release.setTextLanguage(lang) - script = _getAttr(node, 'script', '^[A-Z][a-z]{3}$') - release.setTextScript(script) - elif _matches(node, 'asin'): - release.setAsin(_getText(node)) - elif _matches(node, 'artist'): - 
release.setArtist(self._createArtist(node)) - elif _matches(node, 'release-event-list'): - self._addReleaseEvents(node, release) - elif _matches(node, 'release-group'): - release.setReleaseGroup(self._createReleaseGroup(node)) - elif _matches(node, 'disc-list'): - self._addDiscs(node, release) - elif _matches(node, 'track-list'): - (offset, count) = self._getListAttrs(node) - release.setTracksOffset(offset) - release.setTracksCount(count) - self._addTracksToList(node, release.getTracks()) - elif _matches(node, 'relation-list'): - self._addRelationsToEntity(node, release) - elif _matches(node, 'tag-list'): - self._addTagsToEntity(node, release) - elif _matches(node, 'rating'): - self._addRatingToEntity(node, release) - - return release - - def _createReleaseGroup(self, node): - rg = self._factory.newReleaseGroup() - rg.setId(_getIdAttr(node, 'id', 'release-group')) - rg.setType(_getUriAttr(node, 'type')) - - for child in _getChildElements(node): - if _matches(child, 'title'): - rg.setTitle(_getText(child)) - elif _matches(child, 'artist'): - rg.setArtist(self._createArtist(child)) - elif _matches(child, 'release-list'): - (offset, count) = self._getListAttrs(child) - rg.setReleasesOffset(offset) - rg.setReleasesCount(count) - self._addReleasesToList(child, rg.getReleases()) - - return rg - - def _addReleaseEvents(self, releaseListNode, release): - for node in _getChildElements(releaseListNode): - if _matches(node, 'event'): - country = _getAttr(node, 'country', '^[A-Z]{2}$') - date = _getDateAttr(node, 'date') - catalogNumber = _getAttr(node, 'catalog-number') - barcode = _getAttr(node, 'barcode') - format = _getUriAttr(node, 'format') - - # The date attribute is mandatory. If it isn't present, - # we don't add anything from this release event. 
- if date is not None: - event = self._factory.newReleaseEvent() - event.setCountry(country) - event.setDate(date) - event.setCatalogNumber(catalogNumber) - event.setBarcode(barcode) - event.setFormat(format) - - for subNode in _getChildElements(node): - if _matches(subNode, 'label'): - event.setLabel(self._createLabel(subNode)) - - release.addReleaseEvent(event) - - - def _addDiscs(self, discIdListNode, release): - for node in _getChildElements(discIdListNode): - if _matches(node, 'disc') and node.hasAttribute('id'): - d = self._factory.newDisc() - d.setId(node.getAttribute('id')) - d.setSectors(_getIntAttr(node, 'sectors', 0)) - release.addDisc(d) - - - def _addArtistAliases(self, aliasListNode, artist): - for node in _getChildElements(aliasListNode): - if _matches(node, 'alias'): - alias = self._factory.newArtistAlias() - self._initializeAlias(alias, node) - artist.addAlias(alias) - - - def _addLabelAliases(self, aliasListNode, label): - for node in _getChildElements(aliasListNode): - if _matches(node, 'alias'): - alias = self._factory.newLabelAlias() - self._initializeAlias(alias, node) - label.addAlias(alias) - - - def _initializeAlias(self, alias, node): - alias.setValue(_getText(node)) - alias.setType(_getUriAttr(node, 'type')) - alias.setScript(_getAttr(node, 'script', - '^[A-Z][a-z]{3}$')) - - - def _createTrack(self, trackNode): - track = self._factory.newTrack() - track.setId(_getIdAttr(trackNode, 'id', 'track')) - - for node in _getChildElements(trackNode): - if _matches(node, 'title'): - track.setTitle(_getText(node)) - elif _matches(node, 'artist'): - track.setArtist(self._createArtist(node)) - elif _matches(node, 'duration'): - track.setDuration(_getPositiveIntText(node)) - elif _matches(node, 'release-list'): - self._addReleasesToList(node, track.getReleases()) - elif _matches(node, 'puid-list'): - self._addPuids(node, track) - elif _matches(node, 'isrc-list'): - self._addISRCs(node, track) - elif _matches(node, 'relation-list'): - 
self._addRelationsToEntity(node, track) - elif _matches(node, 'tag-list'): - self._addTagsToEntity(node, track) - elif _matches(node, 'rating'): - self._addRatingToEntity(node, track) - - return track - - # MusicBrainz extension - def _createUser(self, userNode): - user = self._factory.newUser() - for t in _getUriListAttr(userNode, 'type', NS_EXT_1): - user.addType(t) - - for node in _getChildElements(userNode): - if _matches(node, 'name'): - user.setName(_getText(node)) - elif _matches(node, 'nag', NS_EXT_1): - user.setShowNag(_getBooleanAttr(node, 'show')) - - return user - - def _createRating(self, ratingNode): - rating = self._factory.newRating() - rating.value = _getText(ratingNode) - rating.count = _getIntAttr(ratingNode, 'votes-count') - return rating - - def _createTag(self, tagNode): - tag = self._factory.newTag() - tag.value = _getText(tagNode) - tag.count = _getIntAttr(tagNode, 'count') - return tag - - - def _addPuids(self, puidListNode, track): - for node in _getChildElements(puidListNode): - if _matches(node, 'puid') and node.hasAttribute('id'): - track.addPuid(node.getAttribute('id')) - - def _addISRCs(self, isrcListNode, track): - for node in _getChildElements(isrcListNode): - if _matches(node, 'isrc') and node.hasAttribute('id'): - track.addISRC(node.getAttribute('id')) - - def _addRelationsToEntity(self, relationListNode, entity): - targetType = _getUriAttr(relationListNode, 'target-type', NS_REL_1) - - if targetType is None: - return - - for node in _getChildElements(relationListNode): - if _matches(node, 'relation'): - rel = self._createRelation(node, targetType) - if rel is not None: - entity.addRelation(rel) - - - def _createRelation(self, relationNode, targetType): - relation = self._factory.newRelation() - - relation.setType(_getUriAttr(relationNode, 'type', NS_REL_1)) - relation.setTargetType(targetType) - resType = _getResourceType(targetType) - relation.setTargetId(_getIdAttr(relationNode, 'target', resType)) - - if relation.getType() is 
None \ - or relation.getTargetType() is None \ - or relation.getTargetId() is None: - return None - - relation.setDirection(_getDirectionAttr(relationNode, 'direction')) - relation.setBeginDate(_getDateAttr(relationNode, 'begin')) - relation.setEndDate(_getDateAttr(relationNode, 'end')) - - for a in _getUriListAttr(relationNode, 'attributes', NS_REL_1): - relation.addAttribute(a) - - target = None - children = _getChildElements(relationNode) - if len(children) > 0: - node = children[0] - if _matches(node, 'artist'): - target = self._createArtist(node) - elif _matches(node, 'release'): - target = self._createRelease(node) - elif _matches(node, 'track'): - target = self._createTrack(node) - - relation.setTarget(target) - - return relation - - -# -# XML output -# - -class _XmlWriter(object): - def __init__(self, outStream, indentAmount=' ', newline="\n"): - self._out = outStream - self._indentAmount = indentAmount - self._stack = [ ] - self._newline = newline - - def prolog(self, encoding='UTF-8', version='1.0'): - pi = '' % (version, encoding) - self._out.write(pi + self._newline) - - def start(self, name, attrs={ }): - indent = self._getIndention() - self._stack.append(name) - self._out.write(indent + self._makeTag(name, attrs) + self._newline) - - def end(self): - name = self._stack.pop() - indent = self._getIndention() - self._out.write('%s\n' % (indent, name)) - - def elem(self, name, value, attrs={ }): - # delete attributes with an unset value - for (k, v) in attrs.items(): - if v is None or v == '': - del attrs[k] - - if value is None or value == '': - if len(attrs) == 0: - return - self._out.write(self._getIndention()) - self._out.write(self._makeTag(name, attrs, True) + '\n') - else: - escValue = saxutils.escape(value or '') - self._out.write(self._getIndention()) - self._out.write(self._makeTag(name, attrs)) - self._out.write(escValue) - self._out.write('\n' % name) - - def _getIndention(self): - return self._indentAmount * len(self._stack) - - def 
_makeTag(self, name, attrs={ }, close=False): - ret = '<' + name - - for (k, v) in attrs.iteritems(): - if v is not None: - v = saxutils.quoteattr(str(v)) - ret += ' %s=%s' % (k, v) - - if close: - return ret + '/>' - else: - return ret + '>' - - - -class MbXmlWriter(object): - """Write XML in the Music Metadata XML format.""" - - def __init__(self, indentAmount=' ', newline="\n"): - """Constructor. - - @param indentAmount: the amount of whitespace to use per level - """ - self._indentAmount = indentAmount - self._newline = newline - - - def write(self, outStream, metadata): - """Writes the XML representation of a Metadata object to a file. - - @param outStream: an open file-like object - @param metadata: a L{Metadata} object - """ - xml = _XmlWriter(outStream, self._indentAmount, self._newline) - - xml.prolog() - xml.start('metadata', { - 'xmlns': NS_MMD_1, - 'xmlns:ext': NS_EXT_1, - }) - - self._writeArtist(xml, metadata.getArtist()) - self._writeRelease(xml, metadata.getRelease()) - self._writeReleaseGroup(xml, metadata.getReleaseGroup()) - self._writeTrack(xml, metadata.getTrack()) - self._writeLabel(xml, metadata.getLabel()) - - if len(metadata.getArtistResults()) > 0: - xml.start('artist-list', { - 'offset': metadata.artistResultsOffset, - 'count': metadata.artistResultsCount, - }) - for result in metadata.getArtistResults(): - self._writeArtist(xml, result.getArtist(), - result.getScore()) - xml.end() - - if len(metadata.getReleaseResults()) > 0: - xml.start('release-list', { - 'offset': metadata.releaseResultsOffset, - 'count': metadata.releaseResultsCount, - }) - for result in metadata.getReleaseResults(): - self._writeRelease(xml, result.getRelease(), - result.getScore()) - xml.end() - - if len(metadata.getReleaseGroupResults()) > 0: - xml.start('release-group-list', { - 'offset': metadata.releaseGroupResultsOffset, - 'count': metadata.releaseGroupResultsCount - }) - for result in metadata.getReleaseGroupResults(): - self._writeReleaseGroup(xml, 
result.getReleaseGroup(), - result.getScore()) - xml.end() - - if len(metadata.getTrackResults()) > 0: - xml.start('track-list', { - 'offset': metadata.trackResultsOffset, - 'count': metadata.trackResultsCount, - }) - for result in metadata.getTrackResults(): - self._writeTrack(xml, result.getTrack(), - result.getScore()) - xml.end() - - if len(metadata.getLabelResults()) > 0: - xml.start('label-list', { - 'offset': metadata.labelResultsOffset, - 'count': metadata.labelResultsCount, - }) - for result in metadata.getLabelResults(): - self._writeLabel(xml, result.getLabel(), - result.getScore()) - xml.end() - - xml.end() - - - def _writeArtist(self, xml, artist, score=None): - if artist is None: - return - - xml.start('artist', { - 'id': mbutils.extractUuid(artist.getId()), - 'type': mbutils.extractFragment(artist.getType()), - 'ext:score': score, - }) - - xml.elem('name', artist.getName()) - xml.elem('sort-name', artist.getSortName()) - xml.elem('disambiguation', artist.getDisambiguation()) - xml.elem('life-span', None, { - 'begin': artist.getBeginDate(), - 'end': artist.getEndDate(), - }) - - if len(artist.getAliases()) > 0: - xml.start('alias-list') - for alias in artist.getAliases(): - xml.elem('alias', alias.getValue(), { - 'type': alias.getType(), - 'script': alias.getScript(), - }) - xml.end() - - if len(artist.getReleases()) > 0: - xml.start('release-list') - for release in artist.getReleases(): - self._writeRelease(xml, release) - xml.end() - - if len(artist.getReleaseGroups()) > 0: - xml.start('release-group-list') - for releaseGroup in artist.getReleaseGroups(): - self._writeReleaseGroup(xml, releaseGroup) - xml.end() - - self._writeRelationList(xml, artist) - # TODO: extensions - - xml.end() - - - def _writeRelease(self, xml, release, score=None): - if release is None: - return - - types = [mbutils.extractFragment(t) for t in release.getTypes()] - typesStr = None - if len(types) > 0: - typesStr = ' '.join(types) - - xml.start('release', { - 'id': 
mbutils.extractUuid(release.getId()), - 'type': typesStr, - 'ext:score': score, - }) - - xml.elem('title', release.getTitle()) - xml.elem('text-representation', None, { - 'language': release.getTextLanguage(), - 'script': release.getTextScript() - }) - xml.elem('asin', release.getAsin()) - - self._writeArtist(xml, release.getArtist()) - self._writeReleaseGroup(xml, release.getReleaseGroup()) - - if len(release.getReleaseEvents()) > 0: - xml.start('release-event-list') - for event in release.getReleaseEvents(): - self._writeReleaseEvent(xml, event) - xml.end() - - if len(release.getDiscs()) > 0: - xml.start('disc-list') - for disc in release.getDiscs(): - xml.elem('disc', None, { 'id': disc.getId() }) - xml.end() - - if len(release.getTracks()) > 0: - # TODO: count attribute - xml.start('track-list', { - 'offset': release.getTracksOffset() - }) - for track in release.getTracks(): - self._writeTrack(xml, track) - xml.end() - - self._writeRelationList(xml, release) - # TODO: extensions - - xml.end() - - def _writeReleaseGroup(self, xml, rg, score = None): - if rg is None: - return - - xml.start('release-group', { - 'id': mbutils.extractUuid(rg.getId()), - 'type': mbutils.extractFragment(rg.getType()), - 'ext:score': score, - }) - - xml.elem('title', rg.getTitle()) - self._writeArtist(xml, rg.getArtist()) - - if len(rg.getReleases()) > 0: - xml.start('release-list') - for rel in rg.getReleases(): - self._writeRelease(xml, rel) - xml.end() - - xml.end() - - def _writeReleaseEvent(self, xml, event): - xml.start('event', { - 'country': event.getCountry(), - 'date': event.getDate(), - 'catalog-number': event.getCatalogNumber(), - 'barcode': event.getBarcode(), - 'format': event.getFormat() - }) - - self._writeLabel(xml, event.getLabel()) - - xml.end() - - - def _writeTrack(self, xml, track, score=None): - if track is None: - return - - xml.start('track', { - 'id': mbutils.extractUuid(track.getId()), - 'ext:score': score, - }) - - xml.elem('title', track.getTitle()) - 
xml.elem('duration', str(track.getDuration())) - self._writeArtist(xml, track.getArtist()) - - if len(track.getReleases()) > 0: - # TODO: offset + count - xml.start('release-list') - for release in track.getReleases(): - self._writeRelease(xml, release) - xml.end() - - if len(track.getPuids()) > 0: - xml.start('puid-list') - for puid in track.getPuids(): - xml.elem('puid', None, { 'id': puid }) - xml.end() - - self._writeRelationList(xml, track) - # TODO: extensions - - xml.end() - - - def _writeLabel(self, xml, label, score=None): - if label is None: - return - - xml.start('label', { - 'id': mbutils.extractUuid(label.getId()), - 'type': mbutils.extractFragment(label.getType()), - 'ext:score': score, - }) - - xml.elem('name', label.getName()) - xml.elem('sort-name', label.getSortName()) - xml.elem('disambiguation', label.getDisambiguation()) - xml.elem('life-span', None, { - 'begin': label.getBeginDate(), - 'end': label.getEndDate(), - }) - - if len(label.getAliases()) > 0: - xml.start('alias-list') - for alias in label.getAliases(): - xml.elem('alias', alias.getValue(), { - 'type': alias.getType(), - 'script': alias.getScript(), - }) - xml.end() - - # TODO: releases, artists - - self._writeRelationList(xml, label) - # TODO: extensions - - xml.end() - - - def _writeRelationList(self, xml, entity): - for tt in entity.getRelationTargetTypes(): - xml.start('relation-list', { - 'target-type': mbutils.extractFragment(tt), - }) - for rel in entity.getRelations(targetType=tt): - self._writeRelation(xml, rel, tt) - xml.end() - - - def _writeRelation(self, xml, rel, targetType): - relAttrs = ' '.join([mbutils.extractFragment(a) - for a in rel.getAttributes()]) - - if relAttrs == '': - relAttrs = None - - attrs = { - 'type': mbutils.extractFragment(rel.getType()), - 'target': rel.getTargetId(), - 'direction': rel.getDirection(), - 'begin': rel.getBeginDate(), - 'end': rel.getBeginDate(), - 'attributes': relAttrs, - } - - if rel.getTarget() is None: - xml.elem('relation', 
None, attrs) - else: - xml.start('relation', attrs) - if targetType == NS_REL_1 + 'Artist': - self._writeArtist(xml, rel.getTarget()) - elif targetType == NS_REL_1 + 'Release': - self._writeRelease(xml, rel.getTarget()) - elif targetType == NS_REL_1 + 'Track': - self._writeTrack(xml, rel.getTarget()) - xml.end() - - -# -# DOM Utilities -# - -def _matches(node, name, namespace=NS_MMD_1): - """Checks if an xml.dom.Node and a given name and namespace match.""" - - if node.localName == name and node.namespaceURI == namespace: - return True - else: - return False - - -def _getChildElements(parentNode): - """Returns all direct child elements of the given xml.dom.Node.""" - - children = [ ] - for node in parentNode.childNodes: - if node.nodeType == node.ELEMENT_NODE: - children.append(node) - - return children - - -def _getText(element, regex=None, default=None): - """Returns the text content of the given xml.dom.Element. - - This function simply fetches all contained text nodes, so the element - should not contain child elements. - """ - res = '' - for node in element.childNodes: - if node.nodeType == node.TEXT_NODE: - res += node.data - - if regex is None or re.match(regex, res): - return res - else: - return default - - -def _getPositiveIntText(element): - """Returns the text content of the given xml.dom.Element as an int.""" - - res = _getText(element) - - if res is None: - return None - - try: - return int(res) - except ValueError: - return None - - -def _getAttr(element, attrName, regex=None, default=None, ns=None): - """Returns an attribute of the given element. - - If there is no attribute with that name or the attribute doesn't - match the regular expression, default is returned. 
- """ - if element.hasAttributeNS(ns, attrName): - content = element.getAttributeNS(ns, attrName) - - if regex is None or re.match(regex, content): - return content - else: - return default - else: - return default - - -def _getDateAttr(element, attrName): - """Gets an incomplete date from an attribute.""" - return _getAttr(element, attrName, '^\d+(-\d\d)?(-\d\d)?$') - - -def _getIdAttr(element, attrName, typeName): - """Gets an ID from an attribute and turns it into an absolute URI.""" - value = _getAttr(element, attrName) - - return _makeAbsoluteUri('http://musicbrainz.org/' + typeName + '/', value) - - - -def _getIntAttr(element, attrName, min=0, max=None, ns=None): - """Gets an int from an attribute, or None.""" - try: - val = int(_getAttr(element, attrName, ns=ns)) - - if max is None: - max = val - - if min <= val <= max: - return val - else: - return None - except ValueError: - return None # raised if conversion to int fails - except TypeError: - return None # raised if no such attribute exists - - -def _getUriListAttr(element, attrName, prefix=NS_MMD_1): - """Gets a list of URIs from an attribute.""" - if not element.hasAttribute(attrName): - return [ ] - - f = lambda x: x != '' - uris = filter(f, re.split('\s+', element.getAttribute(attrName))) - - m = lambda x: _makeAbsoluteUri(prefix, x) - uris = map(m, uris) - - return uris - - -def _getUriAttr(element, attrName, prefix=NS_MMD_1): - """Gets a URI from an attribute. - - This also works for space-separated URI lists. In this case, the - first URI is returned. 
- """ - uris = _getUriListAttr(element, attrName, prefix) - if len(uris) > 0: - return uris[0] - else: - return None - - -def _getBooleanAttr(element, attrName): - """Gets a boolean value from an attribute.""" - value = _getAttr(element, attrName) - if value == 'true': - return True - elif value == 'false': - return False - else: - return None - - -def _getDirectionAttr(element, attrName): - """Gets the Relation reading direction from an attribute.""" - regex = '^\s*(' + '|'.join(( - model.Relation.DIR_FORWARD, - model.Relation.DIR_BACKWARD)) + ')\s*$' - return _getAttr(element, 'direction', regex, model.Relation.DIR_NONE) - - -def _makeAbsoluteUri(prefix, uriStr): - """Creates an absolute URI adding prefix, if necessary.""" - if uriStr is None: - return None - - (scheme, netloc, path, params, query, frag) = urlparse.urlparse(uriStr) - - if scheme == '' and netloc == '': - return prefix + uriStr - else: - return uriStr - - -def _getResourceType(uri): - """Gets the resource type from a URI. - - The resource type is the basename of the URI's path. 
- """ - m = re.match('^' + NS_REL_1 + '(.*)$', uri) - - if m: - return m.group(1).lower() - else: - return None - -# EOF diff --git a/pyItunes/Library.py b/pyItunes/Library.py deleted file mode 100644 index 460a1519..00000000 --- a/pyItunes/Library.py +++ /dev/null @@ -1,41 +0,0 @@ -from pyItunes.Song import Song -import time -class Library: - def __init__(self,dictionary): - self.songs = self.parseDictionary(dictionary) - - def parseDictionary(self,dictionary): - songs = [] - format = "%Y-%m-%dT%H:%M:%SZ" - for song,attributes in dictionary.iteritems(): - s = Song() - s.name = attributes.get('Name') - s.artist = attributes.get('Artist') - s.album_artist = attributes.get('Album Aritst') - s.composer = attributes.get('Composer') - s.album = attributes.get('Album') - s.genre = attributes.get('Genre') - s.kind = attributes.get('Kind') - if attributes.get('Size'): - s.size = int(attributes.get('Size')) - s.total_time = attributes.get('Total Time') - s.track_number = attributes.get('Track Number') - if attributes.get('Year'): - s.year = int(attributes.get('Year')) - if attributes.get('Date Modified'): - s.date_modified = time.strptime(attributes.get('Date Modified'),format) - if attributes.get('Date Added'): - s.date_added = time.strptime(attributes.get('Date Added'),format) - if attributes.get('Bit Rate'): - s.bit_rate = int(attributes.get('Bit Rate')) - if attributes.get('Sample Rate'): - s.sample_rate = int(attributes.get('Sample Rate')) - s.comments = attributes.get("Comments ") - if attributes.get('Rating'): - s.rating = int(attributes.get('Rating')) - if attributes.get('Play Count'): - s.play_count = int(attributes.get('Play Count')) - if attributes.get('Location'): - s.location = attributes.get('Location') - songs.append(s) - return songs \ No newline at end of file diff --git a/pyItunes/Library.pyc b/pyItunes/Library.pyc deleted file mode 100644 index b8a8ca11..00000000 Binary files a/pyItunes/Library.pyc and /dev/null differ diff --git a/pyItunes/Song.py 
b/pyItunes/Song.py deleted file mode 100644 index 27d44d79..00000000 --- a/pyItunes/Song.py +++ /dev/null @@ -1,46 +0,0 @@ -class Song: - """ - Song Attributes: - name (String) - artist (String) - album_arist (String) - composer = None (String) - album = None (String) - genre = None (String) - kind = None (String) - size = None (Integer) - total_time = None (Integer) - track_number = None (Integer) - year = None (Integer) - date_modified = None (Time) - date_added = None (Time) - bit_rate = None (Integer) - sample_rate = None (Integer) - comments = None (String) - rating = None (Integer) - album_rating = None (Integer) - play_count = None (Integer) - location = None (String) - """ - name = None - artist = None - album_arist = None - composer = None - album = None - genre = None - kind = None - size = None - total_time = None - track_number = None - year = None - date_modified = None - date_added = None - bit_rate = None - sample_rate = None - comments = None - rating = None - album_rating = None - play_count = None - location = None - - #title = property(getTitle,setTitle) \ No newline at end of file diff --git a/pyItunes/Song.pyc b/pyItunes/Song.pyc deleted file mode 100644 index 565886d9..00000000 Binary files a/pyItunes/Song.pyc and /dev/null differ diff --git a/pyItunes/XMLLibraryParser.py b/pyItunes/XMLLibraryParser.py deleted file mode 100644 index 7e4b239a..00000000 --- a/pyItunes/XMLLibraryParser.py +++ /dev/null @@ -1,42 +0,0 @@ -import re -class XMLLibraryParser: - def __init__(self,xmlLibrary): - f = open(xmlLibrary) - s = f.read() - lines = s.split("\n") - self.dictionary = self.parser(lines) - - def getValue(self,restOfLine): - value = re.sub("<.*?>","",restOfLine) - u = unicode(value,"utf-8") - cleanValue = u.encode("ascii","xmlcharrefreplace") - return cleanValue - - def keyAndRestOfLine(self,line): - rawkey = re.search('(.*?)',line).group(0) - key = re.sub("","",rawkey) - restOfLine = re.sub(".*?","",line).strip() - return key,restOfLine - - def 
parser(self,lines): - dicts = 0 - songs = {} - inSong = False - for line in lines: - if re.search('',line): - dicts += 1 - if re.search('',line): - dicts -= 1 - inSong = False - songs[songkey] = temp - if dicts == 2 and re.search('(.*?)',line): - rawkey = re.search('(.*?)',line).group(0) - songkey = re.sub("","",rawkey) - inSong = True - temp = {} - if dicts == 3 and re.search('(.*?)',line): - key,restOfLine = self.keyAndRestOfLine(line) - temp[key] = self.getValue(restOfLine) - if len(songs) > 0 and dicts < 2: - return songs - return songs \ No newline at end of file diff --git a/pyItunes/XMLLibraryParser.pyc b/pyItunes/XMLLibraryParser.pyc deleted file mode 100644 index 79cd2bce..00000000 Binary files a/pyItunes/XMLLibraryParser.pyc and /dev/null differ diff --git a/pyItunes/__init__.py b/pyItunes/__init__.py deleted file mode 100644 index bc7acfad..00000000 --- a/pyItunes/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from pyItunes.XMLLibraryParser import XMLLibraryParser -from pyItunes.Library import Library -from pyItunes.Song import Song \ No newline at end of file diff --git a/pyItunes/__init__.pyc b/pyItunes/__init__.pyc deleted file mode 100644 index 5090244d..00000000 Binary files a/pyItunes/__init__.pyc and /dev/null differ diff --git a/searcher.py b/searcher.py deleted file mode 100644 index dc859435..00000000 --- a/searcher.py +++ /dev/null @@ -1,212 +0,0 @@ -import urllib -from webServer import database -from headphones import config_file -from configobj import ConfigObj -import string -import feedparser -import sqlite3 -import re -import logger - - -config = ConfigObj(config_file) -General = config['General'] -NZBMatrix = config['NZBMatrix'] -SABnzbd = config['SABnzbd'] -Newznab = config['Newznab'] -NZBsorg = config['NZBsorg'] -usenet_retention = General['usenet_retention'] -include_lossless = General['include_lossless'] -nzbmatrix = NZBMatrix['nzbmatrix'] -nzbmatrix_username = NZBMatrix['nzbmatrix_username'] -nzbmatrix_apikey = 
NZBMatrix['nzbmatrix_apikey'] -newznab = Newznab['newznab'] -newznab_host = Newznab['newznab_host'] -newznab_apikey = Newznab['newznab_apikey'] -nzbsorg = NZBsorg['nzbsorg'] -nzbsorg_uid = NZBsorg['nzbsorg_uid'] -nzbsorg_hash = NZBsorg['nzbsorg_hash'] -sab_host = SABnzbd['sab_host'] -sab_username = SABnzbd['sab_username'] -sab_password = SABnzbd['sab_password'] -sab_apikey = SABnzbd['sab_apikey'] -sab_category = SABnzbd['sab_category'] - - - -def searchNZB(albumid=None): - - conn=sqlite3.connect(database) - c=conn.cursor() - - if albumid: - c.execute('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE Status="Wanted" AND AlbumID="%s"' % albumid) - else: - c.execute('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE Status="Wanted"') - - results = c.fetchall() - - for albums in results: - - reldate = albums[3] - year = reldate[:4] - clname = string.replace(albums[0], ' & ', ' ') - clalbum = string.replace(albums[1], ' & ', ' ') - term1 = re.sub('[\.\-]', ' ', '%s %s %s' % (clname, clalbum, year)).encode('utf-8') - term = string.replace(term1, '"', '') - - logger.log(u"Searching for "+term+" since it was marked as wanted") - - resultlist = [] - - if nzbmatrix == '1': - - if include_lossless == '1': - categories = "23,22" - maxsize = 2000000000 - else: - categories = "22" - maxsize = 250000000 - - - params = { "page": "download", - "username": nzbmatrix_username, - "apikey": nzbmatrix_apikey, - "subcat": categories, - "age": usenet_retention, - "english": 1, - "ssl": 1, - "scenename": 1, - "term": term - } - - searchURL = "http://rss.nzbmatrix.com/rss.php?" 
+ urllib.urlencode(params) - logger.log(u"Parsing results from "+searchURL) - d = feedparser.parse(searchURL) - - for item in d.entries: - try: - url = item.link - title = item.title - size = int(item.links[1]['length']) - if size < maxsize: - resultlist.append((title, size, url)) - logger.log(u"Found " + title +" : " + url + " (Size: " + size + ")") - else: - logger.log(title + u" is larger than the maxsize for this category, skipping. (Size: " + size+")", logger.WARNING) - - - except: - logger.log(u"No results found") - - if newznab == '1': - - if include_lossless == '1': - categories = "3040,3010" - maxsize = 2000000000 - else: - categories = "3010" - maxsize = 250000000 - - params = { "t": "search", - "apikey": newznab_apikey, - "cat": categories, - "maxage": usenet_retention, - "q": term - } - - searchURL = newznab_host + '/api?' + urllib.urlencode(params) - logger.log(u"Parsing results from "+searchURL) - - d = feedparser.parse(searchURL) - - for item in d.entries: - try: - url = item.link - title = item.title - size = int(item.links[1]['length']) - if size < maxsize: - resultlist.append((title, size, url)) - logger.log(u"Found " + title +" : " + url + " (Size: " + size + ")") - else: - logger.log(title + u" is larger than the maxsize for this category, skipping. (Size: " + size+")", logger.WARNING) - - except: - logger.log(u"No results found") - - if nzbsorg == '1': - - if include_lossless == '1': - categories = "5,3010" - maxsize = 2000000000 - else: - categories = "5" - maxsize = 250000000 - - params = { "action": "search", - "dl": 1, - "i": nzbsorg_uid, - "h": nzbsorg_hash, - "age": usenet_retention, - "q": term - } - - searchURL = 'https://secure.nzbs.org/rss.php?' 
+ urllib.urlencode(params) - - logger.log(u"Parsing results from "+searchURL) - d = feedparser.parse(searchURL) - - for item in d.entries: - try: - url = item.link - title = item.title - size = int(item.links[1]['length']) - if size < maxsize: - resultlist.append((title, size, url)) - logger.log(u"Found " + title +" : " + url + " (Size: " + size + ")") - else: - logger.log(title + u" is larger than the maxsize for this category, skipping. (Size: " + size +")", logger.WARNING) - - - except: - logger.log(u"No results found") - - if len(resultlist): - bestqual = sorted(resultlist, key=lambda title: title[1], reverse=True)[0] - - logger.log(u"Downloading: " + bestqual[0]) - downloadurl = bestqual[2] - - linkparams = {} - - linkparams["mode"] = "addurl" - - if sab_apikey != '': - linkparams["apikey"] = sab_apikey - if sab_username != '': - linkparams["ma_username"] = sab_username - if sab_password != '': - linkparams["ma_password"] = sab_password - if sab_category != '': - linkparams["cat"] = sab_category - - linkparams["name"] = downloadurl - - saburl = 'http://' + sab_host + '/sabnzbd/api?' + urllib.urlencode(linkparams) - logger.log(u"Sending link to SABNZBD: " + saburl) - - try: - urllib.urlopen(saburl) - - except: - logger.log(u"Unable to send link. Are you sure the host address is correct?", logger.ERROR) - - c.execute('UPDATE albums SET status = "Snatched" WHERE AlbumID="%s"' % albums[2]) - c.execute('INSERT INTO snatched VALUES( ?, ?, ?, ?, CURRENT_DATE, ?)', (albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched")) - conn.commit() - - else: - pass - - c.close() - \ No newline at end of file diff --git a/templates.py b/templates.py deleted file mode 100644 index b7bd364c..00000000 --- a/templates.py +++ /dev/null @@ -1,43 +0,0 @@ -from headphones import web_root - -_header = ''' - - - Headphones - - - - - -
''' - -_logobar = ''' - -
- ''' - -_nav = '''
''' - -_footer = ''' -
- - ''' \ No newline at end of file diff --git a/threadtools.py b/threadtools.py deleted file mode 100644 index c25f7515..00000000 --- a/threadtools.py +++ /dev/null @@ -1,41 +0,0 @@ -from cherrypy.process.plugins import SimplePlugin -from apscheduler.scheduler import Scheduler - -import os -import time -import threading -import Queue - -class threadtool(SimplePlugin): - - sched = Scheduler() - thread = None - - def __init__(self, bus): - SimplePlugin.__init__(self, bus) - - def start(self): - self.running = True - if not self.thread: - self.thread = threading.Thread(target=self.run) - self.thread.start() - self.sched.start() - start.priority = 80 - - def stop(self): - self.running = False - if self.thread: - self.thread.join() - self.thread = None - self.sched.shutdown() - stop.priority = 10 - - def run(self): - import updater - import searcher - import mover - from webServer import database - if os.path.exists(database): - self.sched.add_cron_job(updater.dbUpdate, hour=4, minute=0, second=0) - self.sched.add_interval_job(searcher.searchNZB, hours=12) - #self.sched.add_interval_job(mover.moveFiles, minutes=10) diff --git a/updater.py b/updater.py deleted file mode 100644 index 18217da5..00000000 --- a/updater.py +++ /dev/null @@ -1,76 +0,0 @@ -from webServer import database -import musicbrainz2.webservice as ws -import musicbrainz2.model as m -import musicbrainz2.utils as u -from mb import getReleaseGroup -import sqlite3 -import time - -import logger - -def dbUpdate(): - - conn=sqlite3.connect(database) - c=conn.cursor() - c.execute('SELECT ArtistID, ArtistName from artists WHERE Status="Active"') - - activeartists = c.fetchall() - - i = 0 - - while i < len(activeartists): - - artistid = activeartists[i][0] - artistname = activeartists[i][1] - logger.log(u"Updating album information for artist: " + artistname) - - c.execute('SELECT AlbumID from albums WHERE ArtistID="%s"' % artistid) - albumlist = c.fetchall() - - inc = 
ws.ArtistIncludes(releases=(m.Release.TYPE_OFFICIAL, m.Release.TYPE_ALBUM), releaseGroups=True) - artist = ws.Query().getArtistById(artistid, inc) - - for rg in artist.getReleaseGroups(): - - rgid = u.extractUuid(rg.id) - releaseid = getReleaseGroup(rgid) - inc = ws.ReleaseIncludes(artist=True, releaseEvents= True, tracks= True, releaseGroup=True) - results = ws.Query().getReleaseById(releaseid, inc) - - if any(releaseid in x for x in albumlist): - - logger.log(results.title + " already exists in the database. Updating ASIN, Release Date, Tracks") - - c.execute('UPDATE albums SET AlbumASIN="%s", ReleaseDate="%s" WHERE AlbumID="%s"' % (results.asin, results.getEarliestReleaseDate(), u.extractUuid(results.id))) - - for track in results.tracks: - c.execute('UPDATE tracks SET TrackDuration="%s" WHERE AlbumID="%s" AND TrackID="%s"' % (track.duration, u.extractUuid(results.id), u.extractUuid(track.id))) - conn.commit() - - else: - - logger.log(u"New album found! Adding "+results.title+"to the database...") - c.execute('INSERT INTO albums VALUES( ?, ?, ?, ?, ?, CURRENT_DATE, ?, ?)', (artistid, results.artist.name, results.title, results.asin, results.getEarliestReleaseDate(), u.extractUuid(results.id), 'Skipped')) - conn.commit() - c.execute('SELECT ReleaseDate, DateAdded from albums WHERE AlbumID="%s"' % u.extractUuid(results.id)) - - latestrelease = c.fetchall() - - if latestrelease[0][0] > latestrelease[0][1]: - - c.execute('UPDATE albums SET Status = "Wanted" WHERE AlbumID="%s"' % u.extractUuid(results.id)) - - else: - pass - - for track in results.tracks: - - c.execute('INSERT INTO tracks VALUES( ?, ?, ?, ?, ?, ?, ?, ?)', (artistid, results.artist.name, results.title, results.asin, u.extractUuid(results.id), track.title, track.duration, u.extractUuid(track.id))) - conn.commit() - time.sleep(1) - i += 1 - - conn.commit() - c.close() - conn.close() - diff --git a/webServer.py b/webServer.py deleted file mode 100644 index ce7b3e6d..00000000 --- a/webServer.py +++ 
/dev/null @@ -1,541 +0,0 @@ -import templates -import config -import cherrypy -import musicbrainz2.webservice as ws -import musicbrainz2.model as m -import musicbrainz2.utils as u -import os -import string -import time -import datetime -import sqlite3 -import sys -import configobj -from headphones import FULL_PATH, config_file -from mb import getReleaseGroup -import logger - -database = os.path.join(FULL_PATH, 'headphones.db') - -class Headphones: - - def index(self): - raise cherrypy.HTTPRedirect("home") - index.exposed=True - - def home(self): - page = [templates._header] - page.append(templates._logobar) - page.append(templates._nav) - - conn=sqlite3.connect(database) - c=conn.cursor() - c.execute('SELECT ArtistName, ArtistID, Status from artists order by ArtistSortName collate nocase') - results = c.fetchall() - if len(results): - i = 0 - page.append('''
- - - - - - ''') - while i < len(results): - c.execute('''SELECT AlbumTitle, ReleaseDate, DateAdded, AlbumID from albums WHERE ArtistID='%s' order by ReleaseDate DESC''' % results[i][1]) - latestalbum = c.fetchall() - today = datetime.date.today() - if len(latestalbum) > 0: - if latestalbum[0][1] > datetime.date.isoformat(today): - newalbumName = '%s' % (latestalbum[0][3], latestalbum[0][0]) - releaseDate = '(%s)' % latestalbum[0][1] - else: - newalbumName = '%s' % (latestalbum[0][3], latestalbum[0][0]) - releaseDate = "" - if len(latestalbum) == 0: - newalbumName = 'None' - releaseDate = "" - if results[i][2] == 'Paused': - newStatus = '''%s(resume)''' % (results[i][2], results[i][1]) - else: - newStatus = '''%s(pause)''' % (results[i][2], results[i][1]) - page.append(''' - - ''' % (results[i][1], results[i][0], results[i][1], results[i][1], newStatus, newalbumName, releaseDate)) - i = i+1 - c.close() - page.append('''
Artist NameStatusUpcoming Albums
%s - (link) [delete]%s%s %s
''') - page.append(templates._footer) - - else: - page.append("""
Add some artists to the database!
""") - return page - home.exposed = True - - - def artistPage(self, ArtistID): - page = [templates._header] - page.append(templates._logobar) - page.append(templates._nav) - conn=sqlite3.connect(database) - c=conn.cursor() - c.execute('''SELECT ArtistName from artists WHERE ArtistID="%s"''' % ArtistID) - artistname = c.fetchall() - c.execute('''SELECT AlbumTitle, ReleaseDate, AlbumID, Status, ArtistName, AlbumASIN from albums WHERE ArtistID="%s" order by ReleaseDate DESC''' % ArtistID) - results = c.fetchall() - c.close() - i = 0 - page.append('''
-

%s

- - - - - - - ''' % (artistname[0])) - while i < len(results): - if results[i][3] == 'Skipped': - newStatus = '''%s [want]''' % (results[i][3], results[i][2], ArtistID) - elif results[i][3] == 'Wanted': - newStatus = '''%s[skip]''' % (results[i][3], results[i][2], ArtistID) - elif results[i][3] == 'Downloaded': - newStatus = '''%s[retry]''' % (results[i][3], results[i][2], ArtistID) - elif results[i][3] == 'Snatched': - newStatus = '''%s[retry]''' % (results[i][3], results[i][2], ArtistID) - else: - newStatus = '%s' % (results[i][3]) - page.append(''' - - - ''' % (results[i][5], results[i][2], results[i][0], results[i][2], results[i][1], newStatus)) - i = i+1 - page.append('''
Album NameRelease DateStatus
%s - (link)%s%s
''') - page.append(templates._footer) - return page - artistPage.exposed = True - - - def albumPage(self, AlbumID): - page = [templates._header] - page.append(templates._logobar) - page.append(templates._nav) - conn=sqlite3.connect(database) - c=conn.cursor() - c.execute('''SELECT ArtistID, ArtistName, AlbumTitle, TrackTitle, TrackDuration, TrackID, AlbumASIN from tracks WHERE AlbumID="%s"''' % AlbumID) - results = c.fetchall() - if results[0][6]: - albumart = '''


''' % results[0][6] - else: - albumart = '' - c.close() - i = 0 - page.append('''''') - - - page.append(templates._footer) - return page - - albumPage.exposed = True - - - def findArtist(self, name): - - page = [templates._header] - if len(name) == 0 or name == 'Add an artist': - raise cherrypy.HTTPRedirect("home") - else: - artistResults = ws.Query().getArtists(ws.ArtistFilter(string.replace(name, '&', '%38'), limit=8)) - if len(artistResults) == 0: - logger.log(u"No results found for " + name) - page.append('''No results!Go back''') - return page - elif len(artistResults) > 1: - page.append('''Search returned multiple artists. Click the artist you want to add:

''') - for result in artistResults: - artist = result.artist - detail = artist.getDisambiguation() - if detail: - disambiguation = '(%s)' % detail - else: - disambiguation = '' - page.append('''%s %s (more info)
''' % (u.extractUuid(artist.id), artist.name, disambiguation, u.extractUuid(artist.id))) - return page - else: - for result in artistResults: - artist = result.artist - logger.log(u"Found one artist matching your search term: " + artist.name +" ("+ artist.id+")") - raise cherrypy.HTTPRedirect("addArtist?artistid=%s" % u.extractUuid(artist.id)) - - findArtist.exposed = True - - def artistInfo(self, artistid): - page = [templates._header] - inc = ws.ArtistIncludes(releases=(m.Release.TYPE_OFFICIAL, m.Release.TYPE_ALBUM), releaseGroups=True) - artist = ws.Query().getArtistById(artistid, inc) - page.append('''Artist Name: %s
''' % artist.name) - page.append('''Unique ID: %s

Albums:
''' % u.extractUuid(artist.id)) - for rg in artist.getReleaseGroups(): - page.append('''%s
''' % rg.title) - return page - - artistInfo.exposed = True - - def addArtist(self, artistid): - inc = ws.ArtistIncludes(releases=(m.Release.TYPE_OFFICIAL, m.Release.TYPE_ALBUM), releaseGroups=True) - artist = ws.Query().getArtistById(artistid, inc) - conn=sqlite3.connect(database) - c=conn.cursor() - c.execute('SELECT ArtistID from artists') - artistlist = c.fetchall() - if any(artistid in x for x in artistlist): - page = [templates._header] - page.append('''%s has already been added. Go back.''' % artist.name) - logger.log(artist.name + u" is already in the database!", logger.WARNING) - c.close() - return page - - else: - logger.log(u"Adding " + artist.name + " to the database.") - c.execute('INSERT INTO artists VALUES( ?, ?, ?, CURRENT_DATE, ?)', (artistid, artist.name, artist.sortName, 'Active')) - for rg in artist.getReleaseGroups(): - rgid = u.extractUuid(rg.id) - - releaseid = getReleaseGroup(rgid) - - inc = ws.ReleaseIncludes(artist=True, releaseEvents= True, tracks= True, releaseGroup=True) - results = ws.Query().getReleaseById(releaseid, inc) - - logger.log(u"Now adding album: " + results.title+ " to the database") - c.execute('INSERT INTO albums VALUES( ?, ?, ?, ?, ?, CURRENT_DATE, ?, ?)', (artistid, results.artist.name, results.title, results.asin, results.getEarliestReleaseDate(), u.extractUuid(results.id), 'Skipped')) - c.execute('SELECT ReleaseDate, DateAdded from albums WHERE AlbumID="%s"' % u.extractUuid(results.id)) - latestrelease = c.fetchall() - - if latestrelease[0][0] > latestrelease[0][1]: - logger.log(results.title + u" is an upcoming album. 
Setting its status to 'Wanted'...") - c.execute('UPDATE albums SET Status = "Wanted" WHERE AlbumID="%s"' % u.extractUuid(results.id)) - else: - pass - - for track in results.tracks: - c.execute('INSERT INTO tracks VALUES( ?, ?, ?, ?, ?, ?, ?, ?)', (artistid, results.artist.name, results.title, results.asin, u.extractUuid(results.id), track.title, track.duration, u.extractUuid(track.id))) - time.sleep(1) - - conn.commit() - c.close() - raise cherrypy.HTTPRedirect("home") - - addArtist.exposed = True - - def pauseArtist(self, ArtistID): - - conn=sqlite3.connect(database) - c=conn.cursor() - logger.log(u"Pausing artist: " + ArtistID) - c.execute('UPDATE artists SET status = "Paused" WHERE ArtistId="%s"' % ArtistID) - conn.commit() - c.close() - raise cherrypy.HTTPRedirect("home") - - pauseArtist.exposed = True - - def resumeArtist(self, ArtistID): - conn=sqlite3.connect(database) - c=conn.cursor() - logger.log(u"Resuming artist: " + ArtistID) - c.execute('UPDATE artists SET status = "Active" WHERE ArtistId="%s"' % ArtistID) - conn.commit() - c.close() - raise cherrypy.HTTPRedirect("home") - - resumeArtist.exposed = True - - def deleteArtist(self, ArtistID): - conn=sqlite3.connect(database) - c=conn.cursor() - logger.log(u"Deleting all traces of artist: " + ArtistID) - c.execute('''DELETE from artists WHERE ArtistID="%s"''' % ArtistID) - c.execute('''DELETE from albums WHERE ArtistID="%s"''' % ArtistID) - c.execute('''DELETE from tracks WHERE ArtistID="%s"''' % ArtistID) - conn.commit() - c.close() - raise cherrypy.HTTPRedirect("home") - - deleteArtist.exposed = True - - def queueAlbum(self, AlbumID, ArtistID): - conn=sqlite3.connect(database) - c=conn.cursor() - logger.log(u"Marking album: " + AlbumID + "as wanted...") - c.execute('UPDATE albums SET status = "Wanted" WHERE AlbumID="%s"' % AlbumID) - conn.commit() - c.close() - import searcher - searcher.searchNZB(AlbumID) - raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % ArtistID) - - queueAlbum.exposed = True 
- - def unqueueAlbum(self, AlbumID, ArtistID): - conn=sqlite3.connect(database) - c=conn.cursor() - logger.log(u"Marking album: " + AlbumID + "as skipped...") - c.execute('UPDATE albums SET status = "Skipped" WHERE AlbumID="%s"' % AlbumID) - conn.commit() - c.close() - raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % ArtistID) - - unqueueAlbum.exposed = True - - def upcoming(self): - page = [templates._header] - page.append(templates._logobar) - page.append(templates._nav) - today = datetime.date.today() - todaysql = datetime.date.isoformat(today) - conn=sqlite3.connect(database) - c=conn.cursor() - c.execute('''SELECT AlbumTitle, ReleaseDate, DateAdded, AlbumASIN, AlbumID, ArtistName, ArtistID from albums WHERE ReleaseDate > date('now') order by ReleaseDate DESC''') - albums = c.fetchall() - page.append('''
- - - - - - ''') - if len(albums) == 0: - page.append("""
Upcoming Albums

No albums are coming out soon :(
- (try adding some more artists!)
""") - - i = 0 - while i < len(albums): - - if albums[i][3]: - albumart = '''


''' % (albums[i][3], albums[i][3]) - else: - albumart = 'No Album Art... yet.' - - page.append(''' - - - ''' % (albumart, albums[i][6], albums[i][5], albums[i][4], albums[i][0], albums[i][1])) - i += 1 - page.append('''
%s%s%s (%s)
''') - if len(albums): - page.append(templates._footer) - - return page - upcoming.exposed = True - - def manage(self): - config = configobj.ConfigObj(config_file) - try: - path = config['General']['path_to_xml'] - except: - path = 'Absolute path to iTunes XML or Top-Level Music Directory' - try: - path2 = config['General']['path_to_itunes'] - except: - path2 = 'Enter a directory to scan' - page = [templates._header] - page.append(templates._logobar) - page.append(templates._nav) - page.append(''' -

Scan Music Library


- Where do you keep your music?

- You can put in any directory, and it will scan for audio files in that folder - (including all subdirectories)

For example: '/Users/name/Music' -

- It may take a while depending on how many files you have. You can navigate away from the page
- as soon as you click 'Submit' -

- -
- -


-

Import or Sync Your iTunes Library/Music Folder


- This is here for legacy purposes (try the Music Scanner above!)

- If you'd rather import an iTunes .xml file, you can enter the full path here.

-
- -


- ''' % (path2, path)) - page.append(templates._footer) - return page - manage.exposed = True - - def importItunes(self, path): - config = configobj.ConfigObj(config_file) - config['General']['path_to_xml'] = path - config.write() - import itunesimport - itunesimport.itunesImport(path) - raise cherrypy.HTTPRedirect("home") - importItunes.exposed = True - - def musicScan(self, path): - config = configobj.ConfigObj(config_file) - config['General']['path_to_itunes'] = path - config.write() - import itunesimport - itunesimport.scanMusic(path) - raise cherrypy.HTTPRedirect("home") - musicScan.exposed = True - - def forceUpdate(self): - import updater - updater.dbUpdate() - raise cherrypy.HTTPRedirect("home") - forceUpdate.exposed = True - - def forceSearch(self): - import searcher - searcher.searchNZB() - raise cherrypy.HTTPRedirect("home") - forceSearch.exposed = True - - - def history(self): - page = [templates._header] - page.append(templates._logobar) - page.append(templates._nav) - conn=sqlite3.connect(database) - c=conn.cursor() - c.execute('''SELECT AlbumID, Title TEXT, Size INTEGER, URL TEXT, DateAdded TEXT, Status TEXT from snatched order by DateAdded DESC''') - snatched = c.fetchall() - page.append('''
- - - - - - ''') - if len(snatched) == 0: - page.append("""
History clear all

""") - - i = 0 - while i < len(snatched): - mb = snatched[i][2] / 1048576 - size = '%.2fM' % mb - page.append(''' - - - - - ''' % (snatched[i][5], snatched[i][1], size, snatched[i][4])) - i += 1 - page.append('''
%s%s%s%s
''') - if len(snatched): - page.append(templates._footer) - return page - history.exposed = True - - def clearhistory(self): - conn=sqlite3.connect(database) - c=conn.cursor() - logger.log(u"Clearing history") - c.execute('''DELETE from snatched''') - conn.commit() - c.close() - raise cherrypy.HTTPRedirect("history") - clearhistory.exposed = True - - def config(self): - page = [templates._header] - page.append(templates._logobar) - page.append(templates._nav) - page.append(config.form) - #page.append(templates._footer) - return page - - config.exposed = True - - - def configUpdate(self, http_host='127.0.0.1', http_username=None, http_port=8181, http_password=None, launch_browser=0, - sab_host=None, sab_username=None, sab_apikey=None, sab_password=None, sab_category=None, music_download_dir=None, - usenet_retention=None, nzbmatrix=0, nzbmatrix_username=None, nzbmatrix_apikey=None, newznab=0, newznab_host=None, newznab_apikey=None, - nzbsorg=0, nzbsorg_uid=None, nzbsorg_hash=None, include_lossless=0,flac_to_mp3=0, move_to_itunes=0, path_to_itunes=None, rename_mp3s=0, cleanup=0, add_album_art=0): - - configs = configobj.ConfigObj(config_file) - SABnzbd = configs['SABnzbd'] - General = configs['General'] - NZBMatrix = configs['NZBMatrix'] - Newznab = configs['Newznab'] - NZBsorg = configs['NZBsorg'] - General['http_host'] = http_host - General['http_port'] = http_port - General['http_username'] = http_username - General['http_password'] = http_password - General['launch_browser'] = launch_browser - SABnzbd['sab_host'] = sab_host - SABnzbd['sab_username'] = sab_username - SABnzbd['sab_password'] = sab_password - SABnzbd['sab_apikey'] = sab_apikey - SABnzbd['sab_category'] = sab_category - General['music_download_dir'] = music_download_dir - General['usenet_retention'] = usenet_retention - NZBMatrix['nzbmatrix'] = nzbmatrix - NZBMatrix['nzbmatrix_username'] = nzbmatrix_username - NZBMatrix['nzbmatrix_apikey'] = nzbmatrix_apikey - Newznab['newznab'] = newznab - 
Newznab['newznab_host'] = newznab_host - Newznab['newznab_apikey'] = newznab_apikey - NZBsorg['nzbsorg'] = nzbsorg - NZBsorg['nzbsorg_uid'] = nzbsorg_uid - NZBsorg['nzbsorg_hash'] = nzbsorg_hash - General['include_lossless'] = include_lossless - General['flac_to_mp3'] = flac_to_mp3 - General['move_to_itunes'] = move_to_itunes - General['path_to_itunes'] = path_to_itunes - General['rename_mp3s'] = rename_mp3s - General['cleanup'] = cleanup - General['add_album_art'] = add_album_art - - configs.write() - reload(config) - raise cherrypy.HTTPRedirect("config") - - - configUpdate.exposed = True - - def shutdown(self): - sys.exit(0) - - shutdown.exposed = True - - def restart(self): - logger.log(u"Restarting Headphones.") - restart = True - #answer = raw_input("Do you want to restart this program ? ") - #if answer.strip() in "y Y yes Yes YES".split(): - #restart = True - if restart: - python = sys.executable - os.execl(python, python, * sys.argv) - - restart.exposed = True \ No newline at end of file