diff --git a/com.headphones.headphones.plist b/com.headphones.headphones.plist new file mode 100644 index 00000000..70c64752 --- /dev/null +++ b/com.headphones.headphones.plist @@ -0,0 +1,17 @@ + + + + + Label + com.headphones.headphones + ProgramArguments + + /usr/bin/python + /Applications/Headphones/headphones.py + -q + -d + + RunAtLoad + + + \ No newline at end of file diff --git a/data/images/checkmark.png b/data/images/checkmark.png new file mode 100644 index 00000000..d265c512 Binary files /dev/null and b/data/images/checkmark.png differ diff --git a/headphones/__init__.py b/headphones/__init__.py new file mode 100644 index 00000000..5a6796d1 --- /dev/null +++ b/headphones/__init__.py @@ -0,0 +1,387 @@ +import os, sys, subprocess + +import threading +import webbrowser +import sqlite3 + +from lib.apscheduler.scheduler import Scheduler +from lib.configobj import ConfigObj + +import cherrypy + +from headphones import updater, searcher, itunesimport, versioncheck, logger + +FULL_PATH = None +PROG_DIR = None + +ARGS = None +INVOKED_COMMAND = None + +QUIET = False +DAEMON = False + +SCHED = Scheduler() + +INIT_LOCK = threading.Lock() +__INITIALIZED__ = False +started = False + +DATA_DIR = None + +CONFIG_FILE = None +CFG = None + +DB_FILE = None + +LOG_DIR = None + +HTTP_PORT = None +HTTP_HOST = None +HTTP_USERNAME = None +HTTP_PASSWORD = None +HTTP_ROOT = None +LAUNCH_BROWSER = False + +GIT_PATH = None +CURRENT_VERSION = None +LATEST_VERSION = None +COMMITS_BEHIND = None + +MUSIC_DIR = None +FOLDER_FORMAT = None +FILE_FORMAT = None +PATH_TO_XML = None +PREFER_LOSSLESS = False +FLAC_TO_MP3 = False +MOVE_FILES = False +RENAME_FILES = False +CLEANUP_FILES = False +ADD_ALBUM_ART = False +DOWNLOAD_DIR = None +USENET_RETENTION = None + +NZB_SEARCH_INTERVAL = 360 +LIBRARYSCAN_INTERVAL = 60 + +SAB_HOST = None +SAB_USERNAME = None +SAB_PASSWORD = None +SAB_APIKEY = None +SAB_CATEGORY = None + +NZBMATRIX = False +NZBMATRIX_USERNAME = None +NZBMATRIX_APIKEY = None + +NEWZNAB 
def CheckSection(sec):
    """Make sure the INI section ``sec`` exists in the global ``CFG``.

    Returns True when the section was already present; otherwise creates it
    as an empty section and returns False.
    """
    try:
        CFG[sec]
        return True
    except KeyError:
        CFG[sec] = {}
        return False


def _store_setting(config, cfg_name, item_name, value):
    """Write ``value`` back into ``config``, creating the section if needed."""
    try:
        config[cfg_name][item_name] = value
    except (KeyError, TypeError):
        # The section is missing (or is not a mapping): start it afresh.
        config[cfg_name] = {}
        config[cfg_name][item_name] = value


################################################################################
# Check_setting_int                                                            #
################################################################################
def check_setting_int(config, cfg_name, item_name, def_val):
    """Read ``config[cfg_name][item_name]`` as an int, falling back to ``def_val``.

    The resolved value (converted or default) is written back into ``config``
    so that the setting exists afterwards, logged at debug level and returned.
    ``def_val`` is returned unchanged when the stored value is missing or
    cannot be converted with ``int()``.
    """
    try:
        my_val = int(config[cfg_name][item_name])
    except (KeyError, TypeError, ValueError):
        my_val = def_val
    _store_setting(config, cfg_name, item_name, my_val)
    logger.debug("%s -> %s" % (item_name, my_val))
    return my_val


################################################################################
# Check_setting_str                                                            #
################################################################################
def check_setting_str(config, cfg_name, item_name, def_val, log=True):
    """Read ``config[cfg_name][item_name]``, falling back to ``def_val``.

    The resolved value is written back into ``config`` and returned as is (no
    conversion is applied).  Pass ``log=False`` for secrets: the debug line
    then shows ``******`` instead of the value.
    """
    try:
        my_val = config[cfg_name][item_name]
    except (KeyError, TypeError):
        my_val = def_val
    _store_setting(config, cfg_name, item_name, my_val)

    if log:
        # %-formatting instead of "+" so that a non-string value (None, an
        # int, a list) is logged rather than raising TypeError.
        logger.debug("%s -> %s" % (item_name, my_val))
    else:
        logger.debug(item_name + " -> ******")
    return my_val
NZBSORG, NZBSORG_UID, NZBSORG_HASH + + if __INITIALIZED__: + return False + + # Make sure all the config sections exist + CheckSection('General') + CheckSection('SABnzbd') + CheckSection('NZBMatrix') + CheckSection('Newznab') + CheckSection('NZBsorg') + + # Set global variables based on config file or use defaults + try: + HTTP_PORT = check_setting_int(CFG, 'General', 'http_port', 8181) + except: + HTTP_PORT = 8181 + + if HTTP_PORT < 21 or HTTP_PORT > 65535: + HTTP_PORT = 8181 + + HTTP_HOST = check_setting_str(CFG, 'General', 'http_host', '0.0.0.0') + HTTP_USERNAME = check_setting_str(CFG, 'General', 'http_username', '') + HTTP_PASSWORD = check_setting_str(CFG, 'General', 'http_password', '') + HTTP_ROOT = check_setting_str(CFG, 'General', 'http_root', '/') + LAUNCH_BROWSER = bool(check_setting_int(CFG, 'General', 'launch_browser', 1)) + GIT_PATH = check_setting_str(CFG, 'General', 'git_path', '') + + MUSIC_DIR = check_setting_str(CFG, 'General', 'music_dir', '') + PREFER_LOSSLESS = bool(check_setting_int(CFG, 'General', 'prefer_lossless', 0)) + FLAC_TO_MP3 = bool(check_setting_int(CFG, 'General', 'flac_to_mp3', 0)) + MOVE_FILES = bool(check_setting_int(CFG, 'General', 'move_files', 0)) + RENAME_FILES = bool(check_setting_int(CFG, 'General', 'rename_files', 0)) + FOLDER_FORMAT = check_setting_str(CFG, 'General', 'folder_format', '%artist/%album/%track') + FILE_FORMAT = check_setting_str(CFG, 'General', 'file_format', '%tracknumber %artist - %album - %title') + CLEANUP_FILES = bool(check_setting_int(CFG, 'General', 'cleanup_files', 0)) + ADD_ALBUM_ART = bool(check_setting_int(CFG, 'General', 'add_album_art', 0)) + DOWNLOAD_DIR = check_setting_str(CFG, 'General', 'download_dir', '') + USENET_RETENTION = check_setting_int(CFG, 'General', 'usenet_retention', '') + + NZB_SEARCH_INTERVAL = check_setting_int(CFG, 'General', 'nzb_search_interval', 360) + LIBRARYSCAN_INTERVAL = check_setting_int(CFG, 'General', 'libraryscan_interval', 180) + + SAB_HOST = 
def daemonize():
    """Detach the running process from its terminal with a double fork.

    After each ``os.fork()`` the parent exits and the child carries on; the
    first child calls ``os.setsid()`` in between.  The surviving process
    changes directory to ``/``, clears the umask and points stdin, stdout and
    stderr at ``/dev/null``.  A failed fork ends the process via ``sys.exit``
    with a message naming the fork that failed.
    """
    if threading.activeCount() != 1:
        # Adjacent literals instead of a backslash continuation, which used
        # to embed the source indentation in the middle of the message.
        logger.warn('There are %r active threads. Daemonizing may cause '
                    'strange behavior.' % (threading.enumerate(),))

    sys.stdout.flush()
    sys.stderr.flush()

    # Do first fork
    try:
        pid = os.fork()
        if pid != 0:
            # Exit the parent process
            logger.debug('Forking once...')
            os._exit(0)
    except OSError as e:
        sys.exit("1st fork failed: %s [%d]" % (e.strerror, e.errno))

    os.setsid()

    # Do second fork
    try:
        pid = os.fork()
        if pid > 0:
            logger.debug('Forking twice...')
            os._exit(0)  # Exit second parent process
    except OSError as e:
        sys.exit("2nd fork failed: %s [%d]" % (e.strerror, e.errno))

    os.chdir("/")
    os.umask(0)

    # Replace the standard streams at file-descriptor level.
    si = open('/dev/null', "r")
    so = open('/dev/null', "a+")
    se = open('/dev/null', "a+")

    os.dup2(si.fileno(), sys.stdin.fileno())
    os.dup2(so.fileno(), sys.stdout.fileno())
    os.dup2(se.fileno(), sys.stderr.fileno())

    logger.info('Daemonized to PID: %s' % os.getpid())


def launch_browser(host, port, root):
    """Open the web interface at ``http://host:port root`` in a browser.

    The wildcard address ``0.0.0.0`` is replaced by ``localhost`` because it
    is not a usable URL host.  Any failure is logged, never raised.
    """
    if host == '0.0.0.0':
        host = 'localhost'

    try:
        webbrowser.open('http://%s:%i%s' % (host, port, root))
    except Exception as e:
        logger.error('Could not launch browser: %s' % e)
def start():
    """Register the recurring background jobs and start the scheduler.

    Does nothing until ``__INITIALIZED__`` is set, and nothing on a repeated
    call, so the jobs are never registered twice.
    """
    global __INITIALIZED__, started

    if not __INITIALIZED__ or started:
        return

    SCHED.add_cron_job(updater.dbUpdate, hour=4, minute=0, second=0)
    SCHED.add_interval_job(searcher.searchNZB, minutes=NZB_SEARCH_INTERVAL)
    SCHED.add_interval_job(itunesimport.scanMusic, minutes=LIBRARYSCAN_INTERVAL)

    SCHED.add_interval_job(versioncheck.checkGithub, minutes=60)

    SCHED.start()

    started = True


def dbcheck():
    """Create any table that is missing from the SQLite database ``DB_FILE``."""
    statements = (
        'CREATE TABLE IF NOT EXISTS artists (ArtistID TEXT UNIQUE, ArtistName TEXT, ArtistSortName TEXT, DateAdded TEXT, Status TEXT)',
        'CREATE TABLE IF NOT EXISTS albums (ArtistID TEXT, ArtistName TEXT, AlbumTitle TEXT, AlbumASIN TEXT, ReleaseDate TEXT, DateAdded TEXT, AlbumID TEXT UNIQUE, Status TEXT)',
        'CREATE TABLE IF NOT EXISTS tracks (ArtistID TEXT, ArtistName TEXT, AlbumTitle TEXT, AlbumASIN TEXT, AlbumID TEXT, TrackTitle TEXT, TrackDuration, TrackID TEXT)',
        'CREATE TABLE IF NOT EXISTS snatched (AlbumID TEXT, Title TEXT, Size INTEGER, URL TEXT, DateAdded TEXT, Status TEXT)',
        'CREATE TABLE IF NOT EXISTS extras (ArtistID TEXT, ArtistName TEXT, AlbumTitle TEXT, AlbumASIN TEXT, ReleaseDate TEXT, DateAdded TEXT, AlbumID TEXT UNIQUE, Status TEXT)',
        'CREATE TABLE IF NOT EXISTS have (ArtistName TEXT, AlbumTitle TEXT, TrackNumber TEXT, TrackTitle TEXT, TrackLength TEXT, BitRate TEXT, Genre TEXT, Date TEXT, TrackID TEXT)',
    )

    conn = sqlite3.connect(DB_FILE)
    try:
        c = conn.cursor()
        for statement in statements:
            c.execute(statement)
        conn.commit()
        c.close()
    finally:
        # Closing the cursor alone leaves the connection open.
        conn.close()


def shutdown(restart=False, update=False):
    """Stop the web server and scheduler, save the config and exit.

    With ``update`` the new version is pulled first; with ``restart`` a fresh
    process is spawned with the original arguments plus ``--nolaunch``.  The
    current process always ends through ``os._exit(0)``.
    """
    cherrypy.engine.exit()
    SCHED.shutdown(wait=False)

    config_write()

    if update:
        versioncheck.update()

    if restart:

        popen_list = [sys.executable, FULL_PATH]
        # ARGS is None until the launcher sets it; treat that as "no args".
        popen_list += list(ARGS or [])
        if '--nolaunch' not in popen_list:
            popen_list += ['--nolaunch']
        logger.info('Restarting Headphones with ' + str(popen_list))
        subprocess.Popen(popen_list, cwd=os.getcwd())

    os._exit(0)


def multikeysort(items, columns):
    """Return ``items`` (a sequence of mappings) sorted by several keys.

    ``columns`` lists the keys from most to least significant; a leading
    ``-`` sorts that key in descending order.  Items that compare equal on
    every key keep their input order, and ``items`` itself is not modified.
    """
    from operator import itemgetter

    ordered = list(items)
    # Sorting is stable (also with reverse=True), so applying the keys from
    # least to most significant gives the combined ordering without needing
    # the Python-2-only cmp machinery.
    for col in reversed(columns):
        descending = col.startswith('-')
        key = col[1:].strip() if descending else col.strip()
        ordered.sort(key=itemgetter(key), reverse=descending)
    return ordered


def checked(variable):
    """Return ``'Checked'`` for a truthy value and ``''`` otherwise."""
    if variable:
        return 'Checked'
    return ''
def scanMusic(dir=None):
    """Scan a music folder, rebuild the ``have`` table and import its artists.

    ``dir`` defaults to ``headphones.MUSIC_DIR``.  Every file ending in
    .mp3, .flac, .aac, .ogg or .ape below it is read with ``MediaFile``;
    files that cannot be read, or that carry no artist tag, are skipped.  The
    ``have`` table is emptied and refilled from the readable files, and the
    distinct artist names are handed to ``importartist``.  Nothing is
    touched when no music file is found.
    """
    if not dir:
        dir = headphones.MUSIC_DIR

    extensions = (".mp3", ".flac", ".aac", ".ogg", ".ape")
    results = []
    for root, dirnames, filenames in os.walk(dir):
        for filename in filenames:
            if filename.endswith(extensions):
                results.append(os.path.join(root, filename))

    logger.info(u'%i music files found' % len(results))

    if not results:
        return

    artists = []

    # open db connection to write songs you have
    conn = sqlite3.connect(headphones.DB_FILE)
    try:
        c = conn.cursor()
        c.execute('''DELETE from have''')

        for song in results:
            try:
                media = MediaFile(song)
            except Exception:
                # logger.info() takes a single argument; an unreadable file
                # is reported through logger.error() instead.
                logger.error("Could not read file: '" + song + "'")
                continue
            if not media.artist:
                continue
            c.execute('INSERT INTO have VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?)', (media.artist, media.album, media.track, media.title, media.length, media.bitrate, media.genre, media.date, media.mb_trackid))
            artists.append(media.artist)

        conn.commit()
        c.close()
    finally:
        conn.close()

    artistlist = list(set(artists))
    logger.info(u"Preparing to import %i artists" % len(artistlist))
    importartist(artistlist)


def itunesImport(pathtoxml):
    """Import artists from an iTunes library XML file or from a directory.

    A path ending in ``.xml`` is parsed as an iTunes library and the artist
    of every song is collected.  Any other path is listed as a directory
    whose entries are taken as artist names, minus a few well-known
    non-artist names.  The distinct names go to ``importartist``.
    """
    if os.path.splitext(pathtoxml)[1] == '.xml':
        logger.info(u"Loading xml file from"+ pathtoxml)
        pl = XMLLibraryParser(pathtoxml)
        library = Library(pl.dictionary)
        names = set(song.artist for song in library.songs)
        artistlist = [name for name in names if name is not None]
    else:
        rawlist = os.listdir(pathtoxml)
        logger.info(u"Loading artists from directory:" +pathtoxml)
        exclude = ['.ds_store', 'various artists', 'untitled folder', 'va']
        artistlist = [name for name in rawlist if name.lower() not in exclude]
    importartist(artistlist)


def _add_artist(result):
    """Store one MusicBrainz artist search result with its albums and tracks."""
    artistid = u.extractUuid(result.artist.id)

    conn = sqlite3.connect(headphones.DB_FILE)
    try:
        c = conn.cursor()
        c.execute('SELECT ArtistID from artists WHERE ArtistID=?', (artistid,))
        if c.fetchone() is not None:
            logger.info(result.artist.name + u" is already in the database, skipping")
            return

        inc = ws.ArtistIncludes(releases=(m.Release.TYPE_OFFICIAL, m.Release.TYPE_ALBUM), releaseGroups=True)
        artist = ws.Query().getArtistById(artistid, inc)
        c.execute('INSERT INTO artists VALUES( ?, ?, ?, CURRENT_DATE, ?)', (artistid, artist.name, artist.sortName, 'Active'))

        for rg in artist.getReleaseGroups():
            rgid = u.extractUuid(rg.id)

            releaseid = getReleaseGroup(rgid)

            inc = ws.ReleaseIncludes(artist=True, releaseEvents=True, tracks=True, releaseGroup=True)
            results = ws.Query().getReleaseById(releaseid, inc)
            albumid = u.extractUuid(results.id)

            logger.info(u"Now adding album: " + results.title+ " to the database")
            c.execute('INSERT INTO albums VALUES( ?, ?, ?, ?, ?, CURRENT_DATE, ?, ?)', (artistid, results.artist.name, results.title, results.asin, results.getEarliestReleaseDate(), albumid, 'Skipped'))
            conn.commit()

            # Read the row back so the release date is compared with the
            # CURRENT_DATE value SQLite just stored.
            c.execute('SELECT ReleaseDate, DateAdded from albums WHERE AlbumID=?', (albumid,))
            release_date, date_added = c.fetchone()

            if release_date and release_date > date_added:
                logger.info(results.title + u" is an upcoming album. Setting its status to 'Wanted'...")
                c.execute("UPDATE albums SET Status = 'Wanted' WHERE AlbumID=?", (albumid,))

            for track in results.tracks:
                c.execute('INSERT INTO tracks VALUES( ?, ?, ?, ?, ?, ?, ?, ?)', (artistid, results.artist.name, results.title, results.asin, albumid, track.title, track.duration, u.extractUuid(track.id)))
            time.sleep(1)

        conn.commit()
        c.close()
    finally:
        conn.close()


def importartist(artistlist):
    """Look up each artist name on MusicBrainz and add new ones to the database.

    For every name only the top search hit is used.  A hit called
    'Various Artists' is skipped.  An artist not yet in the ``artists``
    table is inserted with status 'Active'; each of its release groups is
    inserted as an album with status 'Skipped' (switched to 'Wanted' when
    its release date lies after the day it was added), along with its tracks.
    One-second pauses are kept between groups of web-service requests.
    """
    for name in artistlist:
        logger.info(u"Querying MusicBrainz for: "+name)
        # The name is passed through unchanged, as mb.findArtist does.  The
        # old '&' -> '%38' substitution was wrong: %38 is the escape for "8".
        # NOTE(review): assumes the web-service layer URL-encodes filter
        # values itself - confirm against lib.musicbrainz2.
        artistResults = ws.Query().getArtists(ws.ArtistFilter(name=name, limit=1))
        for result in artistResults:
            if result.artist.name == 'Various Artists':
                logger.warn(u"Top result is Various Artists. Skipping.")
            else:
                logger.info(u"Found best match: "+result.artist.name+". Gathering album information...")
                _add_artist(result)
            time.sleep(1)
# --- headphones/logger.py: module-level shortcuts around headphones_log ---

def debug(message):
    """Log ``message`` at debug level through ``headphones_log``."""
    headphones_log.log(message, level='debug')


def info(message):
    """Log ``message`` at info level through ``headphones_log``."""
    headphones_log.log(message, level='info')


def warn(message):
    """Log ``message`` at warn level through ``headphones_log``."""
    headphones_log.log(message, level='warn')


def error(message):
    """Log ``message`` at error level through ``headphones_log``."""
    headphones_log.log(message, level='error')


# --- headphones/mb.py: MusicBrainz lookups through the shared query ``q`` ---

def findArtist(name, limit=1):
    """Search MusicBrainz for ``name``.

    Returns a list of ``[artist name, artist id]`` pairs holding at most
    ``limit`` entries, in the order the web service returned them.
    """
    artistResults = q.getArtists(ws.ArtistFilter(name=name, limit=limit))
    return [[result.artist.name, u.extractUuid(result.artist.id)]
            for result in artistResults]


def getArtist(artistid):
    """Return ``[title, release group id]`` pairs for an artist's albums."""
    # Get all official release groups
    inc = ws.ArtistIncludes(releases=(m.Release.TYPE_OFFICIAL, m.Release.TYPE_ALBUM), ratings=False, releaseGroups=True)
    artist = q.getArtistById(artistid, inc)

    return [[rg.title, u.extractUuid(rg.id)] for rg in artist.getReleaseGroups()]


def getReleaseGroup(rgid):
    """Pick the preferred release of a release group and return its id.

    Releases that have an ASIN are preferred, then releases with more
    tracks; remaining ties go to the release listed first.  Raises
    ``IndexError`` when the release group contains no release.
    """
    releaselist = []

    inc = ws.ReleaseGroupIncludes(releases=True)
    releaseGroup = q.getReleaseGroupById(rgid, inc)

    # I think for now we have to make separate queries for each release, in order
    # to get more detailed release info (ASIN, track count, etc.)
    for release in releaseGroup.releases:

        releaseid = u.extractUuid(release.id)
        inc = ws.ReleaseIncludes(tracks=True)

        releaseResult = q.getReleaseById(releaseid, inc)

        releaselist.append({
            'asin': bool(releaseResult.asin),
            'tracks': len(releaseResult.getTracks()),
            'releaseid': u.extractUuid(releaseResult.id),
        })
        time.sleep(1)

    if not releaselist:
        # Same exception type as the former a[0] lookup, but it names the
        # release group that caused it.
        raise IndexError('release group %s has no releases' % rgid)

    best = multikeysort(releaselist, ['-asin', '-tracks'])[0]

    return best['releaseid']


def getExtras(artistid):
    """Print title and release type of an artist's non-album releases.

    Covers EPs, singles, live releases, remixes and compilations; for each
    release group the preferred release (see ``getReleaseGroup``) is shown.
    """
    types = [m.Release.TYPE_EP, m.Release.TYPE_SINGLE, m.Release.TYPE_LIVE, m.Release.TYPE_REMIX,
             m.Release.TYPE_COMPILATION]

    for release_type in types:

        inc = ws.ArtistIncludes(releases=(m.Release.TYPE_OFFICIAL, release_type), releaseGroups=True)
        artist = q.getArtistById(artistid, inc)

        for rg in artist.getReleaseGroups():

            rgid = u.extractUuid(rg.id)
            releaseid = getReleaseGroup(rgid)

            inc = ws.ReleaseIncludes(artist=True, releaseEvents=True, tracks=True, releaseGroup=True)
            results = q.getReleaseById(releaseid, inc)

            print(results.title)
            print(u.getReleaseTypeName(results.releaseGroup.type))


# --- headphones/mover.py ---

def moveFiles():
    """Copy every .mp3 below ``headphones.DOWNLOAD_DIR`` into the music folder.

    Files are copied (``shutil.copy2``), not moved, and land directly in the
    destination folder under their own file name.
    """
    # NOTE(review): the original referenced an undefined ``path_to_itunes``;
    # headphones.MUSIC_DIR is presumably the intended target - confirm.
    destination = headphones.MUSIC_DIR
    if not destination:
        logger.warn('No music folder configured, not copying any files')
        return

    for root, dirs, files in os.walk(headphones.DOWNLOAD_DIR):
        for name in files:
            source = os.path.join(root, name)
            # Test the full path: the bare name is only valid relative to
            # the current working directory.
            if name[-4:].lower() == '.mp3' and os.path.isfile(source):
                logger.info('Copying %s to %s' % (source, destination))
                shutil.copy2(source, os.path.join(destination, name))
def _nzb_results(searchURL, maxsize):
    """Fetch one provider feed and return its usable results.

    Returns a list of ``(title, size, url)`` tuples for the feed entries
    whose size is below ``maxsize``.  Entries lacking a link, a title or a
    parsable size are skipped.
    """
    # NOTE(review): the URL carries the provider API key into the log.
    logger.info(u"Parsing results from "+searchURL)
    found = []
    for item in feedparser.parse(searchURL).entries:
        try:
            url = item.link
            title = item.title
            size = int(item.links[1]['length'])
        except (AttributeError, IndexError, KeyError, TypeError, ValueError):
            logger.info(u"No results found")
            continue
        # %-formatting: ``size`` is an int and cannot be joined with "+".
        if size < maxsize:
            found.append((title, size, url))
            logger.info(u"Found %s : %s (Size: %s)" % (title, url, size))
        else:
            logger.info(u"%s is larger than the maxsize for this category, skipping. (Size: %s)" % (title, size))
    return found


def searchNZB(albumid=None):
    """Search the enabled NZB providers for wanted albums and send hits to SABnzbd.

    With ``albumid`` only that album is searched (if its status is
    'Wanted'); otherwise every 'Wanted' album is.  For each album the
    largest result below the size limit is handed to SABnzbd.  Only when
    that request succeeds is the album marked 'Snatched' and recorded in the
    ``snatched`` table; on failure it stays 'Wanted'.
    """
    lossless = headphones.PREFER_LOSSLESS
    maxsize = 2000000000 if lossless else 250000000

    conn = sqlite3.connect(headphones.DB_FILE)
    try:
        c = conn.cursor()

        if albumid:
            c.execute("SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE Status='Wanted' AND AlbumID=?", (albumid,))
        else:
            c.execute("SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE Status='Wanted'")

        results = c.fetchall()

        for albums in results:

            # The release date may be NULL; search without a year then.
            year = (albums[3] or '')[:4]
            clname = albums[0].replace(' & ', ' ')
            clalbum = albums[1].replace(' & ', ' ')
            term = re.sub(r'[\.\-]', ' ', u'%s %s %s' % (clname, clalbum, year)).replace('"', '').strip()
            # Text for the log, UTF-8 bytes for the query string.
            query = term.encode('utf-8')

            logger.info(u"Searching for "+term+" since it was marked as wanted")

            resultlist = []

            if headphones.NZBMATRIX:
                params = {"page": "download",
                          "username": headphones.NZBMATRIX_USERNAME,
                          "apikey": headphones.NZBMATRIX_APIKEY,
                          "subcat": "23,22" if lossless else "22",
                          "age": headphones.USENET_RETENTION,
                          "english": 1,
                          "ssl": 1,
                          "scenename": 1,
                          "term": query
                          }
                searchURL = "http://rss.nzbmatrix.com/rss.php?" + urllib.urlencode(params)
                resultlist += _nzb_results(searchURL, maxsize)

            if headphones.NEWZNAB:
                params = {"t": "search",
                          "apikey": headphones.NEWZNAB_APIKEY,
                          "cat": "3040,3010" if lossless else "3010",
                          "maxage": headphones.USENET_RETENTION,
                          "q": query
                          }
                searchURL = headphones.NEWZNAB_HOST + '/api?' + urllib.urlencode(params)
                resultlist += _nzb_results(searchURL, maxsize)

            if headphones.NZBSORG:
                params = {"action": "search",
                          "dl": 1,
                          "catid": "5,3010" if lossless else "5",
                          "i": headphones.NZBSORG_UID,
                          "h": headphones.NZBSORG_HASH,
                          "age": headphones.USENET_RETENTION,
                          "q": query
                          }
                searchURL = 'https://secure.nzbs.org/rss.php?' + urllib.urlencode(params)
                resultlist += _nzb_results(searchURL, maxsize)

            if not resultlist:
                continue

            # Largest download wins; the first one on a tie.
            bestqual = max(resultlist, key=lambda result: result[1])

            logger.info(u"Downloading: " + bestqual[0])
            downloadurl = bestqual[2]

            linkparams = {}

            linkparams["mode"] = "addurl"

            if headphones.SAB_APIKEY:
                linkparams["apikey"] = headphones.SAB_APIKEY
            if headphones.SAB_USERNAME:
                linkparams["ma_username"] = headphones.SAB_USERNAME
            if headphones.SAB_PASSWORD:
                linkparams["ma_password"] = headphones.SAB_PASSWORD
            if headphones.SAB_CATEGORY:
                linkparams["cat"] = headphones.SAB_CATEGORY

            linkparams["name"] = downloadurl

            saburl = 'http://' + headphones.SAB_HOST + '/sabnzbd/api?' + urllib.urlencode(linkparams)
            # NOTE(review): this line writes the SABnzbd credentials to the log.
            logger.info(u"Sending link to SABNZBD: " + saburl)

            try:
                urllib.urlopen(saburl)
            except Exception:
                logger.error(u"Unable to send link. Are you sure the host address is correct?")
                # Nothing was queued: keep the album 'Wanted' for a retry.
                continue

            c.execute("UPDATE albums SET status = 'Snatched' WHERE AlbumID=?", (albums[2],))
            c.execute('INSERT INTO snatched VALUES( ?, ?, ?, ?, CURRENT_DATE, ?)', (albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched"))
            conn.commit()

        c.close()
    finally:
        conn.close()
''' + +_logobar = ''' + +
+ ''' + +_nav = '''
''' + +_footer = ''' +
+ + ''' + +configform = form = ''' +
+
+
+ Web Interface | + Download Settings | + Search Providers | + Quality & Post Processing +
+
+
+
+
+

Web Interface

+ + + + + + + + + + + + + + + + +
+

+ HTTP Host:

+
+ i.e. localhost or 0.0.0.0 +

+
+

+ HTTP Username:

+ +

+
+

+ HTTP Port:

+ +

+
+

+ HTTP Password:

+ +

+
+

Launch Browser on Startup:

+
+ +

Download Settings

+ + + + + + + + + + + + + + + + + + + + + + + +
+

SABnzbd Host:


+ + usually localhost:8080 +
+

SABnzbd Username:

+
+
+ +

SABnzbd API:

+
+
+ +

SABnzbd Password:

+
+
+ +

SABnzbd Category:

+
+
+ +

Music Download Directory:


+ + Absolute or relative path to the dir where SAB downloads your music
+ i.e. Downloads/music or /Users/name/Downloads/music
+
+
+ +

Usenet Retention:

+
+ +

Search Providers

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+

NZBMatrix:

+
+

+ NZBMatrix Username:
+ +

+
+

+ NZBMatrix API:
+ +

+
+
+ +

Newznab:

+
+
+ +

+ Newznab Host:
+
+ i.e. http://nzb.su +

+
+
+ +

+ Newznab API:
+ +

+
+
+ +

NZBs.org:

+
+
+ +

+ NZBs.org UID:
+ +

+
+
+ +

+ NZBs.org Hash:
+ +

+
+ +

Quality & Post Processing

+ + + + + + + + + + + + + + + + +
+

Album Quality:

+ Prefer lossless
+ Convert lossless to mp3 +
+

+

iTunes:

+ Move downloads to Music Folder +

+
+
+ +

Path to Music folder:
+
+ i.e. /Users/name/Music/iTunes or /Volumes/share/music +

+
+ Renaming & Metadata: +

+ Rename & add metadata +
+ Delete leftover files +

+
+
+

Album Art:

+ Add album art +
+ +


+ (Web Interface changes require a restart to take effect)

+
+
+
def dbUpdate():
    """Refresh album and track data for every artist with status 'Active'.

    Each release group of the artist is resolved to its preferred release
    through ``getReleaseGroup``.  A release already stored as an album gets
    its ASIN, release date and track durations updated.  A new one is
    inserted with status 'Skipped' (switched to 'Wanted' when its release
    date lies after the day it was added) together with its tracks.
    """
    conn = sqlite3.connect(headphones.DB_FILE)
    try:
        c = conn.cursor()
        c.execute("SELECT ArtistID, ArtistName from artists WHERE Status='Active'")

        for artistid, artistname in c.fetchall():

            logger.info(u"Updating album information for artist: " + artistname)

            c.execute('SELECT AlbumID from albums WHERE ArtistID=?', (artistid,))
            known_albums = set(row[0] for row in c.fetchall())

            inc = ws.ArtistIncludes(releases=(m.Release.TYPE_OFFICIAL, m.Release.TYPE_ALBUM), releaseGroups=True)
            artist = ws.Query().getArtistById(artistid, inc)

            for rg in artist.getReleaseGroups():

                rgid = u.extractUuid(rg.id)
                releaseid = getReleaseGroup(rgid)
                inc = ws.ReleaseIncludes(artist=True, releaseEvents=True, tracks=True, releaseGroup=True)
                results = ws.Query().getReleaseById(releaseid, inc)
                albumid = u.extractUuid(results.id)

                if releaseid in known_albums:

                    logger.info(results.title + " already exists in the database. Updating ASIN, Release Date, Tracks")

                    # Bound parameters: a missing ASIN is stored as NULL, not
                    # as the text "None", and quotes in values are harmless.
                    c.execute('UPDATE albums SET AlbumASIN=?, ReleaseDate=? WHERE AlbumID=?', (results.asin, results.getEarliestReleaseDate(), albumid))

                    for track in results.tracks:
                        c.execute('UPDATE tracks SET TrackDuration=? WHERE AlbumID=? AND TrackID=?', (track.duration, albumid, u.extractUuid(track.id)))
                    conn.commit()

                else:

                    logger.info(u"New album found! Adding "+results.title+" to the database...")
                    c.execute('INSERT INTO albums VALUES( ?, ?, ?, ?, ?, CURRENT_DATE, ?, ?)', (artistid, results.artist.name, results.title, results.asin, results.getEarliestReleaseDate(), albumid, 'Skipped'))
                    conn.commit()

                    # Read the row back so the release date is compared with
                    # the CURRENT_DATE value SQLite just stored.
                    c.execute('SELECT ReleaseDate, DateAdded from albums WHERE AlbumID=?', (albumid,))
                    release_date, date_added = c.fetchone()

                    if release_date and release_date > date_added:
                        c.execute("UPDATE albums SET Status = 'Wanted' WHERE AlbumID=?", (albumid,))

                    for track in results.tracks:
                        c.execute('INSERT INTO tracks VALUES( ?, ?, ?, ?, ?, ?, ?, ?)', (artistid, results.artist.name, results.title, results.asin, albumid, track.title, track.duration, u.extractUuid(track.id)))
                    conn.commit()
                time.sleep(1)

        conn.commit()
        c.close()
    finally:
        # Also release the connection when a web-service call fails midway.
        conn.close()
output: + logger.debug('Unable to find git with command ' + cmd) + output = None + elif 'fatal:' in output or err: + logger.error('Git returned bad info. Are you sure this is a git installation?') + output = None + elif output: + break + + return (output, err) + +def getVersion(): + + output, err = runGit('rev-parse HEAD') + + if not output: + logger.error('Couldn\'t find latest installed version.') + return None + + cur_commit_hash = output.strip() + + if not re.match('^[a-z0-9]+$', cur_commit_hash): + logger.error('Output doesn\'t look like a hash, not using it') + return None + + return cur_commit_hash + + +def checkGithub(): + + commits_behind = 0 + cur_commit = headphones.CURRENT_VERSION + latest_commit = None + + gh = github.GitHub() + + for curCommit in gh.commits.forBranch('rembo10', 'headphones', 'master'): + if not latest_commit: + latest_commit = curCommit.id + if not cur_commit: + break + + if curCommit.id == cur_commit: + break + + commits_behind += 1 + + headphones.LATEST_VERSION = latest_commit + headphones.COMMITS_BEHIND = commits_behind + + if headphones.LATEST_VERSION == headphones.CURRENT_VERSION: + logger.info('Headphones is already up-to-date.') + + + +def update(): + + output, err = runGit('pull origin '+headphones.LATEST_VERSION) + + if not output: + logger.error('Couldn\'t download latest version') + + for line in output.split('\n'): + + if 'Already up-to-date.' 
in line: + logger.info('No update available, not updating') + logger.info('Output: ' + str(output)) + elif line.endswith('Aborting.'): + logger.error('Unable to update from git: '+line) + logger.info('Output: ' + str(output)) + + + \ No newline at end of file diff --git a/headphones/webserve.py b/headphones/webserve.py new file mode 100644 index 00000000..5c208b43 --- /dev/null +++ b/headphones/webserve.py @@ -0,0 +1,628 @@ +import os, sys + +import cherrypy + +import lib.musicbrainz2.webservice as ws +import lib.musicbrainz2.model as m +import lib.musicbrainz2.utils as u + +import string +import time +import datetime +import sqlite3 +from threading import Thread + +import headphones +from headphones.mb import getReleaseGroup +from headphones import templates, logger, searcher +from headphones.helpers import checked + + +class WebInterface(object): + + def index(self): + raise cherrypy.HTTPRedirect("home") + index.exposed=True + + def home(self): + page = [templates._header] + if headphones.LATEST_VERSION: + if headphones.CURRENT_VERSION != headphones.LATEST_VERSION: + page.append('''
A + newer version is available. You're %s commits behind. Click here to update
+ ''' % (headphones.CURRENT_VERSION, headphones.LATEST_VERSION, headphones.COMMITS_BEHIND)) + page.append(templates._logobar) + page.append(templates._nav) + + conn=sqlite3.connect(headphones.DB_FILE) + c=conn.cursor() + c.execute('SELECT ArtistName, ArtistID, Status from artists order by ArtistSortName collate nocase') + results = c.fetchall() + if len(results): + i = 0 + page.append('''
+ + + + + + ''') + while i < len(results): + c.execute('''SELECT AlbumTitle, ReleaseDate, DateAdded, AlbumID from albums WHERE ArtistID='%s' order by ReleaseDate DESC''' % results[i][1]) + latestalbum = c.fetchall() + c.execute('''SELECT TrackTitle from tracks WHERE ArtistID="%s"''' % results[i][1]) + totaltracks = len(c.fetchall()) + c.execute('''SELECT TrackTitle from have WHERE ArtistName like "%s"''' % results[i][0]) + havetracks = len(c.fetchall()) + try: + percent = (havetracks*100)/totaltracks + except ZeroDivisionError: + percent = 100 + today = datetime.date.today() + if len(latestalbum) > 0: + if latestalbum[0][1] > datetime.date.isoformat(today): + newalbumName = '%s' % (latestalbum[0][3], latestalbum[0][0]) + releaseDate = '(%s)' % latestalbum[0][1] + else: + newalbumName = '%s' % (latestalbum[0][3], latestalbum[0][0]) + releaseDate = "" + if len(latestalbum) == 0: + newalbumName = 'None' + releaseDate = "" + if results[i][2] == 'Paused': + newStatus = '''%s(resume)''' % (results[i][2], results[i][1]) + else: + newStatus = '''%s(pause)''' % (results[i][2], results[i][1]) + page.append(''' + + + + ''' % (results[i][1], results[i][0], results[i][1], results[i][1], newStatus, newalbumName, releaseDate, percent)) + i = i+1 + c.close() + page.append('''
Artist NameStatusUpcoming AlbumsHave
%s + (link) [delete]%s%s %s
''') + page.append(templates._footer) + + else: + page.append("""
Add some artists to the database!
""") + return page + home.exposed = True + + + def artistPage(self, ArtistID): + page = [templates._header] + page.append(templates._logobar) + page.append(templates._nav) + conn=sqlite3.connect(headphones.DB_FILE) + c=conn.cursor() + c.execute('''SELECT ArtistName from artists WHERE ArtistID="%s"''' % ArtistID) + artistname = c.fetchall() + c.execute('''SELECT AlbumTitle, ReleaseDate, AlbumID, Status, ArtistName, AlbumASIN from albums WHERE ArtistID="%s" order by ReleaseDate DESC''' % ArtistID) + results = c.fetchall() + c.close() + i = 0 + page.append('''
+

%s

+ + + + + + + ''' % (artistname[0])) + while i < len(results): + c.execute('''SELECT TrackTitle from tracks WHERE AlbumID="%s"''' % results[i][2]) + totaltracks = len(c.fetchall()) + c.execute('''SELECT TrackTitle from have WHERE ArtistName like "%s" AND AlbumTitle like "%s"''' % (results[i][4], results[i][0])) + havetracks = len(c.fetchall()) + try: + percent = (havetracks*100)/totaltracks + except ZeroDivisionError: + percent = 100 + if results[i][3] == 'Skipped': + newStatus = '''%s [want]''' % (results[i][3], results[i][2], ArtistID) + elif results[i][3] == 'Wanted': + newStatus = '''%s[skip]''' % (results[i][3], results[i][2], ArtistID) + elif results[i][3] == 'Downloaded': + newStatus = '''%s[retry]''' % (results[i][3], results[i][2], ArtistID) + elif results[i][3] == 'Snatched': + newStatus = '''%s[retry]''' % (results[i][3], results[i][2], ArtistID) + else: + newStatus = '%s' % (results[i][3]) + page.append(''' + + + + ''' % (results[i][5], results[i][2], results[i][0], results[i][2], results[i][1], newStatus, percent)) + i = i+1 + page.append('''
Album NameRelease DateStatusHave
%s + (link)%s%s
''') + page.append(templates._footer) + return page + artistPage.exposed = True + + + def albumPage(self, AlbumID): + page = [templates._header] + page.append(templates._logobar) + page.append(templates._nav) + conn=sqlite3.connect(headphones.DB_FILE) + c=conn.cursor() + c.execute('''SELECT ArtistID, ArtistName, AlbumTitle, TrackTitle, TrackDuration, TrackID, AlbumASIN from tracks WHERE AlbumID="%s"''' % AlbumID) + results = c.fetchall() + + if results[0][6]: + albumart = '''


''' % results[0][6] + else: + albumart = '' + c.close() + i = 0 + page.append('''
+ %s - %s
+ Download
%s
+
+ + + + + ''' % (results[0][0], results[0][1], results[0][2], AlbumID, results[0][0], albumart)) + while i < len(results): + c.execute('''SELECT TrackTitle from have WHERE ArtistName like "%s" AND AlbumTitle like "%s" AND TrackTitle like "%s"''' % (results[i][1], results[i][2], results[i][3])) + trackmatches = c.fetchall() + if len(trackmatches): + have = '' + else: + have = '' + if results[i][4]: + duration = time.strftime("%M:%S", time.gmtime(int(results[i][4])/1000)) + else: + duration = 'n/a' + page.append(''' + + + ''' % (i+1, results[i][3], results[i][5], duration, have)) + i = i+1 + page.append('''
Track #Track TitleDuration
%s%s (link)%s%s
''') + + + page.append(templates._footer) + return page + + albumPage.exposed = True + + + def findArtist(self, name): + + page = [templates._header] + if len(name) == 0 or name == 'Add an artist': + raise cherrypy.HTTPRedirect("home") + else: + artistResults = ws.Query().getArtists(ws.ArtistFilter(string.replace(name, '&', '%38'), limit=8)) + if len(artistResults) == 0: + logger.log(u"No results found for " + name) + page.append('''No results!Go back''') + return page + elif len(artistResults) > 1: + page.append('''Search returned multiple artists. Click the artist you want to add:

''') + for result in artistResults: + artist = result.artist + detail = artist.getDisambiguation() + if detail: + disambiguation = '(%s)' % detail + else: + disambiguation = '' + page.append('''%s %s (more info)
''' % (u.extractUuid(artist.id), artist.name, disambiguation, u.extractUuid(artist.id))) + return page + else: + for result in artistResults: + artist = result.artist + logger.info(u"Found one artist matching your search term: " + artist.name +" ("+ artist.id+")") + raise cherrypy.HTTPRedirect("addArtist?artistid=%s" % u.extractUuid(artist.id)) + + findArtist.exposed = True + + def artistInfo(self, artistid): + page = [templates._header] + inc = ws.ArtistIncludes(releases=(m.Release.TYPE_OFFICIAL, m.Release.TYPE_ALBUM), releaseGroups=True) + artist = ws.Query().getArtistById(artistid, inc) + page.append('''Artist Name: %s
''' % artist.name) + page.append('''Unique ID: %s

Albums:
''' % u.extractUuid(artist.id)) + for rg in artist.getReleaseGroups(): + page.append('''%s
''' % rg.title) + return page + + artistInfo.exposed = True + + def addArtist(self, artistid): + inc = ws.ArtistIncludes(releases=(m.Release.TYPE_OFFICIAL, m.Release.TYPE_ALBUM), releaseGroups=True) + artist = ws.Query().getArtistById(artistid, inc) + conn=sqlite3.connect(headphones.DB_FILE) + c=conn.cursor() + c.execute('SELECT ArtistID from artists') + artistlist = c.fetchall() + if any(artistid in x for x in artistlist): + page = [templates._header] + page.append('''%s has already been added. Go back.''' % artist.name) + logger.info(artist.name + u" is already in the database!") + c.close() + return page + + else: + logger.info(u"Adding " + artist.name + " to the database.") + c.execute('INSERT INTO artists VALUES( ?, ?, ?, CURRENT_DATE, ?)', (artistid, artist.name, artist.sortName, 'Active')) + for rg in artist.getReleaseGroups(): + rgid = u.extractUuid(rg.id) + + releaseid = getReleaseGroup(rgid) + + inc = ws.ReleaseIncludes(artist=True, releaseEvents= True, tracks= True, releaseGroup=True) + results = ws.Query().getReleaseById(releaseid, inc) + + logger.info(u"Now adding album: " + results.title+ " to the database") + c.execute('INSERT INTO albums VALUES( ?, ?, ?, ?, ?, CURRENT_DATE, ?, ?)', (artistid, results.artist.name, results.title, results.asin, results.getEarliestReleaseDate(), u.extractUuid(results.id), 'Skipped')) + c.execute('SELECT ReleaseDate, DateAdded from albums WHERE AlbumID="%s"' % u.extractUuid(results.id)) + latestrelease = c.fetchall() + + if latestrelease[0][0] > latestrelease[0][1]: + logger.info(results.title + u" is an upcoming album. 
Setting its status to 'Wanted'...") + c.execute('UPDATE albums SET Status = "Wanted" WHERE AlbumID="%s"' % u.extractUuid(results.id)) + else: + pass + + for track in results.tracks: + c.execute('INSERT INTO tracks VALUES( ?, ?, ?, ?, ?, ?, ?, ?)', (artistid, results.artist.name, results.title, results.asin, u.extractUuid(results.id), track.title, track.duration, u.extractUuid(track.id))) + time.sleep(1) + + conn.commit() + c.close() + raise cherrypy.HTTPRedirect("home") + + addArtist.exposed = True + + def pauseArtist(self, ArtistID): + + conn=sqlite3.connect(headphones.DB_FILE) + c=conn.cursor() + logger.info(u"Pausing artist: " + ArtistID) + c.execute('UPDATE artists SET status = "Paused" WHERE ArtistId="%s"' % ArtistID) + conn.commit() + c.close() + raise cherrypy.HTTPRedirect("home") + + pauseArtist.exposed = True + + def resumeArtist(self, ArtistID): + conn=sqlite3.connect(headphones.DB_FILE) + c=conn.cursor() + logger.info(u"Resuming artist: " + ArtistID) + c.execute('UPDATE artists SET status = "Active" WHERE ArtistId="%s"' % ArtistID) + conn.commit() + c.close() + raise cherrypy.HTTPRedirect("home") + + resumeArtist.exposed = True + + def deleteArtist(self, ArtistID): + conn=sqlite3.connect(headphones.DB_FILE) + c=conn.cursor() + logger.info(u"Deleting all traces of artist: " + ArtistID) + c.execute('''DELETE from artists WHERE ArtistID="%s"''' % ArtistID) + c.execute('''DELETE from albums WHERE ArtistID="%s"''' % ArtistID) + c.execute('''DELETE from tracks WHERE ArtistID="%s"''' % ArtistID) + conn.commit() + c.close() + raise cherrypy.HTTPRedirect("home") + + deleteArtist.exposed = True + + def queueAlbum(self, AlbumID, ArtistID): + conn=sqlite3.connect(headphones.DB_FILE) + c=conn.cursor() + logger.info(u"Marking album: " + AlbumID + "as wanted...") + c.execute('UPDATE albums SET status = "Wanted" WHERE AlbumID="%s"' % AlbumID) + conn.commit() + c.close() + import searcher + searcher.searchNZB(AlbumID) + raise 
cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % ArtistID) + + queueAlbum.exposed = True + + def unqueueAlbum(self, AlbumID, ArtistID): + conn=sqlite3.connect(headphones.DB_FILE) + c=conn.cursor() + logger.info(u"Marking album: " + AlbumID + "as skipped...") + c.execute('UPDATE albums SET status = "Skipped" WHERE AlbumID="%s"' % AlbumID) + conn.commit() + c.close() + raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % ArtistID) + + unqueueAlbum.exposed = True + + def upcoming(self): + page = [templates._header] + page.append(templates._logobar) + page.append(templates._nav) + today = datetime.date.today() + todaysql = datetime.date.isoformat(today) + conn=sqlite3.connect(headphones.DB_FILE) + c=conn.cursor() + c.execute('''SELECT AlbumTitle, ReleaseDate, DateAdded, AlbumASIN, AlbumID, ArtistName, ArtistID from albums WHERE ReleaseDate > date('now') order by ReleaseDate DESC''') + albums = c.fetchall() + c.execute('''SELECT AlbumTitle, ReleaseDate, DateAdded, AlbumASIN, AlbumID, ArtistName, ArtistID from albums WHERE Status="Wanted"''') + wanted = c.fetchall() + page.append('''
+ + + + + + ''') + if len(albums) == 0: + page.append("""
Upcoming Albums

No albums are coming out soon :(
+ (try adding some more artists!)
""") + + i = 0 + while i < len(albums): + + if albums[i][3]: + albumart = '''


''' % (albums[i][3], albums[i][3]) + else: + albumart = 'No Album Art... yet.' + + page.append(''' + + + ''' % (albumart, albums[i][6], albums[i][5], albums[i][4], albums[i][0], albums[i][1])) + i += 1 + page.append('''
%s%s%s (%s)
''') + if len(wanted): + page.append('''
+ + + + + + ''') + i = 0 + while i < len(albums): + + if albums[i][3]: + albumart = '''


''' % (albums[i][3], albums[i][3]) + else: + albumart = 'No Album Art... yet.' + + page.append(''' + + + ''' % (albumart, wanted[i][6], wanted[i][5], wanted[i][4], wanted[i][0], wanted[i][1])) + i += 1 + page.append('''
Wanted Albums

%s%s%s (%s)
''') + if len(albums): + page.append(templates._footer) + + return page + upcoming.exposed = True + + def manage(self): + if headphones.PATH_TO_XML: + path = headphones.PATH_TO_XML + else: + path = 'Absolute path to iTunes XML or Top-Level Music Directory' + if headphones.MUSIC_DIR: + path2 = headphones.MUSIC_DIR + else: + path2 = 'Enter a directory to scan' + page = [templates._header] + page.append(templates._logobar) + page.append(templates._nav) + page.append(''' +

Scan Music Library


+ Where do you keep your music?

+ You can put in any directory, and it will scan for audio files in that folder + (including all subdirectories)

For example: '/Users/name/Music' +

+ It may take a while depending on how many files you have. You can navigate away from the page
+ as soon as you click 'Submit' +

+ +
+ +


+

Import or Sync Your iTunes Library/Music Folder


+ This is here for legacy purposes (try the Music Scanner above!)

+ If you'd rather import an iTunes .xml file, you can enter the full path here.

+
+ +


+

Force Search


+ Force Check for Wanted Albums

+ Force Update Active Artists

+ Check for Headphones Updates


''' % (path2, path)) + page.append(templates._footer) + return page + manage.exposed = True + + def importItunes(self, path): + headphones.PATH_TO_XML = path + headphones.config_write() + from headphones import itunesimport + itunesimport.itunesImport(path) + raise cherrypy.HTTPRedirect("home") + importItunes.exposed = True + + def musicScan(self, path): + from headphones import itunesimport + headphones.MUSIC_DIR = path + headphones.config_write() + try: + itunesimport.scanMusic(path) + except Exception, e: + logger.error('Unable to complete the scan: %s' % e) + raise cherrypy.HTTPRedirect("home") + musicScan.exposed = True + + def forceUpdate(self): + from headphones import updater + updater.dbUpdate() + raise cherrypy.HTTPRedirect("home") + forceUpdate.exposed = True + + def forceSearch(self): + from headphones import searcher + searcher.searchNZB() + raise cherrypy.HTTPRedirect("home") + forceSearch.exposed = True + + def checkGithub(self): + from headphones import versioncheck + versioncheck.checkGithub() + raise cherrypy.HTTPRedirect("home") + checkGithub.exposed = True + + def history(self): + page = [templates._header] + page.append(templates._logobar) + page.append(templates._nav) + conn=sqlite3.connect(headphones.DB_FILE) + c=conn.cursor() + c.execute('''SELECT AlbumID, Title TEXT, Size INTEGER, URL TEXT, DateAdded TEXT, Status TEXT from snatched order by DateAdded DESC''') + snatched = c.fetchall() + page.append('''
+ + + + + + ''') + if len(snatched) == 0: + page.append("""
History clear all

""") + + i = 0 + while i < len(snatched): + mb = snatched[i][2] / 1048576 + size = '%.2fM' % mb + page.append(''' + + + + + ''' % (snatched[i][5], snatched[i][1], size, snatched[i][4])) + i += 1 + page.append('''
%s%s%s%s
''') + if len(snatched): + page.append(templates._footer) + return page + history.exposed = True + + def clearhistory(self): + conn=sqlite3.connect(headphones.DB_FILE) + c=conn.cursor() + logger.info(u"Clearing history") + c.execute('''DELETE from snatched''') + conn.commit() + c.close() + raise cherrypy.HTTPRedirect("history") + clearhistory.exposed = True + + def config(self): + page = [templates._header] + page.append(templates._logobar) + page.append(templates._nav) + page.append(templates.configform % ( + headphones.HTTP_HOST, + headphones.HTTP_USERNAME, + headphones.HTTP_PORT, + headphones.HTTP_PASSWORD, + checked(headphones.LAUNCH_BROWSER), + headphones.SAB_HOST, + headphones.SAB_USERNAME, + headphones.SAB_APIKEY, + headphones.SAB_PASSWORD, + headphones.SAB_CATEGORY, + headphones.DOWNLOAD_DIR, + headphones.USENET_RETENTION, + checked(headphones.NZBMATRIX), + headphones.NZBMATRIX_USERNAME, + headphones.NZBMATRIX_APIKEY, + checked(headphones.NEWZNAB), + headphones.NEWZNAB_HOST, + headphones.NEWZNAB_APIKEY, + checked(headphones.NZBSORG), + headphones.NZBSORG_UID, + headphones.NZBSORG_HASH, + checked(headphones.PREFER_LOSSLESS), + checked(headphones.FLAC_TO_MP3), + checked(headphones.MOVE_FILES), + headphones.MUSIC_DIR, + checked(headphones.RENAME_FILES), + checked(headphones.CLEANUP_FILES), + checked(headphones.ADD_ALBUM_ART) + )) + #page.append(templates._footer) + return page + + config.exposed = True + + + def configUpdate(self, http_host='0.0.0.0', http_username=None, http_port=8181, http_password=None, launch_browser=0, + sab_host=None, sab_username=None, sab_apikey=None, sab_password=None, sab_category=None, download_dir=None, + usenet_retention=None, nzbmatrix=0, nzbmatrix_username=None, nzbmatrix_apikey=None, newznab=0, newznab_host=None, newznab_apikey=None, + nzbsorg=0, nzbsorg_uid=None, nzbsorg_hash=None, prefer_lossless=0, flac_to_mp3=0, move_files=0, music_dir=None, rename_files=0, cleanup_files=0, add_album_art=0): + + headphones.HTTP_HOST = 
http_host + headphones.HTTP_PORT = http_port + headphones.HTTP_USERNAME = http_username + headphones.HTTP_PASSWORD = http_password + headphones.LAUNCH_BROWSER = launch_browser + headphones.SAB_HOST = sab_host + headphones.SAB_USERNAME = sab_username + headphones.SAB_PASSWORD = sab_password + headphones.SAB_APIKEY = sab_apikey + headphones.SAB_CATEGORY = sab_category + headphones.DOWNLOAD_DIR = download_dir + headphones.USENET_RETENTION = usenet_retention + headphones.NZBMATRIX = nzbmatrix + headphones.NZBMATRIX_USERNAME = nzbmatrix_username + headphones.NZBMATRIX_APIKEY = nzbmatrix_apikey + headphones.NEWZNAB = newznab + headphones.NEWZNAB_HOST = newznab_host + headphones.NEWZNAB_APIKEY = newznab_apikey + headphones.NZBSORG = nzbsorg + headphones.NZBSORG_UID = nzbsorg_uid + headphones.NZBSORG_HASH = nzbsorg_hash + headphones.PREFER_LOSSLESS = prefer_lossless + headphones.FLAC_TO_MP3 = flac_to_mp3 + headphones.MOVE_FILES = move_files + headphones.MUSIC_DIR = music_dir + headphones.RENAME_FILES = rename_files + headphones.CLEANUP_FILES = cleanup_files + headphones.ADD_ALBUM_ART = add_album_art + + headphones.config_write() + + raise cherrypy.HTTPRedirect("config") + + configUpdate.exposed = True + + def shutdown(self): + logger.info(u"Headphones is shutting down...") + headphones.shutdown() + return 'Shutting down Headphones...' + + shutdown.exposed = True + + def restart(self): + logger.info(u"Headphones is restarting...") + headphones.shutdown(restart=True) + return 'Restarting Headphones...' + + restart.exposed = True + + def update(self): + logger.info('Headphones is updating...') + headphones.shutdown(restart=True, update=True) + return 'Updating Headphones...' 
\ No newline at end of file diff --git a/headphones/webstart.py b/headphones/webstart.py new file mode 100644 index 00000000..f8b3c2e7 --- /dev/null +++ b/headphones/webstart.py @@ -0,0 +1,63 @@ +import os +import sys + +import cherrypy + +import headphones + +from headphones.webserve import WebInterface + +def initialize(options={}): + + + cherrypy.config.update({ + 'log.screen': False, + 'server.thread_pool': 10, + 'server.socket_port': options['http_port'], + 'server.socket_host': options['http_host'], + 'engine.autoreload_on': False, + }) + + conf = { + '/': { + 'tools.staticdir.root': os.path.join(headphones.PROG_DIR, 'data') + }, + '/images':{ + 'tools.staticdir.on': True, + 'tools.staticdir.dir': "images" + }, + '/css':{ + 'tools.staticdir.on': True, + 'tools.staticdir.dir': "css" + }, + '/js':{ + 'tools.staticdir.on': True, + 'tools.staticdir.dir': "js" + } + } + + + if options['http_password'] != "": + conf['/'].update({ + 'tools.auth_basic.on': True, + 'tools.auth_basic.realm': 'Headphones', + 'tools.auth_basic.checkpassword': cherrypy.lib.auth_basic.checkpassword_dict( + {options['http_username']:options['http_password']}) + }) + + + # Prevent time-outs + cherrypy.engine.timeout_monitor.unsubscribe() + + cherrypy.tree.mount(WebInterface(), options['http_root'], config = conf) + + try: + cherrypy.process.servers.check_port(options['http_host'], options['http_port']) + cherrypy.server.start() + except IOError: + print 'Failed to start on port: %i. Is something else running?' 
% (options['http_port']) + sys.exit(0) + + cherrypy.server.wait() + + \ No newline at end of file diff --git a/lib/apscheduler/__init__.py b/lib/apscheduler/__init__.py new file mode 100644 index 00000000..6b502147 --- /dev/null +++ b/lib/apscheduler/__init__.py @@ -0,0 +1,3 @@ +version_info = (2, 0, 0, 'rc', 2) +version = '.'.join(str(n) for n in version_info[:3]) +release = version + ''.join(str(n) for n in version_info[3:]) diff --git a/lib/apscheduler/events.py b/lib/apscheduler/events.py new file mode 100644 index 00000000..80bde8e6 --- /dev/null +++ b/lib/apscheduler/events.py @@ -0,0 +1,64 @@ +__all__ = ('EVENT_SCHEDULER_START', 'EVENT_SCHEDULER_SHUTDOWN', + 'EVENT_JOBSTORE_ADDED', 'EVENT_JOBSTORE_REMOVED', + 'EVENT_JOBSTORE_JOB_ADDED', 'EVENT_JOBSTORE_JOB_REMOVED', + 'EVENT_JOB_EXECUTED', 'EVENT_JOB_ERROR', 'EVENT_JOB_MISSED', + 'EVENT_ALL', 'SchedulerEvent', 'JobStoreEvent', 'JobEvent') + + +EVENT_SCHEDULER_START = 1 # The scheduler was started +EVENT_SCHEDULER_SHUTDOWN = 2 # The scheduler was shut down +EVENT_JOBSTORE_ADDED = 4 # A job store was added to the scheduler +EVENT_JOBSTORE_REMOVED = 8 # A job store was removed from the scheduler +EVENT_JOBSTORE_JOB_ADDED = 16 # A job was added to a job store +EVENT_JOBSTORE_JOB_REMOVED = 32 # A job was removed from a job store +EVENT_JOB_EXECUTED = 64 # A job was executed successfully +EVENT_JOB_ERROR = 128 # A job raised an exception during execution +EVENT_JOB_MISSED = 256 # A job's execution was missed +EVENT_ALL = (EVENT_SCHEDULER_START | EVENT_SCHEDULER_SHUTDOWN | + EVENT_JOBSTORE_ADDED | EVENT_JOBSTORE_REMOVED | + EVENT_JOBSTORE_JOB_ADDED | EVENT_JOBSTORE_JOB_REMOVED | + EVENT_JOB_EXECUTED | EVENT_JOB_ERROR | EVENT_JOB_MISSED) + + +class SchedulerEvent(object): + """ + An event that concerns the scheduler itself. + + :var code: the type code of this event + """ + def __init__(self, code): + self.code = code + + +class JobStoreEvent(SchedulerEvent): + """ + An event that concerns job stores. 
+ + :var alias: the alias of the job store involved + :var job: the new job if a job was added + """ + def __init__(self, code, alias, job=None): + SchedulerEvent.__init__(self, code) + self.alias = alias + if job: + self.job = job + + +class JobEvent(SchedulerEvent): + """ + An event that concerns the execution of individual jobs. + + :var job: the job instance in question + :var scheduled_run_time: the time when the job was scheduled to be run + :var retval: the return value of the successfully executed job + :var exception: the exception raised by the job + :var traceback: the traceback object associated with the exception + """ + def __init__(self, code, job, scheduled_run_time, retval=None, + exception=None, traceback=None): + SchedulerEvent.__init__(self, code) + self.job = job + self.scheduled_run_time = scheduled_run_time + self.retval = retval + self.exception = exception + self.traceback = traceback diff --git a/lib/apscheduler/job.py b/lib/apscheduler/job.py new file mode 100644 index 00000000..c863bc3b --- /dev/null +++ b/lib/apscheduler/job.py @@ -0,0 +1,134 @@ +""" +Jobs represent scheduled tasks. +""" + +from threading import Lock +from datetime import timedelta + +from lib.apscheduler.util import to_unicode, ref_to_obj, get_callable_name,\ + obj_to_ref + + +class MaxInstancesReachedError(Exception): + pass + + +class Job(object): + """ + Encapsulates the actual Job along with its metadata. Job instances + are created by the scheduler when adding jobs, and it should not be + directly instantiated. 
+ + :param trigger: trigger that determines the execution times + :param func: callable to call when the trigger is triggered + :param args: list of positional arguments to call func with + :param kwargs: dict of keyword arguments to call func with + :param name: name of the job (optional) + :param misfire_grace_time: seconds after the designated run time that + the job is still allowed to be run + :param coalesce: run once instead of many times if the scheduler determines + that the job should be run more than once in succession + :param max_runs: maximum number of times this job is allowed to be + triggered + :param max_instances: maximum number of concurrently running + instances allowed for this job + """ + id = None + next_run_time = None + + def __init__(self, trigger, func, args, kwargs, misfire_grace_time, + coalesce, name=None, max_runs=None, max_instances=1): + if not trigger: + raise ValueError('The trigger must not be None') + if not hasattr(func, '__call__'): + raise TypeError('func must be callable') + if not hasattr(args, '__getitem__'): + raise TypeError('args must be a list-like object') + if not hasattr(kwargs, '__getitem__'): + raise TypeError('kwargs must be a dict-like object') + if misfire_grace_time <= 0: + raise ValueError('misfire_grace_time must be a positive value') + if max_runs is not None and max_runs <= 0: + raise ValueError('max_runs must be a positive value') + if max_instances <= 0: + raise ValueError('max_instances must be a positive value') + + self._lock = Lock() + + self.trigger = trigger + self.func = func + self.args = args + self.kwargs = kwargs + self.name = to_unicode(name or get_callable_name(func)) + self.misfire_grace_time = misfire_grace_time + self.coalesce = coalesce + self.max_runs = max_runs + self.max_instances = max_instances + self.runs = 0 + self.instances = 0 + + def compute_next_run_time(self, now): + if self.runs == self.max_runs: + self.next_run_time = None + else: + self.next_run_time = 
self.trigger.get_next_fire_time(now) + + return self.next_run_time + + def get_run_times(self, now): + """ + Computes the scheduled run times between ``next_run_time`` and ``now``. + """ + run_times = [] + run_time = self.next_run_time + increment = timedelta(microseconds=1) + while ((not self.max_runs or self.runs < self.max_runs) and + run_time and run_time <= now): + run_times.append(run_time) + run_time = self.trigger.get_next_fire_time(run_time + increment) + + return run_times + + def add_instance(self): + self._lock.acquire() + try: + if self.instances == self.max_instances: + raise MaxInstancesReachedError + self.instances += 1 + finally: + self._lock.release() + + def remove_instance(self): + self._lock.acquire() + try: + assert self.instances > 0, 'Already at 0 instances' + self.instances -= 1 + finally: + self._lock.release() + + def __getstate__(self): + # Prevents the unwanted pickling of transient or unpicklable variables + state = self.__dict__.copy() + state.pop('instances', None) + state.pop('func', None) + state.pop('_lock', None) + state['func_ref'] = obj_to_ref(self.func) + return state + + def __setstate__(self, state): + state['instances'] = 0 + state['func'] = ref_to_obj(state.pop('func_ref')) + state['_lock'] = Lock() + self.__dict__ = state + + def __eq__(self, other): + if isinstance(other, Job): + return self.id is not None and other.id == self.id or self is other + return NotImplemented + + def __repr__(self): + return '' % (self.name, repr(self.trigger)) + + def __str__(self): + return '%s (trigger: %s, next run at: %s)' % (self.name, + str(self.trigger), str(self.next_run_time)) diff --git a/lib/apscheduler/jobstores/__init__.py b/lib/apscheduler/jobstores/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/apscheduler/jobstores/base.py b/lib/apscheduler/jobstores/base.py new file mode 100644 index 00000000..f0a16ddb --- /dev/null +++ b/lib/apscheduler/jobstores/base.py @@ -0,0 +1,25 @@ +""" +Abstract base class 
class JobStore(object):
    """
    Interface that every job store implements.

    All methods except :meth:`close` raise ``NotImplementedError`` here and
    must be overridden by concrete stores.
    """

    def add_job(self, job):
        """Adds the given job to this store."""
        raise NotImplementedError

    def update_job(self, job):
        """Persists the running state of the given job."""
        raise NotImplementedError

    def remove_job(self, job):
        """Removes the given job from this store."""
        raise NotImplementedError

    def load_jobs(self):
        """Loads jobs from this store into memory."""
        raise NotImplementedError

    def close(self):
        """Frees any resources still bound to this job store."""
        # Intentionally a no-op by default: only the docstring forms the body,
        # so stores without external resources need not override this.
def update_job(self, job):
    """
    Persists the scheduling state of the given job.

    Writes the job's ``next_run_time`` and ``runs`` to the document whose
    ``_id`` equals ``job.id``.

    :param job: the job whose state should be stored
    """
    spec = {'_id': job.id}
    # Store the job's own run counter instead of incrementing the stored
    # value by one: the scheduler may raise ``job.runs`` by more than one
    # per update (one per missed run time when the job is not coalescing),
    # so a fixed ``$inc`` of 1 would drift from the in-memory count. The
    # shelve and SQLAlchemy stores persist ``job.runs`` the same way.
    document = {'$set': {'next_run_time': job.next_run_time,
                         'runs': job.runs}}
    self.collection.update(spec, document)
def _generate_id(self):
    """
    Returns a random job ID that is not yet used as a key in the store.

    The ID is the decimal string of a random integer between 1 and
    ``MAX_ID`` (inclusive).

    :rtype: str
    """
    # Keep drawing until an unused key turns up. Returning only from inside
    # the loop matters: falling out of the loop after a collision would hand
    # ``None`` back to the caller as the new job's ID.
    # NOTE: this never terminates if every ID in 1..MAX_ID is already taken.
    while True:
        candidate = str(random.randint(1, self.MAX_ID))
        if candidate not in self.store:
            return candidate
+ + def close(self): + self.store.close() + + def __repr__(self): + return '<%s (path=%s)>' % (self.__class__.__name__, self.path) diff --git a/lib/apscheduler/jobstores/sqlalchemy_store.py b/lib/apscheduler/jobstores/sqlalchemy_store.py new file mode 100644 index 00000000..c0fee127 --- /dev/null +++ b/lib/apscheduler/jobstores/sqlalchemy_store.py @@ -0,0 +1,87 @@ +""" +Stores jobs in a database table using SQLAlchemy. +""" +import pickle +import logging + +from lib.apscheduler.jobstores.base import JobStore +from lib.apscheduler.job import Job + +try: + from sqlalchemy import * +except ImportError: # pragma: nocover + raise ImportError('SQLAlchemyJobStore requires SQLAlchemy installed') + +logger = logging.getLogger(__name__) + + +class SQLAlchemyJobStore(JobStore): + def __init__(self, url=None, engine=None, tablename='apscheduler_jobs', + metadata=None, pickle_protocol=pickle.HIGHEST_PROTOCOL): + self.jobs = [] + self.pickle_protocol = pickle_protocol + + if engine: + self.engine = engine + elif url: + self.engine = create_engine(url) + else: + raise ValueError('Need either "engine" or "url" defined') + + self.jobs_t = Table(tablename, metadata or MetaData(), + Column('id', Integer, + Sequence(tablename + '_id_seq', optional=True), + primary_key=True), + Column('trigger', PickleType(pickle_protocol, mutable=False), + nullable=False), + Column('func_ref', String(1024), nullable=False), + Column('args', PickleType(pickle_protocol, mutable=False), + nullable=False), + Column('kwargs', PickleType(pickle_protocol, mutable=False), + nullable=False), + Column('name', Unicode(1024), unique=True), + Column('misfire_grace_time', Integer, nullable=False), + Column('coalesce', Boolean, nullable=False), + Column('max_runs', Integer), + Column('max_instances', Integer), + Column('next_run_time', DateTime, nullable=False), + Column('runs', BigInteger)) + + self.jobs_t.create(self.engine, True) + + def add_job(self, job): + job_dict = job.__getstate__() + result = 
def load_jobs(self):
    """
    Loads all jobs from the jobs table into ``self.jobs``.

    Each row is turned into a dict and fed to ``Job.__setstate__``. A row
    that cannot be restored is logged and skipped; the remaining rows are
    still loaded.
    """
    jobs = []
    for row in self.engine.execute(select([self.jobs_t])):
        # Reset for every row so the error handler below never reports the
        # previous row's name, and never hits an unbound variable if the
        # very first row fails before the dict is built.
        job_dict = {}
        try:
            job = Job.__new__(Job)
            job_dict = dict(row.items())
            job.__setstate__(job_dict)
            jobs.append(job)
        except Exception:
            job_name = job_dict.get('name', '(unknown)')
            logger.exception('Unable to restore job "%s"', job_name)
    self.jobs = jobs
+ """ + + def __str__(self): + return 'Scheduler is already running' + + +class Scheduler(object): + """ + This class is responsible for scheduling jobs and triggering + their execution. + """ + + _stopped = False + _thread = None + + def __init__(self, gconfig={}, **options): + self._wakeup = Event() + self._jobstores = {} + self._jobstores_lock = Lock() + self._listeners = [] + self._listeners_lock = Lock() + self._pending_jobs = [] + self.configure(gconfig, **options) + + def configure(self, gconfig={}, **options): + """ + Reconfigures the scheduler with the given options. Can only be done + when the scheduler isn't running. + """ + if self.running: + raise SchedulerAlreadyRunningError + + # Set general options + config = combine_opts(gconfig, 'apscheduler.', options) + self.misfire_grace_time = int(config.pop('misfire_grace_time', 1)) + self.coalesce = asbool(config.pop('coalesce', True)) + self.daemonic = asbool(config.pop('daemonic', True)) + + # Configure the thread pool + if 'threadpool' in config: + self._threadpool = maybe_ref(config['threadpool']) + else: + threadpool_opts = combine_opts(config, 'threadpool.') + self._threadpool = ThreadPool(**threadpool_opts) + + # Configure job stores + jobstore_opts = combine_opts(config, 'jobstore.') + jobstores = {} + for key, value in jobstore_opts.items(): + store_name, option = key.split('.', 1) + opts_dict = jobstores.setdefault(store_name, {}) + opts_dict[option] = value + + for alias, opts in jobstores.items(): + classname = opts.pop('class') + cls = maybe_ref(classname) + jobstore = cls(**opts) + self.add_jobstore(jobstore, alias, True) + + def start(self): + """ + Starts the scheduler in a new thread. 
@property
def running(self):
    """
    Tells whether the scheduler is currently running.

    :return: ``True`` if the scheduler has not been stopped and its thread
        exists and is alive, ``False`` otherwise
    :rtype: bool
    """
    thread = self._thread
    if self._stopped or not thread:
        # Always answer with a real bool rather than leaking ``None`` or
        # the thread object to the caller.
        return False

    # Thread.isAlive() no longer exists on Python 3.9+; prefer is_alive()
    # and fall back to the old spelling on interpreters that lack it.
    is_alive = getattr(thread, 'is_alive', None) or thread.isAlive
    return bool(is_alive())
def remove_listener(self, callback):
    """
    Removes a previously added event listener.

    Every registration of ``callback`` is removed, whatever mask it was
    added with. Nothing happens if the callback was never registered.

    :param callback: the callable that was passed to :meth:`add_listener`
    """
    self._listeners_lock.acquire()
    try:
        # Rebuild the list in place instead of deleting entries while
        # enumerating it: a deletion shifts the following items down by one,
        # so the entry right after a removed one would be skipped.
        self._listeners[:] = [(cb, mask) for cb, mask in self._listeners
                              if not (callback == cb)]
    finally:
        self._listeners_lock.release()
+ + :param trigger: alias of the job store to store the job in + :param func: callable to run at the given time + :param args: list of positional arguments to call func with + :param kwargs: dict of keyword arguments to call func with + :param jobstore: alias of the job store to store the job in + :rtype: :class:`~apscheduler.job.Job` + """ + job = Job(trigger, func, args or [], kwargs or {}, + options.pop('misfire_grace_time', self.misfire_grace_time), + options.pop('coalesce', self.coalesce), **options) + if not self.running: + self._pending_jobs.append((job, jobstore)) + logger.info('Adding job tentatively -- it will be properly ' + 'scheduled when the scheduler starts') + else: + self._real_add_job(job, jobstore, True) + return job + + def _remove_job(self, job, alias, jobstore): + jobstore.remove_job(job) + + # Notify listeners that a job has been removed + event = JobStoreEvent(EVENT_JOBSTORE_JOB_REMOVED, alias, job) + self._notify_listeners(event) + + logger.info('Removed job "%s"', job) + + def add_date_job(self, func, date, args=None, kwargs=None, **options): + """ + Schedules a job to be completed on a specific date and time. + + :param func: callable to run at the given time + :param date: the date/time to run the job at + :param name: name of the job + :param jobstore: stored the job in the named (or given) job store + :param misfire_grace_time: seconds after the designated run time that + the job is still allowed to be run + :type date: :class:`datetime.date` + :rtype: :class:`~apscheduler.job.Job` + """ + trigger = SimpleTrigger(date) + return self.add_job(trigger, func, args, kwargs, **options) + + def add_interval_job(self, func, weeks=0, days=0, hours=0, minutes=0, + seconds=0, start_date=None, args=None, kwargs=None, + **options): + """ + Schedules a job to be completed on specified intervals. 
+ + :param func: callable to run + :param weeks: number of weeks to wait + :param days: number of days to wait + :param hours: number of hours to wait + :param minutes: number of minutes to wait + :param seconds: number of seconds to wait + :param start_date: when to first execute the job and start the + counter (default is after the given interval) + :param args: list of positional arguments to call func with + :param kwargs: dict of keyword arguments to call func with + :param name: name of the job + :param jobstore: alias of the job store to add the job to + :param misfire_grace_time: seconds after the designated run time that + the job is still allowed to be run + :rtype: :class:`~apscheduler.job.Job` + """ + interval = timedelta(weeks=weeks, days=days, hours=hours, + minutes=minutes, seconds=seconds) + trigger = IntervalTrigger(interval, start_date) + return self.add_job(trigger, func, args, kwargs, **options) + + def add_cron_job(self, func, year='*', month='*', day='*', week='*', + day_of_week='*', hour='*', minute='*', second='*', + start_date=None, args=None, kwargs=None, **options): + """ + Schedules a job to be completed on times that match the given + expressions. 
+ + :param func: callable to run + :param year: year to run on + :param month: month to run on (0 = January) + :param day: day of month to run on + :param week: week of the year to run on + :param day_of_week: weekday to run on (0 = Monday) + :param hour: hour to run on + :param second: second to run on + :param args: list of positional arguments to call func with + :param kwargs: dict of keyword arguments to call func with + :param name: name of the job + :param jobstore: alias of the job store to add the job to + :param misfire_grace_time: seconds after the designated run time that + the job is still allowed to be run + :return: the scheduled job + :rtype: :class:`~apscheduler.job.Job` + """ + trigger = CronTrigger(year=year, month=month, day=day, week=week, + day_of_week=day_of_week, hour=hour, + minute=minute, second=second, + start_date=start_date) + return self.add_job(trigger, func, args, kwargs, **options) + + def cron_schedule(self, **options): + """ + Decorator version of :meth:`add_cron_job`. + This decorator does not wrap its host function. + Unscheduling decorated functions is possible by passing the ``job`` + attribute of the scheduled function to :meth:`unschedule_job`. + """ + def inner(func): + func.job = self.add_cron_job(func, **options) + return func + return inner + + def interval_schedule(self, **options): + """ + Decorator version of :meth:`add_interval_job`. + This decorator does not wrap its host function. + Unscheduling decorated functions is possible by passing the ``job`` + attribute of the scheduled function to :meth:`unschedule_job`. + """ + def inner(func): + func.job = self.add_interval_job(func, **options) + return func + return inner + + def get_jobs(self): + """ + Returns a list of all scheduled jobs. 
+ + :return: list of :class:`~apscheduler.job.Job` objects + """ + self._jobstores_lock.acquire() + try: + jobs = [] + for jobstore in itervalues(self._jobstores): + jobs.extend(jobstore.jobs) + return jobs + finally: + self._jobstores_lock.release() + + def unschedule_job(self, job): + """ + Removes a job, preventing it from being run any more. + """ + self._jobstores_lock.acquire() + try: + for alias, jobstore in iteritems(self._jobstores): + if job in list(jobstore.jobs): + self._remove_job(job, alias, jobstore) + return + finally: + self._jobstores_lock.release() + + raise KeyError('Job "%s" is not scheduled in any job store' % job) + + def unschedule_func(self, func): + """ + Removes all jobs that would execute the given function. + """ + found = False + self._jobstores_lock.acquire() + try: + for alias, jobstore in iteritems(self._jobstores): + for job in list(jobstore.jobs): + if job.func == func: + self._remove_job(job, alias, jobstore) + found = True + finally: + self._jobstores_lock.release() + + if not found: + raise KeyError('The given function is not scheduled in this ' + 'scheduler') + + def print_jobs(self, out=None): + """ + Prints out a textual listing of all jobs currently scheduled on this + scheduler. + + :param out: a file-like object to print to (defaults to **sys.stdout** + if nothing is given) + """ + out = out or sys.stdout + job_strs = [] + self._jobstores_lock.acquire() + try: + for alias, jobstore in iteritems(self._jobstores): + job_strs.append('Jobstore %s:' % alias) + if jobstore.jobs: + for job in jobstore.jobs: + job_strs.append(' %s' % job) + else: + job_strs.append(' No scheduled jobs') + finally: + self._jobstores_lock.release() + + out.write(os.linesep.join(job_strs)) + + def _run_job(self, job, run_times): + """ + Acts as a harness that runs the actual job code in a thread. 
+ """ + for run_time in run_times: + # See if the job missed its run time window, and handle possible + # misfires accordingly + difference = datetime.now() - run_time + grace_time = timedelta(seconds=job.misfire_grace_time) + if difference > grace_time: + # Notify listeners about a missed run + event = JobEvent(EVENT_JOB_MISSED, job, run_time) + self._notify_listeners(event) + logger.warning('Run time of job "%s" was missed by %s', + job, difference) + else: + try: + job.add_instance() + except MaxInstancesReachedError: + event = JobEvent(EVENT_JOB_MISSED, job, run_time) + self._notify_listeners(event) + logger.warning('Execution of job "%s" skipped: ' + 'maximum number of running instances ' + 'reached (%d)', job, job.max_instances) + break + + logger.info('Running job "%s" (scheduled at %s)', job, + run_time) + + try: + retval = job.func(*job.args, **job.kwargs) + except: + # Notify listeners about the exception + exc, tb = sys.exc_info()[1:] + event = JobEvent(EVENT_JOB_ERROR, job, run_time, + exception=exc, traceback=tb) + self._notify_listeners(event) + + logger.exception('Job "%s" raised an exception', job) + else: + # Notify listeners about successful execution + event = JobEvent(EVENT_JOB_EXECUTED, job, run_time, + retval=retval) + self._notify_listeners(event) + + logger.info('Job "%s" executed successfully', job) + + job.remove_instance() + + # If coalescing is enabled, don't attempt any further runs + if job.coalesce: + break + + def _process_jobs(self, now): + """ + Iterates through jobs in every jobstore, starts pending jobs + and figures out the next wakeup time. 
+ """ + next_wakeup_time = None + self._jobstores_lock.acquire() + try: + for alias, jobstore in iteritems(self._jobstores): + for job in tuple(jobstore.jobs): + run_times = job.get_run_times(now) + if run_times: + self._threadpool.submit(self._run_job, job, run_times) + + # Increase the job's run count + if job.coalesce: + job.runs += 1 + else: + job.runs += len(run_times) + + # Update the job, but don't keep finished jobs around + if job.compute_next_run_time(now + timedelta(microseconds=1)): + jobstore.update_job(job) + else: + self._remove_job(job, alias, jobstore) + + if not next_wakeup_time: + next_wakeup_time = job.next_run_time + elif job.next_run_time: + next_wakeup_time = min(next_wakeup_time, + job.next_run_time) + return next_wakeup_time + finally: + self._jobstores_lock.release() + + def _main_loop(self): + """Executes jobs on schedule.""" + + logger.info('Scheduler started') + self._notify_listeners(SchedulerEvent(EVENT_SCHEDULER_START)) + + self._wakeup.clear() + while not self._stopped: + logger.debug('Looking for jobs to run') + now = datetime.now() + next_wakeup_time = self._process_jobs(now) + + # Sleep until the next job is scheduled to be run, + # a new job is added or the scheduler is stopped + if next_wakeup_time is not None: + wait_seconds = time_difference(next_wakeup_time, now) + logger.debug('Next wakeup is due at %s (in %f seconds)', + next_wakeup_time, wait_seconds) + self._wakeup.wait(wait_seconds) + else: + logger.debug('No jobs; waiting until a job is added') + self._wakeup.wait() + self._wakeup.clear() + + logger.info('Scheduler has been shut down') + self._notify_listeners(SchedulerEvent(EVENT_SCHEDULER_SHUTDOWN)) diff --git a/lib/apscheduler/threadpool.py b/lib/apscheduler/threadpool.py new file mode 100644 index 00000000..8ec47da0 --- /dev/null +++ b/lib/apscheduler/threadpool.py @@ -0,0 +1,133 @@ +""" +Generic thread pool class. Modeled after Java's ThreadPoolExecutor. 
logger = logging.getLogger(__name__)
_threadpools = set()


# Worker threads are daemonic in order to let the interpreter exit without
# an explicit shutdown of the thread pool. The following trick is necessary
# to allow worker threads to finish cleanly.
def _shutdown_all():
    """Shuts down every thread pool that is still alive at interpreter exit."""
    for pool_ref in tuple(_threadpools):
        pool = pool_ref()
        if pool:
            pool.shutdown()

atexit.register(_shutdown_all)


class ThreadPool(object):
    """
    Pool of daemonic worker threads fed from a single task queue.

    Threads are started on demand when tasks are submitted, up to
    ``max_threads``. Threads started while fewer than ``core_threads``
    workers exist wait for tasks indefinitely; the others exit after
    waiting ``keepalive`` seconds without receiving a task.
    """

    def __init__(self, core_threads=0, max_threads=20, keepalive=1):
        """
        :param core_threads: maximum number of persistent threads in the pool
        :param max_threads: maximum number of total threads in the pool
            (raised to at least ``core_threads`` and at least 1)
        :param keepalive: seconds to keep non-core worker threads waiting
            for new tasks
        """
        self.core_threads = core_threads
        self.max_threads = max(max_threads, core_threads, 1)
        self.keepalive = keepalive
        self._queue = Queue()
        self._threads_lock = Lock()
        self._threads = set()
        self._shutdown = False

        # Registered as a weak reference so the atexit hook can find the
        # pool without keeping it alive.
        _threadpools.add(ref(self))
        logger.info('Started thread pool with %d core threads and %s maximum '
                    'threads', core_threads, max_threads or 'unlimited')

    def _adjust_threadcount(self):
        """Starts one more worker thread unless the pool is already full."""
        self._threads_lock.acquire()
        try:
            if self.num_threads < self.max_threads:
                self._add_thread(self.num_threads < self.core_threads)
        finally:
            self._threads_lock.release()

    def _add_thread(self, core):
        """
        Starts a daemonic worker thread and records it in the pool.

        The caller must hold ``_threads_lock``.

        :param core: ``True`` to make the worker wait for tasks indefinitely
        """
        t = Thread(target=self._run_jobs, args=(core,))
        t.setDaemon(True)
        t.start()
        self._threads.add(t)

    def _run_jobs(self, core):
        """
        Worker loop: runs queued tasks until shutdown or keepalive expiry.

        :param core: ``True`` to block on the queue without a timeout
        """
        logger.debug('Started worker thread')
        block = True
        timeout = None
        if not core:
            block = self.keepalive > 0
            timeout = self.keepalive

        while True:
            try:
                func, args, kwargs = self._queue.get(block, timeout)
            except Empty:
                break

            # Checked before running the task: anything dequeued after
            # shutdown (including the wake-up sentinel) is dropped.
            if self._shutdown:
                break

            try:
                func(*args, **kwargs)
            except:
                logger.exception('Error in worker thread')

        self._threads_lock.acquire()
        try:
            self._threads.discard(currentThread())
        finally:
            # Release even if the bookkeeping fails, so other threads
            # waiting on the lock are never blocked forever.
            self._threads_lock.release()

        logger.debug('Exiting worker thread')

    @property
    def num_threads(self):
        """Number of worker threads currently registered in the pool."""
        return len(self._threads)

    def submit(self, func, *args, **kwargs):
        """
        Queues ``func(*args, **kwargs)`` for execution by a worker thread.

        :raises RuntimeError: if the pool has already been shut down
        """
        if self._shutdown:
            raise RuntimeError('Cannot schedule new tasks after shutdown')

        self._queue.put((func, args, kwargs))
        self._adjust_threadcount()

    def shutdown(self, wait=True):
        """
        Stops the pool. Calling it again after the first time does nothing.

        Tasks that are still queued when this is called are not executed.

        :param wait: ``True`` to block until all worker threads have exited
        """
        if self._shutdown:
            return

        # Use the module logger (not the root ``logging`` module) so the
        # message is attributed and filtered like the pool's other messages.
        logger.info('Shutting down thread pool')
        self._shutdown = True
        _threadpools.discard(ref(self))

        self._threads_lock.acquire()
        try:
            # One sentinel per worker wakes up threads blocked on the queue.
            for _ in range(self.num_threads):
                self._queue.put((None, None, None))
        finally:
            self._threads_lock.release()

        if wait:
            self._threads_lock.acquire()
            try:
                threads = tuple(self._threads)
            finally:
                self._threads_lock.release()
            # Join outside the lock: exiting workers need it to deregister.
            for thread in threads:
                thread.join()

    def __repr__(self):
        if self.max_threads:
            threadcount = '%d/%d' % (self.num_threads, self.max_threads)
        else:
            threadcount = '%d' % self.num_threads

        # The format string needs a placeholder for each argument; an empty
        # string would make the % operator raise TypeError.
        return '<ThreadPool at %x; threads=%s>' % (id(self), threadcount)
import * +from lib.apscheduler.util import datetime_ceil, convert_to_datetime + + +class CronTrigger(object): + FIELD_NAMES = ('year', 'month', 'day', 'week', 'day_of_week', 'hour', + 'minute', 'second') + FIELDS_MAP = {'year': BaseField, + 'month': BaseField, + 'week': WeekField, + 'day': DayOfMonthField, + 'day_of_week': DayOfWeekField, + 'hour': BaseField, + 'minute': BaseField, + 'second': BaseField} + + def __init__(self, **values): + self.start_date = values.pop('start_date', None) + if self.start_date: + self.start_date = convert_to_datetime(self.start_date) + + self.fields = [] + for field_name in self.FIELD_NAMES: + if field_name in values: + exprs = values.pop(field_name) + is_default = False + elif not values: + exprs = DEFAULT_VALUES[field_name] + is_default = True + else: + exprs = '*' + is_default = True + + field_class = self.FIELDS_MAP[field_name] + field = field_class(field_name, exprs, is_default) + self.fields.append(field) + + def _increment_field_value(self, dateval, fieldnum): + """ + Increments the designated field and resets all less significant fields + to their minimum values. 
+ + :type dateval: datetime + :type fieldnum: int + :type amount: int + :rtype: tuple + :return: a tuple containing the new date, and the number of the field + that was actually incremented + """ + i = 0 + values = {} + while i < len(self.fields): + field = self.fields[i] + if not field.REAL: + if i == fieldnum: + fieldnum -= 1 + i -= 1 + else: + i += 1 + continue + + if i < fieldnum: + values[field.name] = field.get_value(dateval) + i += 1 + elif i > fieldnum: + values[field.name] = field.get_min(dateval) + i += 1 + else: + value = field.get_value(dateval) + maxval = field.get_max(dateval) + if value == maxval: + fieldnum -= 1 + i -= 1 + else: + values[field.name] = value + 1 + i += 1 + + return datetime(**values), fieldnum + + def _set_field_value(self, dateval, fieldnum, new_value): + values = {} + for i, field in enumerate(self.fields): + if field.REAL: + if i < fieldnum: + values[field.name] = field.get_value(dateval) + elif i > fieldnum: + values[field.name] = field.get_min(dateval) + else: + values[field.name] = new_value + + return datetime(**values) + + def get_next_fire_time(self, start_date): + if self.start_date: + start_date = max(start_date, self.start_date) + next_date = datetime_ceil(start_date) + fieldnum = 0 + while 0 <= fieldnum < len(self.fields): + field = self.fields[fieldnum] + curr_value = field.get_value(next_date) + next_value = field.get_next_value(next_date) + + if next_value is None: + # No valid value was found + next_date, fieldnum = self._increment_field_value(next_date, + fieldnum - 1) + elif next_value > curr_value: + # A valid, but higher than the starting value, was found + if field.REAL: + next_date = self._set_field_value(next_date, fieldnum, + next_value) + fieldnum += 1 + else: + next_date, fieldnum = self._increment_field_value(next_date, + fieldnum) + else: + # A valid value was found, no changes necessary + fieldnum += 1 + + if fieldnum >= 0: + return next_date + + def __str__(self): + options = ["%s='%s'" % (f.name, 
class AllExpression(object):
    """
    Cron expression ``*`` or ``*/step``: matches every value of a field,
    or every ``step``-th value counted from the field's minimum.
    """

    # The group name must equal the keyword argument accepted by __init__.
    value_re = re.compile(r'\*(?:/(?P<step>\d+))?$')

    def __init__(self, step=None):
        """
        :param step: increment between matching values (``None`` matches
            every value)
        :raises ValueError: if ``step`` is 0
        """
        self.step = asint(step)
        if self.step == 0:
            raise ValueError('Increment must be higher than 0')

    def get_next_value(self, date, field):
        """
        Return the first matching value of ``field`` that is not lower
        than its current value in ``date``.

        :param date: value handed to the field's accessor methods
        :param field: object providing ``get_value``, ``get_min`` and
            ``get_max``
        :return: the next matching value, or ``None`` if it would exceed
            the field's maximum
        """
        start = field.get_value(date)
        minval = field.get_min(date)
        maxval = field.get_max(date)
        start = max(start, minval)

        if not self.step:
            next_value = start
        else:
            # Round up to the next multiple of ``step`` counted from minval.
            distance_to_next = (self.step - (start - minval)) % self.step
            next_value = start + distance_to_next

        if next_value <= maxval:
            return next_value
        return None

    def __str__(self):
        if self.step:
            return '*/%d' % self.step
        return '*'

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, self.step)
last: + raise ValueError('The minimum value in a range must not be ' + 'higher than the maximum') + self.first = first + self.last = last + + def get_next_value(self, date, field): + start = field.get_value(date) + minval = field.get_min(date) + maxval = field.get_max(date) + + # Apply range limits + minval = max(minval, self.first) + if self.last is not None: + maxval = min(maxval, self.last) + start = max(start, minval) + + if not self.step: + next = start + else: + distance_to_next = (self.step - (start - minval)) % self.step + next = start + distance_to_next + + if next <= maxval: + return next + + def __str__(self): + if self.last != self.first and self.last is not None: + range = '%d-%d' % (self.first, self.last) + else: + range = str(self.first) + + if self.step: + return '%s/%d' % (range, self.step) + return range + + def __repr__(self): + args = [str(self.first)] + if self.last != self.first and self.last is not None or self.step: + args.append(str(self.last)) + if self.step: + args.append(str(self.step)) + return "%s(%s)" % (self.__class__.__name__, ', '.join(args)) + + +class WeekdayRangeExpression(RangeExpression): + value_re = re.compile(r'(?P[a-z]+)(?:-(?P[a-z]+))?', + re.IGNORECASE) + + def __init__(self, first, last=None): + try: + first_num = WEEKDAYS.index(first.lower()) + except ValueError: + raise ValueError('Invalid weekday name "%s"' % first) + + if last: + try: + last_num = WEEKDAYS.index(last.lower()) + except ValueError: + raise ValueError('Invalid weekday name "%s"' % last) + else: + last_num = None + + RangeExpression.__init__(self, first_num, last_num) + + def __str__(self): + if self.last != self.first and self.last is not None: + return '%s-%s' % (WEEKDAYS[self.first], WEEKDAYS[self.last]) + return WEEKDAYS[self.first] + + def __repr__(self): + args = ["'%s'" % WEEKDAYS[self.first]] + if self.last != self.first and self.last is not None: + args.append("'%s'" % WEEKDAYS[self.last]) + return "%s(%s)" % (self.__class__.__name__, ', 
class WeekdayPositionExpression(AllExpression):
    """
    Cron expression selecting one occurrence of a weekday within a month,
    written as ``<position> <weekday>`` where the position is one of
    ``options`` and the weekday is an entry of ``WEEKDAYS``.
    """

    options = ['1st', '2nd', '3rd', '4th', '5th', 'last']
    # The group names must equal the keyword arguments accepted by __init__.
    value_re = re.compile(
        r'(?P<option_name>%s) +(?P<weekday_name>(?:\d+|\w+))'
        % '|'.join(options), re.IGNORECASE)

    def __init__(self, option_name, weekday_name):
        """
        :param option_name: one of ``options`` (case-insensitive)
        :param weekday_name: one of ``WEEKDAYS`` (case-insensitive)
        :raises ValueError: if either name is not recognized
        """
        try:
            self.option_num = self.options.index(option_name.lower())
        except ValueError:
            raise ValueError('Invalid weekday position "%s"' % option_name)

        try:
            self.weekday = WEEKDAYS.index(weekday_name.lower())
        except ValueError:
            raise ValueError('Invalid weekday name "%s"' % weekday_name)

    def get_next_value(self, date, field):
        """
        Return the day of the month of ``date`` on which the configured
        weekday occurrence falls.

        :return: the day number, or ``None`` if that occurrence does not
            exist in the month or lies before ``date.day``
        """
        # Figure out the weekday of the month's first day and the number
        # of days in that month
        first_day_wday, last_day = monthrange(date.year, date.month)

        # Calculate which day of the month is the first of the target weekdays
        first_hit_day = self.weekday - first_day_wday + 1
        if first_hit_day <= 0:
            first_hit_day += 7

        # Calculate what day of the month the target weekday would be
        if self.option_num < 5:
            target_day = first_hit_day + self.option_num * 7
        else:
            # Floor division is required here: with true division the
            # "/ 7 * 7" round trip cancels out and the result is a float
            # equal to last_day instead of the last matching weekday.
            target_day = first_hit_day + ((last_day - first_hit_day) // 7) * 7

        if target_day <= last_day and target_day >= date.day:
            return target_day
        return None

    def __str__(self):
        return '%s %s' % (self.options[self.option_num],
                          WEEKDAYS[self.weekday])

    def __repr__(self):
        return "%s('%s', '%s')" % (self.__class__.__name__,
                                   self.options[self.option_num],
                                   WEEKDAYS[self.weekday])
# Lowest and highest value each cron field may take, keyed by field name.
# All three tables must share exactly the same keys, because fields index
# them by their own name.
MIN_VALUES = {'year': 1970, 'month': 1, 'day': 1, 'week': 1,
              'day_of_week': 0, 'hour': 0, 'minute': 0, 'second': 0}
MAX_VALUES = {'year': 2 ** 63, 'month': 12, 'day': 31, 'week': 53,
              'day_of_week': 6, 'hour': 23, 'minute': 59, 'second': 59}
# Fallback expression for each field name.
DEFAULT_VALUES = {'year': '*', 'month': 1, 'day': 1, 'week': '*',
                  'day_of_week': '*', 'hour': 0, 'minute': 0, 'second': 0}
"%s('%s', '%s')" % (self.__class__.__name__, self.name, + str(self)) + + +class WeekField(BaseField): + REAL = False + + def get_value(self, dateval): + return dateval.isocalendar()[1] + + +class DayOfMonthField(BaseField): + COMPILERS = BaseField.COMPILERS + [WeekdayPositionExpression] + + def get_max(self, dateval): + return monthrange(dateval.year, dateval.month)[1] + + +class DayOfWeekField(BaseField): + REAL = False + COMPILERS = BaseField.COMPILERS + [WeekdayRangeExpression] + + def get_value(self, dateval): + return dateval.weekday() diff --git a/lib/apscheduler/triggers/interval.py b/lib/apscheduler/triggers/interval.py new file mode 100644 index 00000000..a7c1ee11 --- /dev/null +++ b/lib/apscheduler/triggers/interval.py @@ -0,0 +1,39 @@ +from datetime import datetime, timedelta +from math import ceil + +from lib.apscheduler.util import convert_to_datetime, timedelta_seconds + + +class IntervalTrigger(object): + def __init__(self, interval, start_date=None): + if not isinstance(interval, timedelta): + raise TypeError('interval must be a timedelta') + if start_date: + start_date = convert_to_datetime(start_date) + + self.interval = interval + self.interval_length = timedelta_seconds(self.interval) + if self.interval_length == 0: + self.interval = timedelta(seconds=1) + self.interval_length = 1 + + if start_date is None: + self.start_date = datetime.now() + self.interval + else: + self.start_date = convert_to_datetime(start_date) + + def get_next_fire_time(self, start_date): + if start_date < self.start_date: + return self.start_date + + timediff_seconds = timedelta_seconds(start_date - self.start_date) + next_interval_num = int(ceil(timediff_seconds / self.interval_length)) + return self.start_date + self.interval * next_interval_num + + def __str__(self): + return 'interval[%s]' % str(self.interval) + + def __repr__(self): + return "<%s (interval=%s, start_date=%s)>" % ( + self.__class__.__name__, repr(self.interval), + repr(self.start_date)) diff --git 
class SimpleTrigger(object):
    """Trigger that fires a single time, at ``run_date``."""

    def __init__(self, run_date):
        """
        :param run_date: the moment to fire at, in any form accepted by
            ``convert_to_datetime``
        """
        self.run_date = convert_to_datetime(run_date)

    def get_next_fire_time(self, start_date):
        """
        Return ``run_date`` unless it lies before ``start_date``, in which
        case ``None`` is returned.
        """
        if start_date > self.run_date:
            return None
        return self.run_date

    def __str__(self):
        return 'date[%s]' % (self.run_date,)

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<%s (run_date=%r)>' % (class_name, self.run_date)
# The group names are passed straight to datetime() as keyword arguments,
# so each one must be the name of a datetime constructor parameter.
_DATE_REGEX = re.compile(
    r'(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})'
    r'(?: (?P<hour>\d{1,2}):(?P<minute>\d{1,2}):(?P<second>\d{1,2})'
    r'(?:\.(?P<microsecond>\d{1,6}))?)?')


def convert_to_datetime(input):
    """
    Converts the given object to a datetime object, if possible.
    If an actual datetime object is passed, it is returned unmodified.
    If a date object is passed, midnight of that day is returned.
    If the input is a string, it is parsed as a datetime.

    Date strings are accepted in three different forms: date only (Y-m-d),
    date with time (Y-m-d H:M:S) or with date+time with microseconds
    (Y-m-d H:M:S.micro). Components missing from the string default to 0.

    :rtype: datetime
    :raises ValueError: if a string does not start with a recognized date
    :raises TypeError: if the input is of an unsupported type
    """
    # datetime is a subclass of date, so it has to be tested first.
    if isinstance(input, datetime):
        return input
    if isinstance(input, date):
        return datetime.fromordinal(input.toordinal())
    if isinstance(input, str):
        match = _DATE_REGEX.match(input)
        if not match:
            raise ValueError('Invalid date string')
        # Groups that did not participate in the match are None -> 0.
        values = dict((name, int(text or 0))
                      for name, text in match.groupdict().items())
        return datetime(**values)
    raise TypeError('Unsupported input type: %s' % type(input))
def get_callable_name(func):
    """
    Returns the best available display name for the given function/callable.

    The name is built from ``func.__module__``, followed by the name of the
    object the callable is bound to (if any) and the callable's own
    ``__name__`` (if it has one), joined with dots.

    For a method bound to an instance the instance's class name is used:
    instances have no ``__name__`` attribute, so reading it directly would
    raise AttributeError.
    """
    name = func.__module__

    owner = getattr(func, '__self__', None)
    if owner is None:
        owner = getattr(func, 'im_self', None)  # py2.4, 2.5

    if owner is not None:
        owner_name = getattr(owner, '__name__', None)
        if owner_name is None:
            # Bound to an instance rather than to a class or module.
            owner_name = owner.__class__.__name__
        name += '.' + owner_name

    if hasattr(func, '__name__'):
        name += '.' + func.__name__
    return name
def to_unicode(string, encoding='ascii'):
    """
    Return ``string`` decoded with ``encoding`` when it offers a ``decode``
    method; bytes that cannot be decoded are dropped. Objects without a
    ``decode`` method are returned unchanged.
    """
    if not hasattr(string, 'decode'):
        return string
    return string.decode(encoding, 'ignore')
+ +"""Command-line parsing library + +This module is an optparse-inspired command-line parsing library that: + + - handles both optional and positional arguments + - produces highly informative usage messages + - supports parsers that dispatch to sub-parsers + +The following is a simple usage example that sums integers from the +command-line and writes the result to a file:: + + parser = argparse.ArgumentParser( + description='sum the integers at the command line') + parser.add_argument( + 'integers', metavar='int', nargs='+', type=int, + help='an integer to be summed') + parser.add_argument( + '--log', default=sys.stdout, type=argparse.FileType('w'), + help='the file where the sum should be written') + args = parser.parse_args() + args.log.write('%s' % sum(args.integers)) + args.log.close() + +The module contains the following public classes: + + - ArgumentParser -- The main entry point for command-line parsing. As the + example above shows, the add_argument() method is used to populate + the parser with actions for optional and positional arguments. Then + the parse_args() method is invoked to convert the args at the + command-line into an object with attributes. + + - ArgumentError -- The exception raised by ArgumentParser objects when + there are errors with the parser's actions. Errors raised while + parsing the command-line are caught by ArgumentParser and emitted + as command-line messages. + + - FileType -- A factory for defining types of files to be created. As the + example above shows, instances of FileType are typically passed as + the type= argument of add_argument() calls. + + - Action -- The base class for parser actions. Typically actions are + selected by passing strings like 'store_true' or 'append_const' to + the action= argument of add_argument(). However, for greater + customization of ArgumentParser actions, subclasses of Action may + be defined and passed as the action= argument. 
class _AttributeHolder(object):
    """Abstract base class that provides __repr__.

    The __repr__ method returns a string in the format::
        ClassName(arg, ..., attr=value, attr=value, ...)
    The positional part comes from ``_get_args()`` (empty by default) and
    the keyword part from ``_get_kwargs()``, which by default yields the
    items of the instance ``__dict__`` in sorted order. Subclasses may
    override either method.
    """

    def __repr__(self):
        positional = [repr(arg) for arg in self._get_args()]
        keyword = ['%s=%r' % (name, value)
                   for name, value in self._get_kwargs()]
        return '%s(%s)' % (type(self).__name__,
                           ', '.join(positional + keyword))

    def _get_kwargs(self):
        pairs = list(self.__dict__.items())
        pairs.sort()
        return pairs

    def _get_args(self):
        return []
+ self._level -= 1 + + class _Section(object): + + def __init__(self, formatter, parent, heading=None): + self.formatter = formatter + self.parent = parent + self.heading = heading + self.items = [] + + def format_help(self): + # format the indented section + if self.parent is not None: + self.formatter._indent() + join = self.formatter._join_parts + for func, args in self.items: + func(*args) + item_help = join([func(*args) for func, args in self.items]) + if self.parent is not None: + self.formatter._dedent() + + # return nothing if the section was empty + if not item_help: + return '' + + # add the heading if the section was non-empty + if self.heading is not SUPPRESS and self.heading is not None: + current_indent = self.formatter._current_indent + heading = '%*s%s:\n' % (current_indent, '', self.heading) + else: + heading = '' + + # join the section-initial newline, the heading and the help + return join(['\n', heading, item_help, '\n']) + + def _add_item(self, func, args): + self._current_section.items.append((func, args)) + + # ======================== + # Message building methods + # ======================== + def start_section(self, heading): + self._indent() + section = self._Section(self, self._current_section, heading) + self._add_item(section.format_help, []) + self._current_section = section + + def end_section(self): + self._current_section = self._current_section.parent + self._dedent() + + def add_text(self, text): + if text is not SUPPRESS and text is not None: + self._add_item(self._format_text, [text]) + + def add_usage(self, usage, actions, groups, prefix=None): + if usage is not SUPPRESS: + args = usage, actions, groups, prefix + self._add_item(self._format_usage, args) + + def add_argument(self, action): + if action.help is not SUPPRESS: + + # find all invocations + get_invocation = self._format_action_invocation + invocations = [get_invocation(action)] + for subaction in self._iter_indented_subactions(action): + 
invocations.append(get_invocation(subaction)) + + # update the maximum item length + invocation_length = max([len(s) for s in invocations]) + action_length = invocation_length + self._current_indent + self._action_max_length = max(self._action_max_length, + action_length) + + # add the item to the list + self._add_item(self._format_action, [action]) + + def add_arguments(self, actions): + for action in actions: + self.add_argument(action) + + # ======================= + # Help-formatting methods + # ======================= + def format_help(self): + help = self._root_section.format_help() + if help: + help = self._long_break_matcher.sub('\n\n', help) + help = help.strip('\n') + '\n' + return help + + def _join_parts(self, part_strings): + return ''.join([part + for part in part_strings + if part and part is not SUPPRESS]) + + def _format_usage(self, usage, actions, groups, prefix): + if prefix is None: + prefix = _('usage: ') + + # if usage is specified, use that + if usage is not None: + usage = usage % dict(prog=self._prog) + + # if no optionals or positionals are available, usage is just prog + elif usage is None and not actions: + usage = '%(prog)s' % dict(prog=self._prog) + + # if optionals and positionals are available, calculate usage + elif usage is None: + prog = '%(prog)s' % dict(prog=self._prog) + + # split optionals from positionals + optionals = [] + positionals = [] + for action in actions: + if action.option_strings: + optionals.append(action) + else: + positionals.append(action) + + # build full usage string + format = self._format_actions_usage + action_usage = format(optionals + positionals, groups) + usage = ' '.join([s for s in [prog, action_usage] if s]) + + # wrap the usage parts if it's too long + text_width = self._width - self._current_indent + if len(prefix) + len(usage) > text_width: + + # break usage into wrappable parts + part_regexp = r'\(.*?\)+|\[.*?\]+|\S+' + opt_usage = format(optionals, groups) + pos_usage = format(positionals, 
groups) + opt_parts = _re.findall(part_regexp, opt_usage) + pos_parts = _re.findall(part_regexp, pos_usage) + assert ' '.join(opt_parts) == opt_usage + assert ' '.join(pos_parts) == pos_usage + + # helper for wrapping lines + def get_lines(parts, indent, prefix=None): + lines = [] + line = [] + if prefix is not None: + line_len = len(prefix) - 1 + else: + line_len = len(indent) - 1 + for part in parts: + if line_len + 1 + len(part) > text_width: + lines.append(indent + ' '.join(line)) + line = [] + line_len = len(indent) - 1 + line.append(part) + line_len += len(part) + 1 + if line: + lines.append(indent + ' '.join(line)) + if prefix is not None: + lines[0] = lines[0][len(indent):] + return lines + + # if prog is short, follow it with optionals or positionals + if len(prefix) + len(prog) <= 0.75 * text_width: + indent = ' ' * (len(prefix) + len(prog) + 1) + if opt_parts: + lines = get_lines([prog] + opt_parts, indent, prefix) + lines.extend(get_lines(pos_parts, indent)) + elif pos_parts: + lines = get_lines([prog] + pos_parts, indent, prefix) + else: + lines = [prog] + + # if prog is long, put it on its own line + else: + indent = ' ' * len(prefix) + parts = opt_parts + pos_parts + lines = get_lines(parts, indent) + if len(lines) > 1: + lines = [] + lines.extend(get_lines(opt_parts, indent)) + lines.extend(get_lines(pos_parts, indent)) + lines = [prog] + lines + + # join lines into usage + usage = '\n'.join(lines) + + # prefix with 'usage:' + return '%s%s\n\n' % (prefix, usage) + + def _format_actions_usage(self, actions, groups): + # find group indices and identify actions in groups + group_actions = set() + inserts = {} + for group in groups: + try: + start = actions.index(group._group_actions[0]) + except ValueError: + continue + else: + end = start + len(group._group_actions) + if actions[start:end] == group._group_actions: + for action in group._group_actions: + group_actions.add(action) + if not group.required: + if start in inserts: + inserts[start] += ' [' 
+ else: + inserts[start] = '[' + inserts[end] = ']' + else: + if start in inserts: + inserts[start] += ' (' + else: + inserts[start] = '(' + inserts[end] = ')' + for i in range(start + 1, end): + inserts[i] = '|' + + # collect all actions format strings + parts = [] + for i, action in enumerate(actions): + + # suppressed arguments are marked with None + # remove | separators for suppressed arguments + if action.help is SUPPRESS: + parts.append(None) + if inserts.get(i) == '|': + inserts.pop(i) + elif inserts.get(i + 1) == '|': + inserts.pop(i + 1) + + # produce all arg strings + elif not action.option_strings: + default = self._get_default_metavar_for_positional(action) + part = self._format_args(action, default) + + # if it's in a group, strip the outer [] + if action in group_actions: + if part[0] == '[' and part[-1] == ']': + part = part[1:-1] + + # add the action string to the list + parts.append(part) + + # produce the first way to invoke the option in brackets + else: + option_string = action.option_strings[0] + + # if the Optional doesn't take a value, format is: + # -s or --long + if action.nargs == 0: + part = '%s' % option_string + + # if the Optional takes a value, format is: + # -s ARGS or --long ARGS + else: + default = self._get_default_metavar_for_optional(action) + args_string = self._format_args(action, default) + part = '%s %s' % (option_string, args_string) + + # make it look optional if it's not required or in a group + if not action.required and action not in group_actions: + part = '[%s]' % part + + # add the action string to the list + parts.append(part) + + # insert things at the necessary indices + for i in sorted(inserts, reverse=True): + parts[i:i] = [inserts[i]] + + # join all the action items with spaces + text = ' '.join([item for item in parts if item is not None]) + + # clean up separators for mutually exclusive groups + open = r'[\[(]' + close = r'[\])]' + text = _re.sub(r'(%s) ' % open, r'\1', text) + text = _re.sub(r' (%s)' % 
close, r'\1', text) + text = _re.sub(r'%s *%s' % (open, close), r'', text) + text = _re.sub(r'\(([^|]*)\)', r'\1', text) + text = text.strip() + + # return the text + return text + + def _format_text(self, text): + if '%(prog)' in text: + text = text % dict(prog=self._prog) + text_width = self._width - self._current_indent + indent = ' ' * self._current_indent + return self._fill_text(text, text_width, indent) + '\n\n' + + def _format_action(self, action): + # determine the required width and the entry label + help_position = min(self._action_max_length + 2, + self._max_help_position) + help_width = self._width - help_position + action_width = help_position - self._current_indent - 2 + action_header = self._format_action_invocation(action) + + # ho nelp; start on same line and add a final newline + if not action.help: + tup = self._current_indent, '', action_header + action_header = '%*s%s\n' % tup + + # short action name; start on the same line and pad two spaces + elif len(action_header) <= action_width: + tup = self._current_indent, '', action_width, action_header + action_header = '%*s%-*s ' % tup + indent_first = 0 + + # long action name; start on the next line + else: + tup = self._current_indent, '', action_header + action_header = '%*s%s\n' % tup + indent_first = help_position + + # collect the pieces of the action help + parts = [action_header] + + # if there was help for the action, add lines of help text + if action.help: + help_text = self._expand_help(action) + help_lines = self._split_lines(help_text, help_width) + parts.append('%*s%s\n' % (indent_first, '', help_lines[0])) + for line in help_lines[1:]: + parts.append('%*s%s\n' % (help_position, '', line)) + + # or add a newline if the description doesn't end with one + elif not action_header.endswith('\n'): + parts.append('\n') + + # if there are any sub-actions, add their help as well + for subaction in self._iter_indented_subactions(action): + parts.append(self._format_action(subaction)) + + # 
return a single string + return self._join_parts(parts) + + def _format_action_invocation(self, action): + if not action.option_strings: + default = self._get_default_metavar_for_positional(action) + metavar, = self._metavar_formatter(action, default)(1) + return metavar + + else: + parts = [] + + # if the Optional doesn't take a value, format is: + # -s, --long + if action.nargs == 0: + parts.extend(action.option_strings) + + # if the Optional takes a value, format is: + # -s ARGS, --long ARGS + else: + default = self._get_default_metavar_for_optional(action) + args_string = self._format_args(action, default) + for option_string in action.option_strings: + parts.append('%s %s' % (option_string, args_string)) + + return ', '.join(parts) + + def _metavar_formatter(self, action, default_metavar): + if action.metavar is not None: + result = action.metavar + elif action.choices is not None: + choice_strs = [str(choice) for choice in action.choices] + result = '{%s}' % ','.join(choice_strs) + else: + result = default_metavar + + def format(tuple_size): + if isinstance(result, tuple): + return result + else: + return (result, ) * tuple_size + return format + + def _format_args(self, action, default_metavar): + get_metavar = self._metavar_formatter(action, default_metavar) + if action.nargs is None: + result = '%s' % get_metavar(1) + elif action.nargs == OPTIONAL: + result = '[%s]' % get_metavar(1) + elif action.nargs == ZERO_OR_MORE: + result = '[%s [%s ...]]' % get_metavar(2) + elif action.nargs == ONE_OR_MORE: + result = '%s [%s ...]' % get_metavar(2) + elif action.nargs == REMAINDER: + result = '...' + elif action.nargs == PARSER: + result = '%s ...' 
% get_metavar(1) + else: + formats = ['%s' for _ in range(action.nargs)] + result = ' '.join(formats) % get_metavar(action.nargs) + return result + + def _expand_help(self, action): + params = dict(vars(action), prog=self._prog) + for name in list(params): + if params[name] is SUPPRESS: + del params[name] + for name in list(params): + if hasattr(params[name], '__name__'): + params[name] = params[name].__name__ + if params.get('choices') is not None: + choices_str = ', '.join([str(c) for c in params['choices']]) + params['choices'] = choices_str + return self._get_help_string(action) % params + + def _iter_indented_subactions(self, action): + try: + get_subactions = action._get_subactions + except AttributeError: + pass + else: + self._indent() + for subaction in get_subactions(): + yield subaction + self._dedent() + + def _split_lines(self, text, width): + text = self._whitespace_matcher.sub(' ', text).strip() + return _textwrap.wrap(text, width) + + def _fill_text(self, text, width, indent): + text = self._whitespace_matcher.sub(' ', text).strip() + return _textwrap.fill(text, width, initial_indent=indent, + subsequent_indent=indent) + + def _get_help_string(self, action): + return action.help + + def _get_default_metavar_for_optional(self, action): + return action.dest.upper() + + def _get_default_metavar_for_positional(self, action): + return action.dest + + +class RawDescriptionHelpFormatter(HelpFormatter): + """Help message formatter which retains any formatting in descriptions. + + Only the name of this class is considered a public API. All the methods + provided by the class are considered an implementation detail. + """ + + def _fill_text(self, text, width, indent): + return ''.join([indent + line for line in text.splitlines(True)]) + + +class RawTextHelpFormatter(RawDescriptionHelpFormatter): + """Help message formatter which retains formatting of all help text. + + Only the name of this class is considered a public API. 
All the methods + provided by the class are considered an implementation detail. + """ + + def _split_lines(self, text, width): + return text.splitlines() + + +class ArgumentDefaultsHelpFormatter(HelpFormatter): + """Help message formatter which adds default values to argument help. + + Only the name of this class is considered a public API. All the methods + provided by the class are considered an implementation detail. + """ + + def _get_help_string(self, action): + help = action.help + if '%(default)' not in action.help: + if action.default is not SUPPRESS: + defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] + if action.option_strings or action.nargs in defaulting_nargs: + help += ' (default: %(default)s)' + return help + + +class MetavarTypeHelpFormatter(HelpFormatter): + """Help message formatter which uses the argument 'type' as the default + metavar value (instead of the argument 'dest') + + Only the name of this class is considered a public API. All the methods + provided by the class are considered an implementation detail. + """ + + def _get_default_metavar_for_optional(self, action): + return action.type.__name__ + + def _get_default_metavar_for_positional(self, action): + return action.type.__name__ + + + +# ===================== +# Options and Arguments +# ===================== + +def _get_action_name(argument): + if argument is None: + return None + elif argument.option_strings: + return '/'.join(argument.option_strings) + elif argument.metavar not in (None, SUPPRESS): + return argument.metavar + elif argument.dest not in (None, SUPPRESS): + return argument.dest + else: + return None + + +class ArgumentError(Exception): + """An error from creating or using an argument (optional or positional). + + The string value of this exception is the message, augmented with + information about the argument that caused it. 
+ """ + + def __init__(self, argument, message): + self.argument_name = _get_action_name(argument) + self.message = message + + def __str__(self): + if self.argument_name is None: + format = '%(message)s' + else: + format = 'argument %(argument_name)s: %(message)s' + return format % dict(message=self.message, + argument_name=self.argument_name) + + +class ArgumentTypeError(Exception): + """An error from trying to convert a command line string to a type.""" + pass + + +# ============== +# Action classes +# ============== + +class Action(_AttributeHolder): + """Information about how to convert command line strings to Python objects. + + Action objects are used by an ArgumentParser to represent the information + needed to parse a single argument from one or more strings from the + command line. The keyword arguments to the Action constructor are also + all attributes of Action instances. + + Keyword Arguments: + + - option_strings -- A list of command-line option strings which + should be associated with this action. + + - dest -- The name of the attribute to hold the created object(s) + + - nargs -- The number of command-line arguments that should be + consumed. By default, one argument will be consumed and a single + value will be produced. Other values include: + - N (an integer) consumes N arguments (and produces a list) + - '?' consumes zero or one arguments + - '*' consumes zero or more arguments (and produces a list) + - '+' consumes one or more arguments (and produces a list) + Note that the difference between the default and nargs=1 is that + with the default, a single value will be produced, while with + nargs=1, a list containing a single value will be produced. + + - const -- The value to be produced if the option is specified and the + option uses an action that takes no values. + + - default -- The value to be produced if the option is not specified. 
+ + - type -- The type which the command-line arguments should be converted + to, should be one of 'string', 'int', 'float', 'complex' or a + callable object that accepts a single string argument. If None, + 'string' is assumed. + + - choices -- A container of values that should be allowed. If not None, + after a command-line argument has been converted to the appropriate + type, an exception will be raised if it is not a member of this + collection. + + - required -- True if the action must always be specified at the + command line. This is only meaningful for optional command-line + arguments. + + - help -- The help string describing the argument. + + - metavar -- The name to be used for the option's argument with the + help string. If None, the 'dest' value will be used as the name. + """ + + def __init__(self, + option_strings, + dest, + nargs=None, + const=None, + default=None, + type=None, + choices=None, + required=False, + help=None, + metavar=None): + self.option_strings = option_strings + self.dest = dest + self.nargs = nargs + self.const = const + self.default = default + self.type = type + self.choices = choices + self.required = required + self.help = help + self.metavar = metavar + + def _get_kwargs(self): + names = [ + 'option_strings', + 'dest', + 'nargs', + 'const', + 'default', + 'type', + 'choices', + 'help', + 'metavar', + ] + return [(name, getattr(self, name)) for name in names] + + def __call__(self, parser, namespace, values, option_string=None): + raise NotImplementedError(_('.__call__() not defined')) + + +class _StoreAction(Action): + + def __init__(self, + option_strings, + dest, + nargs=None, + const=None, + default=None, + type=None, + choices=None, + required=False, + help=None, + metavar=None): + if nargs == 0: + raise ValueError('nargs for store actions must be > 0; if you ' + 'have nothing to store, actions such as store ' + 'true or store const may be more appropriate') + if const is not None and nargs != OPTIONAL: + raise 
ValueError('nargs must be %r to supply const' % OPTIONAL) + super(_StoreAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=nargs, + const=const, + default=default, + type=type, + choices=choices, + required=required, + help=help, + metavar=metavar) + + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, values) + + +class _StoreConstAction(Action): + + def __init__(self, + option_strings, + dest, + const, + default=None, + required=False, + help=None, + metavar=None): + super(_StoreConstAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=0, + const=const, + default=default, + required=required, + help=help) + + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, self.const) + + +class _StoreTrueAction(_StoreConstAction): + + def __init__(self, + option_strings, + dest, + default=False, + required=False, + help=None): + super(_StoreTrueAction, self).__init__( + option_strings=option_strings, + dest=dest, + const=True, + default=default, + required=required, + help=help) + + +class _StoreFalseAction(_StoreConstAction): + + def __init__(self, + option_strings, + dest, + default=True, + required=False, + help=None): + super(_StoreFalseAction, self).__init__( + option_strings=option_strings, + dest=dest, + const=False, + default=default, + required=required, + help=help) + + +class _AppendAction(Action): + + def __init__(self, + option_strings, + dest, + nargs=None, + const=None, + default=None, + type=None, + choices=None, + required=False, + help=None, + metavar=None): + if nargs == 0: + raise ValueError('nargs for append actions must be > 0; if arg ' + 'strings are not supplying the value to append, ' + 'the append const action may be more appropriate') + if const is not None and nargs != OPTIONAL: + raise ValueError('nargs must be %r to supply const' % OPTIONAL) + super(_AppendAction, self).__init__( + 
option_strings=option_strings, + dest=dest, + nargs=nargs, + const=const, + default=default, + type=type, + choices=choices, + required=required, + help=help, + metavar=metavar) + + def __call__(self, parser, namespace, values, option_string=None): + items = _copy.copy(_ensure_value(namespace, self.dest, [])) + items.append(values) + setattr(namespace, self.dest, items) + + +class _AppendConstAction(Action): + + def __init__(self, + option_strings, + dest, + const, + default=None, + required=False, + help=None, + metavar=None): + super(_AppendConstAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=0, + const=const, + default=default, + required=required, + help=help, + metavar=metavar) + + def __call__(self, parser, namespace, values, option_string=None): + items = _copy.copy(_ensure_value(namespace, self.dest, [])) + items.append(self.const) + setattr(namespace, self.dest, items) + + +class _CountAction(Action): + + def __init__(self, + option_strings, + dest, + default=None, + required=False, + help=None): + super(_CountAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=0, + default=default, + required=required, + help=help) + + def __call__(self, parser, namespace, values, option_string=None): + new_count = _ensure_value(namespace, self.dest, 0) + 1 + setattr(namespace, self.dest, new_count) + + +class _HelpAction(Action): + + def __init__(self, + option_strings, + dest=SUPPRESS, + default=SUPPRESS, + help=None): + super(_HelpAction, self).__init__( + option_strings=option_strings, + dest=dest, + default=default, + nargs=0, + help=help) + + def __call__(self, parser, namespace, values, option_string=None): + parser.print_help() + parser.exit() + + +class _VersionAction(Action): + + def __init__(self, + option_strings, + version=None, + dest=SUPPRESS, + default=SUPPRESS, + help="show program's version number and exit"): + super(_VersionAction, self).__init__( + option_strings=option_strings, + dest=dest, + 
default=default, + nargs=0, + help=help) + self.version = version + + def __call__(self, parser, namespace, values, option_string=None): + version = self.version + if version is None: + version = parser.version + formatter = parser._get_formatter() + formatter.add_text(version) + parser.exit(message=formatter.format_help()) + + +class _SubParsersAction(Action): + + class _ChoicesPseudoAction(Action): + + def __init__(self, name, aliases, help): + metavar = dest = name + if aliases: + metavar += ' (%s)' % ', '.join(aliases) + sup = super(_SubParsersAction._ChoicesPseudoAction, self) + sup.__init__(option_strings=[], dest=dest, help=help, + metavar=metavar) + + def __init__(self, + option_strings, + prog, + parser_class, + dest=SUPPRESS, + help=None, + metavar=None): + + self._prog_prefix = prog + self._parser_class = parser_class + self._name_parser_map = _collections.OrderedDict() + self._choices_actions = [] + + super(_SubParsersAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=PARSER, + choices=self._name_parser_map, + help=help, + metavar=metavar) + + def add_parser(self, name, **kwargs): + # set prog from the existing prefix + if kwargs.get('prog') is None: + kwargs['prog'] = '%s %s' % (self._prog_prefix, name) + + aliases = kwargs.pop('aliases', ()) + + # create a pseudo-action to hold the choice help + if 'help' in kwargs: + help = kwargs.pop('help') + choice_action = self._ChoicesPseudoAction(name, aliases, help) + self._choices_actions.append(choice_action) + + # create the parser and add it to the map + parser = self._parser_class(**kwargs) + self._name_parser_map[name] = parser + + # make parser available under aliases also + for alias in aliases: + self._name_parser_map[alias] = parser + + return parser + + def _get_subactions(self): + return self._choices_actions + + def __call__(self, parser, namespace, values, option_string=None): + parser_name = values[0] + arg_strings = values[1:] + + # set the parser name if requested + 
if self.dest is not SUPPRESS: + setattr(namespace, self.dest, parser_name) + + # select the parser + try: + parser = self._name_parser_map[parser_name] + except KeyError: + args = {'parser_name': parser_name, + 'choices': ', '.join(self._name_parser_map)} + msg = _('unknown parser %(parser_name)r (choices: %(choices)s)') % args + raise ArgumentError(self, msg) + + # parse all the remaining options into the namespace + # store any unrecognized options on the object, so that the top + # level parser can decide what to do with them + namespace, arg_strings = parser.parse_known_args(arg_strings, namespace) + if arg_strings: + vars(namespace).setdefault(_UNRECOGNIZED_ARGS_ATTR, []) + getattr(namespace, _UNRECOGNIZED_ARGS_ATTR).extend(arg_strings) + + +# ============== +# Type classes +# ============== + +class FileType(object): + """Factory for creating file object types + + Instances of FileType are typically passed as type= arguments to the + ArgumentParser add_argument() method. + + Keyword Arguments: + - mode -- A string indicating how the file is to be opened. Accepts the + same values as the builtin open() function. + - bufsize -- The file's desired buffer size. Accepts the same values as + the builtin open() function. 
+ """ + + def __init__(self, mode='r', bufsize=-1): + self._mode = mode + self._bufsize = bufsize + + def __call__(self, string): + # the special argument "-" means sys.std{in,out} + if string == '-': + if 'r' in self._mode: + return _sys.stdin + elif 'w' in self._mode: + return _sys.stdout + else: + msg = _('argument "-" with mode %r') % self._mode + raise ValueError(msg) + + # all other arguments are used as file names + try: + return open(string, self._mode, self._bufsize) + except IOError as e: + message = _("can't open '%s': %s") + raise ArgumentTypeError(message % (string, e)) + + def __repr__(self): + args = self._mode, self._bufsize + args_str = ', '.join(repr(arg) for arg in args if arg != -1) + return '%s(%s)' % (type(self).__name__, args_str) + +# =========================== +# Optional and Positional Parsing +# =========================== + +class Namespace(_AttributeHolder): + """Simple object for storing attributes. + + Implements equality by attribute names and values, and provides a simple + string representation. 
+ """ + + def __init__(self, **kwargs): + for name in kwargs: + setattr(self, name, kwargs[name]) + + def __eq__(self, other): + return vars(self) == vars(other) + + def __ne__(self, other): + return not (self == other) + + def __contains__(self, key): + return key in self.__dict__ + + +class _ActionsContainer(object): + + def __init__(self, + description, + prefix_chars, + argument_default, + conflict_handler): + super(_ActionsContainer, self).__init__() + + self.description = description + self.argument_default = argument_default + self.prefix_chars = prefix_chars + self.conflict_handler = conflict_handler + + # set up registries + self._registries = {} + + # register actions + self.register('action', None, _StoreAction) + self.register('action', 'store', _StoreAction) + self.register('action', 'store_const', _StoreConstAction) + self.register('action', 'store_true', _StoreTrueAction) + self.register('action', 'store_false', _StoreFalseAction) + self.register('action', 'append', _AppendAction) + self.register('action', 'append_const', _AppendConstAction) + self.register('action', 'count', _CountAction) + self.register('action', 'help', _HelpAction) + self.register('action', 'version', _VersionAction) + self.register('action', 'parsers', _SubParsersAction) + + # raise an exception if the conflict handler is invalid + self._get_handler() + + # action storage + self._actions = [] + self._option_string_actions = {} + + # groups + self._action_groups = [] + self._mutually_exclusive_groups = [] + + # defaults storage + self._defaults = {} + + # determines whether an "option" looks like a negative number + self._negative_number_matcher = _re.compile(r'^-\d+$|^-\d*\.\d+$') + + # whether or not there are any optionals that look like negative + # numbers -- uses a list so it can be shared and edited + self._has_negative_number_optionals = [] + + # ==================== + # Registration methods + # ==================== + def register(self, registry_name, value, object): + 
registry = self._registries.setdefault(registry_name, {}) + registry[value] = object + + def _registry_get(self, registry_name, value, default=None): + return self._registries[registry_name].get(value, default) + + # ================================== + # Namespace default accessor methods + # ================================== + def set_defaults(self, **kwargs): + self._defaults.update(kwargs) + + # if these defaults match any existing arguments, replace + # the previous default on the object with the new one + for action in self._actions: + if action.dest in kwargs: + action.default = kwargs[action.dest] + + def get_default(self, dest): + for action in self._actions: + if action.dest == dest and action.default is not None: + return action.default + return self._defaults.get(dest, None) + + + # ======================= + # Adding argument actions + # ======================= + def add_argument(self, *args, **kwargs): + """ + add_argument(dest, ..., name=value, ...) + add_argument(option_string, option_string, ..., name=value, ...) 
+ """ + + # if no positional args are supplied or only one is supplied and + # it doesn't look like an option string, parse a positional + # argument + chars = self.prefix_chars + if not args or len(args) == 1 and args[0][0] not in chars: + if args and 'dest' in kwargs: + raise ValueError('dest supplied twice for positional argument') + kwargs = self._get_positional_kwargs(*args, **kwargs) + + # otherwise, we're adding an optional argument + else: + kwargs = self._get_optional_kwargs(*args, **kwargs) + + # if no default was supplied, use the parser-level default + if 'default' not in kwargs: + dest = kwargs['dest'] + if dest in self._defaults: + kwargs['default'] = self._defaults[dest] + elif self.argument_default is not None: + kwargs['default'] = self.argument_default + + # create the action object, and add it to the parser + action_class = self._pop_action_class(kwargs) + if not _callable(action_class): + raise ValueError('unknown action "%s"' % (action_class,)) + action = action_class(**kwargs) + + # raise an error if the action type is not callable + type_func = self._registry_get('type', action.type, action.type) + if not _callable(type_func): + raise ValueError('%r is not callable' % (type_func,)) + + # raise an error if the metavar does not match the type + if hasattr(self, "_get_formatter"): + try: + self._get_formatter()._format_args(action, None) + except TypeError: + raise ValueError("length of metavar tuple does not match nargs") + + return self._add_action(action) + + def add_argument_group(self, *args, **kwargs): + group = _ArgumentGroup(self, *args, **kwargs) + self._action_groups.append(group) + return group + + def add_mutually_exclusive_group(self, **kwargs): + group = _MutuallyExclusiveGroup(self, **kwargs) + self._mutually_exclusive_groups.append(group) + return group + + def _add_action(self, action): + # resolve any conflicts + self._check_conflict(action) + + # add to actions list + self._actions.append(action) + action.container = self + + 
# index the action by any option strings it has + for option_string in action.option_strings: + self._option_string_actions[option_string] = action + + # set the flag if any option strings look like negative numbers + for option_string in action.option_strings: + if self._negative_number_matcher.match(option_string): + if not self._has_negative_number_optionals: + self._has_negative_number_optionals.append(True) + + # return the created action + return action + + def _remove_action(self, action): + self._actions.remove(action) + + def _add_container_actions(self, container): + # collect groups by titles + title_group_map = {} + for group in self._action_groups: + if group.title in title_group_map: + msg = _('cannot merge actions - two groups are named %r') + raise ValueError(msg % (group.title)) + title_group_map[group.title] = group + + # map each action to its group + group_map = {} + for group in container._action_groups: + + # if a group with the title exists, use that, otherwise + # create a new group matching the container's group + if group.title not in title_group_map: + title_group_map[group.title] = self.add_argument_group( + title=group.title, + description=group.description, + conflict_handler=group.conflict_handler) + + # map the actions to their new group + for action in group._group_actions: + group_map[action] = title_group_map[group.title] + + # add container's mutually exclusive groups + # NOTE: if add_mutually_exclusive_group ever gains title= and + # description= then this code will need to be expanded as above + for group in container._mutually_exclusive_groups: + mutex_group = self.add_mutually_exclusive_group( + required=group.required) + + # map the actions to their new mutex group + for action in group._group_actions: + group_map[action] = mutex_group + + # add all actions to this container or their group + for action in container._actions: + group_map.get(action, self)._add_action(action) + + def _get_positional_kwargs(self, dest, 
**kwargs): + # make sure required is not specified + if 'required' in kwargs: + msg = _("'required' is an invalid argument for positionals") + raise TypeError(msg) + + # mark positional arguments as required if at least one is + # always required + if kwargs.get('nargs') not in [OPTIONAL, ZERO_OR_MORE]: + kwargs['required'] = True + if kwargs.get('nargs') == ZERO_OR_MORE and 'default' not in kwargs: + kwargs['required'] = True + + # return the keyword arguments with no option strings + return dict(kwargs, dest=dest, option_strings=[]) + + def _get_optional_kwargs(self, *args, **kwargs): + # determine short and long option strings + option_strings = [] + long_option_strings = [] + for option_string in args: + # error on strings that don't start with an appropriate prefix + if not option_string[0] in self.prefix_chars: + args = {'option': option_string, + 'prefix_chars': self.prefix_chars} + msg = _('invalid option string %(option)r: ' + 'must start with a character %(prefix_chars)r') + raise ValueError(msg % args) + + # strings starting with two prefix characters are long options + option_strings.append(option_string) + if option_string[0] in self.prefix_chars: + if len(option_string) > 1: + if option_string[1] in self.prefix_chars: + long_option_strings.append(option_string) + + # infer destination, '--foo-bar' -> 'foo_bar' and '-x' -> 'x' + dest = kwargs.pop('dest', None) + if dest is None: + if long_option_strings: + dest_option_string = long_option_strings[0] + else: + dest_option_string = option_strings[0] + dest = dest_option_string.lstrip(self.prefix_chars) + if not dest: + msg = _('dest= is required for options like %r') + raise ValueError(msg % option_string) + dest = dest.replace('-', '_') + + # return the updated keyword arguments + return dict(kwargs, dest=dest, option_strings=option_strings) + + def _pop_action_class(self, kwargs, default=None): + action = kwargs.pop('action', default) + return self._registry_get('action', action, action) + + def 
_get_handler(self): + # determine function from conflict handler string + handler_func_name = '_handle_conflict_%s' % self.conflict_handler + try: + return getattr(self, handler_func_name) + except AttributeError: + msg = _('invalid conflict_resolution value: %r') + raise ValueError(msg % self.conflict_handler) + + def _check_conflict(self, action): + + # find all options that conflict with this option + confl_optionals = [] + for option_string in action.option_strings: + if option_string in self._option_string_actions: + confl_optional = self._option_string_actions[option_string] + confl_optionals.append((option_string, confl_optional)) + + # resolve any conflicts + if confl_optionals: + conflict_handler = self._get_handler() + conflict_handler(action, confl_optionals) + + def _handle_conflict_error(self, action, conflicting_actions): + message = ngettext('conflicting option string: %s', + 'conflicting option strings: %s', + len(conflicting_actions)) + conflict_string = ', '.join([option_string + for option_string, action + in conflicting_actions]) + raise ArgumentError(action, message % conflict_string) + + def _handle_conflict_resolve(self, action, conflicting_actions): + + # remove all conflicting options + for option_string, action in conflicting_actions: + + # remove the conflicting option + action.option_strings.remove(option_string) + self._option_string_actions.pop(option_string, None) + + # if the option now has no option string, remove it from the + # container holding it + if not action.option_strings: + action.container._remove_action(action) + + +class _ArgumentGroup(_ActionsContainer): + + def __init__(self, container, title=None, description=None, **kwargs): + # add any missing keyword arguments by checking the container + update = kwargs.setdefault + update('conflict_handler', container.conflict_handler) + update('prefix_chars', container.prefix_chars) + update('argument_default', container.argument_default) + super_init = super(_ArgumentGroup, 
self).__init__ + super_init(description=description, **kwargs) + + # group attributes + self.title = title + self._group_actions = [] + + # share most attributes with the container + self._registries = container._registries + self._actions = container._actions + self._option_string_actions = container._option_string_actions + self._defaults = container._defaults + self._has_negative_number_optionals = \ + container._has_negative_number_optionals + self._mutually_exclusive_groups = container._mutually_exclusive_groups + + def _add_action(self, action): + action = super(_ArgumentGroup, self)._add_action(action) + self._group_actions.append(action) + return action + + def _remove_action(self, action): + super(_ArgumentGroup, self)._remove_action(action) + self._group_actions.remove(action) + + +class _MutuallyExclusiveGroup(_ArgumentGroup): + + def __init__(self, container, required=False): + super(_MutuallyExclusiveGroup, self).__init__(container) + self.required = required + self._container = container + + def _add_action(self, action): + if action.required: + msg = _('mutually exclusive arguments must be optional') + raise ValueError(msg) + action = self._container._add_action(action) + self._group_actions.append(action) + return action + + def _remove_action(self, action): + self._container._remove_action(action) + self._group_actions.remove(action) + + +class ArgumentParser(_AttributeHolder, _ActionsContainer): + """Object for parsing command line strings into Python objects. 
+ + Keyword Arguments: + - prog -- The name of the program (default: sys.argv[0]) + - usage -- A usage message (default: auto-generated from arguments) + - description -- A description of what the program does + - epilog -- Text following the argument descriptions + - parents -- Parsers whose arguments should be copied into this one + - formatter_class -- HelpFormatter class for printing help messages + - prefix_chars -- Characters that prefix optional arguments + - fromfile_prefix_chars -- Characters that prefix files containing + additional arguments + - argument_default -- The default value for all arguments + - conflict_handler -- String indicating how to handle conflicts + - add_help -- Add a -h/-help option + """ + + def __init__(self, + prog=None, + usage=None, + description=None, + epilog=None, + version=None, + parents=[], + formatter_class=HelpFormatter, + prefix_chars='-', + fromfile_prefix_chars=None, + argument_default=None, + conflict_handler='error', + add_help=True): + + if version is not None: + import warnings + warnings.warn( + """The "version" argument to ArgumentParser is deprecated. 
""" + """Please use """ + """"add_argument(..., action='version', version="N", ...)" """ + """instead""", DeprecationWarning) + + superinit = super(ArgumentParser, self).__init__ + superinit(description=description, + prefix_chars=prefix_chars, + argument_default=argument_default, + conflict_handler=conflict_handler) + + # default setting for prog + if prog is None: + prog = _os.path.basename(_sys.argv[0]) + + self.prog = prog + self.usage = usage + self.epilog = epilog + self.version = version + self.formatter_class = formatter_class + self.fromfile_prefix_chars = fromfile_prefix_chars + self.add_help = add_help + + add_group = self.add_argument_group + self._positionals = add_group(_('positional arguments')) + self._optionals = add_group(_('optional arguments')) + self._subparsers = None + + # register types + def identity(string): + return string + self.register('type', None, identity) + + # add help and version arguments if necessary + # (using explicit default to override global argument_default) + default_prefix = '-' if '-' in prefix_chars else prefix_chars[0] + if self.add_help: + self.add_argument( + default_prefix+'h', default_prefix*2+'help', + action='help', default=SUPPRESS, + help=_('show this help message and exit')) + if self.version: + self.add_argument( + default_prefix+'v', default_prefix*2+'version', + action='version', default=SUPPRESS, + version=self.version, + help=_("show program's version number and exit")) + + # add parent arguments and defaults + for parent in parents: + self._add_container_actions(parent) + try: + defaults = parent._defaults + except AttributeError: + pass + else: + self._defaults.update(defaults) + + # ======================= + # Pretty __repr__ methods + # ======================= + def _get_kwargs(self): + names = [ + 'prog', + 'usage', + 'description', + 'version', + 'formatter_class', + 'conflict_handler', + 'add_help', + ] + return [(name, getattr(self, name)) for name in names] + + # 
================================== + # Optional/Positional adding methods + # ================================== + def add_subparsers(self, **kwargs): + if self._subparsers is not None: + self.error(_('cannot have multiple subparser arguments')) + + # add the parser class to the arguments if it's not present + kwargs.setdefault('parser_class', type(self)) + + if 'title' in kwargs or 'description' in kwargs: + title = _(kwargs.pop('title', 'subcommands')) + description = _(kwargs.pop('description', None)) + self._subparsers = self.add_argument_group(title, description) + else: + self._subparsers = self._positionals + + # prog defaults to the usage message of this parser, skipping + # optional arguments and with no "usage:" prefix + if kwargs.get('prog') is None: + formatter = self._get_formatter() + positionals = self._get_positional_actions() + groups = self._mutually_exclusive_groups + formatter.add_usage(self.usage, positionals, groups, '') + kwargs['prog'] = formatter.format_help().strip() + + # create the parsers action and add it to the positionals list + parsers_class = self._pop_action_class(kwargs, 'parsers') + action = parsers_class(option_strings=[], **kwargs) + self._subparsers._add_action(action) + + # return the created parsers action + return action + + def _add_action(self, action): + if action.option_strings: + self._optionals._add_action(action) + else: + self._positionals._add_action(action) + return action + + def _get_optional_actions(self): + return [action + for action in self._actions + if action.option_strings] + + def _get_positional_actions(self): + return [action + for action in self._actions + if not action.option_strings] + + # ===================================== + # Command line argument parsing methods + # ===================================== + def parse_args(self, args=None, namespace=None): + args, argv = self.parse_known_args(args, namespace) + if argv: + msg = _('unrecognized arguments: %s') + self.error(msg % ' '.join(argv)) + 
return args + + def parse_known_args(self, args=None, namespace=None): + # args default to the system args + if args is None: + args = _sys.argv[1:] + + # default Namespace built from parser defaults + if namespace is None: + namespace = Namespace() + + # add any action defaults that aren't present + for action in self._actions: + if action.dest is not SUPPRESS: + if not hasattr(namespace, action.dest): + if action.default is not SUPPRESS: + default = action.default + if isinstance(action.default, str): + default = self._get_value(action, default) + setattr(namespace, action.dest, default) + + # add any parser defaults that aren't present + for dest in self._defaults: + if not hasattr(namespace, dest): + setattr(namespace, dest, self._defaults[dest]) + + # parse the arguments and exit if there are any errors + try: + namespace, args = self._parse_known_args(args, namespace) + if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): + args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) + delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) + return namespace, args + except ArgumentError: + err = _sys.exc_info()[1] + self.error(str(err)) + + def _parse_known_args(self, arg_strings, namespace): + # replace arg strings that are file references + if self.fromfile_prefix_chars is not None: + arg_strings = self._read_args_from_files(arg_strings) + + # map all mutually exclusive arguments to the other arguments + # they can't occur with + action_conflicts = {} + for mutex_group in self._mutually_exclusive_groups: + group_actions = mutex_group._group_actions + for i, mutex_action in enumerate(mutex_group._group_actions): + conflicts = action_conflicts.setdefault(mutex_action, []) + conflicts.extend(group_actions[:i]) + conflicts.extend(group_actions[i + 1:]) + + # find all option indices, and determine the arg_string_pattern + # which has an 'O' if there is an option at an index, + # an 'A' if there is an argument, or a '-' if there is a '--' + option_string_indices = {} + 
arg_string_pattern_parts = [] + arg_strings_iter = iter(arg_strings) + for i, arg_string in enumerate(arg_strings_iter): + + # all args after -- are non-options + if arg_string == '--': + arg_string_pattern_parts.append('-') + for arg_string in arg_strings_iter: + arg_string_pattern_parts.append('A') + + # otherwise, add the arg to the arg strings + # and note the index if it was an option + else: + option_tuple = self._parse_optional(arg_string) + if option_tuple is None: + pattern = 'A' + else: + option_string_indices[i] = option_tuple + pattern = 'O' + arg_string_pattern_parts.append(pattern) + + # join the pieces together to form the pattern + arg_strings_pattern = ''.join(arg_string_pattern_parts) + + # converts arg strings to the appropriate and then takes the action + seen_actions = set() + seen_non_default_actions = set() + + def take_action(action, argument_strings, option_string=None): + seen_actions.add(action) + argument_values = self._get_values(action, argument_strings) + + # error if this argument is not allowed with other previously + # seen arguments, assuming that actions that use the default + # value don't really count as "present" + if argument_values is not action.default: + seen_non_default_actions.add(action) + for conflict_action in action_conflicts.get(action, []): + if conflict_action in seen_non_default_actions: + msg = _('not allowed with argument %s') + action_name = _get_action_name(conflict_action) + raise ArgumentError(action, msg % action_name) + + # take the action if we didn't receive a SUPPRESS value + # (e.g. 
from a default) + if argument_values is not SUPPRESS: + action(self, namespace, argument_values, option_string) + + # function to convert arg_strings into an optional action + def consume_optional(start_index): + + # get the optional identified at this index + option_tuple = option_string_indices[start_index] + action, option_string, explicit_arg = option_tuple + + # identify additional optionals in the same arg string + # (e.g. -xyz is the same as -x -y -z if no args are required) + match_argument = self._match_argument + action_tuples = [] + while True: + + # if we found no optional action, skip it + if action is None: + extras.append(arg_strings[start_index]) + return start_index + 1 + + # if there is an explicit argument, try to match the + # optional's string arguments to only this + if explicit_arg is not None: + arg_count = match_argument(action, 'A') + + # if the action is a single-dash option and takes no + # arguments, try to parse more single-dash options out + # of the tail of the option string + chars = self.prefix_chars + if arg_count == 0 and option_string[1] not in chars: + action_tuples.append((action, [], option_string)) + char = option_string[0] + option_string = char + explicit_arg[0] + new_explicit_arg = explicit_arg[1:] or None + optionals_map = self._option_string_actions + if option_string in optionals_map: + action = optionals_map[option_string] + explicit_arg = new_explicit_arg + else: + msg = _('ignored explicit argument %r') + raise ArgumentError(action, msg % explicit_arg) + + # if the action expect exactly one argument, we've + # successfully matched the option; exit the loop + elif arg_count == 1: + stop = start_index + 1 + args = [explicit_arg] + action_tuples.append((action, args, option_string)) + break + + # error if a double-dash option did not use the + # explicit argument + else: + msg = _('ignored explicit argument %r') + raise ArgumentError(action, msg % explicit_arg) + + # if there is no explicit argument, try to match the + 
# optional's string arguments with the following strings + # if successful, exit the loop + else: + start = start_index + 1 + selected_patterns = arg_strings_pattern[start:] + arg_count = match_argument(action, selected_patterns) + stop = start + arg_count + args = arg_strings[start:stop] + action_tuples.append((action, args, option_string)) + break + + # add the Optional to the list and return the index at which + # the Optional's string args stopped + assert action_tuples + for action, args, option_string in action_tuples: + take_action(action, args, option_string) + return stop + + # the list of Positionals left to be parsed; this is modified + # by consume_positionals() + positionals = self._get_positional_actions() + + # function to convert arg_strings into positional actions + def consume_positionals(start_index): + # match as many Positionals as possible + match_partial = self._match_arguments_partial + selected_pattern = arg_strings_pattern[start_index:] + arg_counts = match_partial(positionals, selected_pattern) + + # slice off the appropriate arg strings for each Positional + # and add the Positional and its args to the list + for action, arg_count in zip(positionals, arg_counts): + args = arg_strings[start_index: start_index + arg_count] + start_index += arg_count + take_action(action, args) + + # slice off the Positionals that we just parsed and return the + # index at which the Positionals' string args stopped + positionals[:] = positionals[len(arg_counts):] + return start_index + + # consume Positionals and Optionals alternately, until we have + # passed the last option string + extras = [] + start_index = 0 + if option_string_indices: + max_option_string_index = max(option_string_indices) + else: + max_option_string_index = -1 + while start_index <= max_option_string_index: + + # consume any Positionals preceding the next option + next_option_string_index = min([ + index + for index in option_string_indices + if index >= start_index]) + if 
start_index != next_option_string_index: + positionals_end_index = consume_positionals(start_index) + + # only try to parse the next optional if we didn't consume + # the option string during the positionals parsing + if positionals_end_index > start_index: + start_index = positionals_end_index + continue + else: + start_index = positionals_end_index + + # if we consumed all the positionals we could and we're not + # at the index of an option string, there were extra arguments + if start_index not in option_string_indices: + strings = arg_strings[start_index:next_option_string_index] + extras.extend(strings) + start_index = next_option_string_index + + # consume the next optional and any arguments for it + start_index = consume_optional(start_index) + + # consume any positionals following the last Optional + stop_index = consume_positionals(start_index) + + # if we didn't consume all the argument strings, there were extras + extras.extend(arg_strings[stop_index:]) + + # make sure all required actions were present + required_actions = [_get_action_name(action) for action in self._actions + if action.required and action not in seen_actions] + if required_actions: + self.error(_('the following arguments are required: %s') % + ', '.join(required_actions)) + + # make sure all required groups had one option present + for group in self._mutually_exclusive_groups: + if group.required: + for action in group._group_actions: + if action in seen_non_default_actions: + break + + # if no actions were used, report the error + else: + names = [_get_action_name(action) + for action in group._group_actions + if action.help is not SUPPRESS] + msg = _('one of the arguments %s is required') + self.error(msg % ' '.join(names)) + + # return the updated namespace and the extra arguments + return namespace, extras + + def _read_args_from_files(self, arg_strings): + # expand arguments referencing files + new_arg_strings = [] + for arg_string in arg_strings: + + # for regular arguments, just 
add them back into the list + if arg_string[0] not in self.fromfile_prefix_chars: + new_arg_strings.append(arg_string) + + # replace arguments referencing files with the file content + else: + try: + args_file = open(arg_string[1:]) + try: + arg_strings = [] + for arg_line in args_file.read().splitlines(): + for arg in self.convert_arg_line_to_args(arg_line): + arg_strings.append(arg) + arg_strings = self._read_args_from_files(arg_strings) + new_arg_strings.extend(arg_strings) + finally: + args_file.close() + except IOError: + err = _sys.exc_info()[1] + self.error(str(err)) + + # return the modified argument list + return new_arg_strings + + def convert_arg_line_to_args(self, arg_line): + return [arg_line] + + def _match_argument(self, action, arg_strings_pattern): + # match the pattern for this action to the arg strings + nargs_pattern = self._get_nargs_pattern(action) + match = _re.match(nargs_pattern, arg_strings_pattern) + + # raise an exception if we weren't able to find a match + if match is None: + nargs_errors = { + None: _('expected one argument'), + OPTIONAL: _('expected at most one argument'), + ONE_OR_MORE: _('expected at least one argument'), + } + default = ngettext('expected %s argument', + 'expected %s arguments', + action.nargs) % action.nargs + msg = nargs_errors.get(action.nargs, default) + raise ArgumentError(action, msg) + + # return the number of arguments matched + return len(match.group(1)) + + def _match_arguments_partial(self, actions, arg_strings_pattern): + # progressively shorten the actions list by slicing off the + # final actions until we find a match + result = [] + for i in range(len(actions), 0, -1): + actions_slice = actions[:i] + pattern = ''.join([self._get_nargs_pattern(action) + for action in actions_slice]) + match = _re.match(pattern, arg_strings_pattern) + if match is not None: + result.extend([len(string) for string in match.groups()]) + break + + # return the list of arg string counts + return result + + def 
_parse_optional(self, arg_string): + # if it's an empty string, it was meant to be a positional + if not arg_string: + return None + + # if it doesn't start with a prefix, it was meant to be positional + if not arg_string[0] in self.prefix_chars: + return None + + # if the option string is present in the parser, return the action + if arg_string in self._option_string_actions: + action = self._option_string_actions[arg_string] + return action, arg_string, None + + # if it's just a single character, it was meant to be positional + if len(arg_string) == 1: + return None + + # if the option string before the "=" is present, return the action + if '=' in arg_string: + option_string, explicit_arg = arg_string.split('=', 1) + if option_string in self._option_string_actions: + action = self._option_string_actions[option_string] + return action, option_string, explicit_arg + + # search through all possible prefixes of the option string + # and all actions in the parser for possible interpretations + option_tuples = self._get_option_tuples(arg_string) + + # if multiple actions match, the option string was ambiguous + if len(option_tuples) > 1: + options = ', '.join([option_string + for action, option_string, explicit_arg in option_tuples]) + args = {'option': arg_string, 'matches': options} + msg = _('ambiguous option: %(option)s could match %(matches)s') + self.error(msg % args) + + # if exactly one action matched, this segmentation is good, + # so return the parsed action + elif len(option_tuples) == 1: + option_tuple, = option_tuples + return option_tuple + + # if it was not found as an option, but it looks like a negative + # number, it was meant to be positional + # unless there are negative-number-like options + if self._negative_number_matcher.match(arg_string): + if not self._has_negative_number_optionals: + return None + + # if it contains a space, it was meant to be a positional + if ' ' in arg_string: + return None + + # it was meant to be an optional but there 
is no such option + # in this parser (though it might be a valid option in a subparser) + return None, arg_string, None + + def _get_option_tuples(self, option_string): + result = [] + + # option strings starting with two prefix characters are only + # split at the '=' + chars = self.prefix_chars + if option_string[0] in chars and option_string[1] in chars: + if '=' in option_string: + option_prefix, explicit_arg = option_string.split('=', 1) + else: + option_prefix = option_string + explicit_arg = None + for option_string in self._option_string_actions: + if option_string.startswith(option_prefix): + action = self._option_string_actions[option_string] + tup = action, option_string, explicit_arg + result.append(tup) + + # single character options can be concatenated with their arguments + # but multiple character options always have to have their argument + # separate + elif option_string[0] in chars and option_string[1] not in chars: + option_prefix = option_string + explicit_arg = None + short_option_prefix = option_string[:2] + short_explicit_arg = option_string[2:] + + for option_string in self._option_string_actions: + if option_string == short_option_prefix: + action = self._option_string_actions[option_string] + tup = action, option_string, short_explicit_arg + result.append(tup) + elif option_string.startswith(option_prefix): + action = self._option_string_actions[option_string] + tup = action, option_string, explicit_arg + result.append(tup) + + # shouldn't ever get here + else: + self.error(_('unexpected option string: %s') % option_string) + + # return the collected option tuples + return result + + def _get_nargs_pattern(self, action): + # in all examples below, we have to allow for '--' args + # which are represented as '-' in the pattern + nargs = action.nargs + + # the default (None) is assumed to be a single argument + if nargs is None: + nargs_pattern = '(-*A-*)' + + # allow zero or one arguments + elif nargs == OPTIONAL: + nargs_pattern = 
'(-*A?-*)' + + # allow zero or more arguments + elif nargs == ZERO_OR_MORE: + nargs_pattern = '(-*[A-]*)' + + # allow one or more arguments + elif nargs == ONE_OR_MORE: + nargs_pattern = '(-*A[A-]*)' + + # allow any number of options or arguments + elif nargs == REMAINDER: + nargs_pattern = '([-AO]*)' + + # allow one argument followed by any number of options or arguments + elif nargs == PARSER: + nargs_pattern = '(-*A[-AO]*)' + + # all others should be integers + else: + nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs) + + # if this is an optional action, -- is not allowed + if action.option_strings: + nargs_pattern = nargs_pattern.replace('-*', '') + nargs_pattern = nargs_pattern.replace('-', '') + + # return the pattern + return nargs_pattern + + # ======================== + # Value conversion methods + # ======================== + def _get_values(self, action, arg_strings): + # for everything but PARSER args, strip out '--' + if action.nargs not in [PARSER, REMAINDER]: + arg_strings = [s for s in arg_strings if s != '--'] + + # optional argument produces a default when not present + if not arg_strings and action.nargs == OPTIONAL: + if action.option_strings: + value = action.const + else: + value = action.default + if isinstance(value, str): + value = self._get_value(action, value) + self._check_value(action, value) + + # when nargs='*' on a positional, if there were no command-line + # args, use the default if it is anything other than None + elif (not arg_strings and action.nargs == ZERO_OR_MORE and + not action.option_strings): + if action.default is not None: + value = action.default + else: + value = arg_strings + self._check_value(action, value) + + # single argument or optional argument produces a single value + elif len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]: + arg_string, = arg_strings + value = self._get_value(action, arg_string) + self._check_value(action, value) + + # REMAINDER arguments convert all values, checking none + elif 
action.nargs == REMAINDER: + value = [self._get_value(action, v) for v in arg_strings] + + # PARSER arguments convert all values, but check only the first + elif action.nargs == PARSER: + value = [self._get_value(action, v) for v in arg_strings] + self._check_value(action, value[0]) + + # all other types of nargs produce a list + else: + value = [self._get_value(action, v) for v in arg_strings] + for v in value: + self._check_value(action, v) + + # return the converted value + return value + + def _get_value(self, action, arg_string): + type_func = self._registry_get('type', action.type, action.type) + if not _callable(type_func): + msg = _('%r is not callable') + raise ArgumentError(action, msg % type_func) + + # convert the value to the appropriate type + try: + result = type_func(arg_string) + + # ArgumentTypeErrors indicate errors + except ArgumentTypeError: + name = getattr(action.type, '__name__', repr(action.type)) + msg = str(_sys.exc_info()[1]) + raise ArgumentError(action, msg) + + # TypeErrors or ValueErrors also indicate errors + except (TypeError, ValueError): + name = getattr(action.type, '__name__', repr(action.type)) + args = {'type': name, 'value': arg_string} + msg = _('invalid %(type)s value: %(value)r') + raise ArgumentError(action, msg % args) + + # return the converted value + return result + + def _check_value(self, action, value): + # converted value must be one of the choices (if specified) + if action.choices is not None and value not in action.choices: + args = {'value': value, + 'choices': ', '.join(map(repr, action.choices))} + msg = _('invalid choice: %(value)r (choose from %(choices)s)') + raise ArgumentError(action, msg % args) + + # ======================= + # Help-formatting methods + # ======================= + def format_usage(self): + formatter = self._get_formatter() + formatter.add_usage(self.usage, self._actions, + self._mutually_exclusive_groups) + return formatter.format_help() + + def format_help(self): + formatter = 
self._get_formatter() + + # usage + formatter.add_usage(self.usage, self._actions, + self._mutually_exclusive_groups) + + # description + formatter.add_text(self.description) + + # positionals, optionals and user-defined groups + for action_group in self._action_groups: + formatter.start_section(action_group.title) + formatter.add_text(action_group.description) + formatter.add_arguments(action_group._group_actions) + formatter.end_section() + + # epilog + formatter.add_text(self.epilog) + + # determine help from format above + return formatter.format_help() + + def format_version(self): + import warnings + warnings.warn( + 'The format_version method is deprecated -- the "version" ' + 'argument to ArgumentParser is no longer supported.', + DeprecationWarning) + formatter = self._get_formatter() + formatter.add_text(self.version) + return formatter.format_help() + + def _get_formatter(self): + return self.formatter_class(prog=self.prog) + + # ===================== + # Help-printing methods + # ===================== + def print_usage(self, file=None): + if file is None: + file = _sys.stdout + self._print_message(self.format_usage(), file) + + def print_help(self, file=None): + if file is None: + file = _sys.stdout + self._print_message(self.format_help(), file) + + def print_version(self, file=None): + import warnings + warnings.warn( + 'The print_version method is deprecated -- the "version" ' + 'argument to ArgumentParser is no longer supported.', + DeprecationWarning) + self._print_message(self.format_version(), file) + + def _print_message(self, message, file=None): + if message: + if file is None: + file = _sys.stderr + file.write(message) + + # =============== + # Exiting methods + # =============== + def exit(self, status=0, message=None): + if message: + self._print_message(message, _sys.stderr) + _sys.exit(status) + + def error(self, message): + """error(message: string) + + Prints a usage message incorporating the message to stderr and + exits. 
+ + If you override this in a subclass, it should not return -- it + should either exit or raise an exception. + """ + self.print_usage(_sys.stderr) + args = {'prog': self.prog, 'message': message} + self.exit(2, _('%(prog)s: error: %(message)s\n') % args) diff --git a/lib/configobj.py b/lib/configobj.py new file mode 100644 index 00000000..c1f6e6df --- /dev/null +++ b/lib/configobj.py @@ -0,0 +1,2468 @@ +# configobj.py +# A config file reader/writer that supports nested sections in config files. +# Copyright (C) 2005-2010 Michael Foord, Nicola Larosa +# E-mail: fuzzyman AT voidspace DOT org DOT uk +# nico AT tekNico DOT net + +# ConfigObj 4 +# http://www.voidspace.org.uk/python/configobj.html + +# Released subject to the BSD License +# Please see http://www.voidspace.org.uk/python/license.shtml + +# Scripts maintained at http://www.voidspace.org.uk/python/index.shtml +# For information about bugfixes, updates and support, please join the +# ConfigObj mailing list: +# http://lists.sourceforge.net/lists/listinfo/configobj-develop +# Comments, suggestions and bug reports welcome. + +from __future__ import generators + +import os +import re +import sys + +from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF16_BE, BOM_UTF16_LE + + +# imported lazily to avoid startup performance hit if it isn't used +compiler = None + +# A dictionary mapping BOM to +# the encoding to decode with, and what to set the +# encoding attribute to. +BOMS = { + BOM_UTF8: ('utf_8', None), + BOM_UTF16_BE: ('utf16_be', 'utf_16'), + BOM_UTF16_LE: ('utf16_le', 'utf_16'), + BOM_UTF16: ('utf_16', 'utf_16'), + } +# All legal variants of the BOM codecs. +# TODO: the list of aliases is not meant to be exhaustive, is there a +# better way ? 
+BOM_LIST = { + 'utf_16': 'utf_16', + 'u16': 'utf_16', + 'utf16': 'utf_16', + 'utf-16': 'utf_16', + 'utf16_be': 'utf16_be', + 'utf_16_be': 'utf16_be', + 'utf-16be': 'utf16_be', + 'utf16_le': 'utf16_le', + 'utf_16_le': 'utf16_le', + 'utf-16le': 'utf16_le', + 'utf_8': 'utf_8', + 'u8': 'utf_8', + 'utf': 'utf_8', + 'utf8': 'utf_8', + 'utf-8': 'utf_8', + } + +# Map of encodings to the BOM to write. +BOM_SET = { + 'utf_8': BOM_UTF8, + 'utf_16': BOM_UTF16, + 'utf16_be': BOM_UTF16_BE, + 'utf16_le': BOM_UTF16_LE, + None: BOM_UTF8 + } + + +def match_utf8(encoding): + return BOM_LIST.get(encoding.lower()) == 'utf_8' + + +# Quote strings used for writing values +squot = "'%s'" +dquot = '"%s"' +noquot = "%s" +wspace_plus = ' \r\n\v\t\'"' +tsquot = '"""%s"""' +tdquot = "'''%s'''" + +# Sentinel for use in getattr calls to replace hasattr +MISSING = object() + +__version__ = '4.7.2' + +try: + any +except NameError: + def any(iterable): + for entry in iterable: + if entry: + return True + return False + + +__all__ = ( + '__version__', + 'DEFAULT_INDENT_TYPE', + 'DEFAULT_INTERPOLATION', + 'ConfigObjError', + 'NestingError', + 'ParseError', + 'DuplicateError', + 'ConfigspecError', + 'ConfigObj', + 'SimpleVal', + 'InterpolationError', + 'InterpolationLoopError', + 'MissingInterpolationOption', + 'RepeatSectionError', + 'ReloadError', + 'UnreprError', + 'UnknownType', + 'flatten_errors', + 'get_extra_values' +) + +DEFAULT_INTERPOLATION = 'configparser' +DEFAULT_INDENT_TYPE = ' ' +MAX_INTERPOL_DEPTH = 10 + +OPTION_DEFAULTS = { + 'interpolation': True, + 'raise_errors': False, + 'list_values': True, + 'create_empty': False, + 'file_error': False, + 'configspec': None, + 'stringify': True, + # option may be set to one of ('', ' ', '\t') + 'indent_type': None, + 'encoding': None, + 'default_encoding': None, + 'unrepr': False, + 'write_empty_values': False, +} + + + +def getObj(s): + global compiler + if compiler is None: + import compiler + s = "a=" + s + p = compiler.parse(s) + return 
p.getChildren()[1].getChildren()[0].getChildren()[1] + + +class UnknownType(Exception): + pass + + +class Builder(object): + + def build(self, o): + m = getattr(self, 'build_' + o.__class__.__name__, None) + if m is None: + raise UnknownType(o.__class__.__name__) + return m(o) + + def build_List(self, o): + return map(self.build, o.getChildren()) + + def build_Const(self, o): + return o.value + + def build_Dict(self, o): + d = {} + i = iter(map(self.build, o.getChildren())) + for el in i: + d[el] = i.next() + return d + + def build_Tuple(self, o): + return tuple(self.build_List(o)) + + def build_Name(self, o): + if o.name == 'None': + return None + if o.name == 'True': + return True + if o.name == 'False': + return False + + # An undefined Name + raise UnknownType('Undefined Name') + + def build_Add(self, o): + real, imag = map(self.build_Const, o.getChildren()) + try: + real = float(real) + except TypeError: + raise UnknownType('Add') + if not isinstance(imag, complex) or imag.real != 0.0: + raise UnknownType('Add') + return real+imag + + def build_Getattr(self, o): + parent = self.build(o.expr) + return getattr(parent, o.attrname) + + def build_UnarySub(self, o): + return -self.build_Const(o.getChildren()[0]) + + def build_UnaryAdd(self, o): + return self.build_Const(o.getChildren()[0]) + + +_builder = Builder() + + +def unrepr(s): + if not s: + return s + return _builder.build(getObj(s)) + + + +class ConfigObjError(SyntaxError): + """ + This is the base class for all errors that ConfigObj raises. + It is a subclass of SyntaxError. + """ + def __init__(self, message='', line_number=None, line=''): + self.line = line + self.line_number = line_number + SyntaxError.__init__(self, message) + + +class NestingError(ConfigObjError): + """ + This error indicates a level of nesting that doesn't match. + """ + + +class ParseError(ConfigObjError): + """ + This error indicates that a line is badly written. 
class InterpolationEngine(object):
    """
    A helper class to help perform string interpolation.

    This class is an abstract base class; its descendants perform
    the actual work by overriding ``_parse_match`` (and usually the
    ``_KEYCRE`` pattern and ``_cookie`` marker).
    """

    # compiled regexp to use in self.interpolate()
    _KEYCRE = re.compile(r"%\(([^)]*)\)s")
    # cheap pre-check: a value without this character is never interpolated
    _cookie = '%'

    def __init__(self, section):
        # the Section instance that "owns" this engine
        self.section = section

    def interpolate(self, key, value):
        """
        Return ``value`` with every interpolation reference expanded.

        ``key`` is the name under which ``value`` is stored; it is only
        used for loop detection. Raises ``InterpolationLoopError`` when a
        key is reached again while it is still being expanded.
        """
        # short-cut
        if self._cookie not in value:
            return value

        def recursive_interpolate(key, value, section, backtrail):
            """The function that does the actual work.

            ``value``: the string we're trying to interpolate.
            ``section``: the section in which that string was found
            ``backtrail``: a dict to keep track of where we've been,
                           to detect and prevent infinite recursion loops

            This is similar to a depth-first-search algorithm.
            """
            # Have we been here already?
            if (key, section.name) in backtrail:
                # Yes - infinite loop detected
                raise InterpolationLoopError(key)
            # Place a marker on our backtrail so we won't come back here again
            backtrail[(key, section.name)] = 1

            # Now start the actual work
            match = self._KEYCRE.search(value)
            while match:
                # The actual parsing of the match is implementation-dependent,
                # so delegate to our helper function
                k, v, s = self._parse_match(match)
                if k is None:
                    # That's the signal that no further interpolation is needed
                    replacement = v
                else:
                    # Further interpolation may be needed to obtain final value
                    replacement = recursive_interpolate(k, v, s, backtrail)
                # Replace the matched string with its final value
                start, end = match.span()
                value = ''.join((value[:start], replacement, value[end:]))
                # Resume after the replacement so its text is not re-scanned
                new_search_start = start + len(replacement)
                # Pick up the next interpolation key, if any, for next time
                # through the while loop
                match = self._KEYCRE.search(value, new_search_start)

            # Now safe to come back here again; remove marker from backtrail
            del backtrail[(key, section.name)]

            return value

        # Back in interpolate(), all we have to do is kick off the recursive
        # function with appropriate starting values
        value = recursive_interpolate(key, value, self.section, {})
        return value

    def _fetch(self, key):
        """Helper function to fetch values from owning section.

        Looks in the current section, then in its ``DEFAULT`` member, then
        repeats in each parent up to the top level.

        Returns a 2-tuple: the value, and the section where it was found.
        Raises ``MissingInterpolationOption`` when no value is found.
        """
        # switch off interpolation before we try and fetch anything !
        save_interp = self.section.main.interpolation
        self.section.main.interpolation = False
        try:
            # Start at section that "owns" this InterpolationEngine
            current_section = self.section
            while True:
                # try the current section first
                val = current_section.get(key)
                if val is not None and not isinstance(val, Section):
                    break
                # try "DEFAULT" next
                val = current_section.get('DEFAULT', {}).get(key)
                if val is not None and not isinstance(val, Section):
                    break
                # move up to parent and try again
                # top-level's parent is itself
                if current_section.parent is current_section:
                    # reached top level, time to give up
                    break
                current_section = current_section.parent
        finally:
            # restore interpolation to its previous value even when the
            # lookup itself raises (e.g. ``DEFAULT`` is not a mapping);
            # otherwise interpolation would stay switched off for good
            self.section.main.interpolation = save_interp
        if val is None:
            raise MissingInterpolationOption(key)
        return val, current_section

    def _parse_match(self, match):
        """Implementation-dependent helper function.

        Will be passed a match object corresponding to the interpolation
        key we just found (e.g., "%(foo)s" or "$foo"). Should look up that
        key in the appropriate config file section (using the ``_fetch()``
        helper function) and return a 3-tuple: (key, value, section)

        ``key`` is the name of the key we're looking for
        ``value`` is the value found for that key
        ``section`` is a reference to the section where it was found

        ``key`` and ``section`` should be None if no further
        interpolation should be performed on the resulting value
        (e.g., if we interpolated "$$" and returned "$").
        """
        raise NotImplementedError()
+ """ + + + def __setstate__(self, state): + dict.update(self, state[0]) + self.__dict__.update(state[1]) + + def __reduce__(self): + state = (dict(self), self.__dict__) + return (__newobj__, (self.__class__,), state) + + + def __init__(self, parent, depth, main, indict=None, name=None): + """ + * parent is the section above + * depth is the depth level of this section + * main is the main ConfigObj + * indict is a dictionary to initialise the section with + """ + if indict is None: + indict = {} + dict.__init__(self) + # used for nesting level *and* interpolation + self.parent = parent + # used for the interpolation attribute + self.main = main + # level of nesting depth of this Section + self.depth = depth + # purely for information + self.name = name + # + self._initialise() + # we do this explicitly so that __setitem__ is used properly + # (rather than just passing to ``dict.__init__``) + for entry, value in indict.iteritems(): + self[entry] = value + + + def _initialise(self): + # the sequence of scalar values in this Section + self.scalars = [] + # the sequence of sections in this Section + self.sections = [] + # for comments :-) + self.comments = {} + self.inline_comments = {} + # the configspec + self.configspec = None + # for defaults + self.defaults = [] + self.default_values = {} + self.extra_values = [] + self._created = False + + + def _interpolate(self, key, value): + try: + # do we already have an interpolation engine? + engine = self._interpolation_engine + except AttributeError: + # not yet: first time running _interpolate(), so pick the engine + name = self.main.interpolation + if name == True: # note that "if name:" would be incorrect here + # backwards-compatibility: interpolation=True means use default + name = DEFAULT_INTERPOLATION + name = name.lower() # so that "Template", "template", etc. 
all work + class_ = interpolation_engines.get(name, None) + if class_ is None: + # invalid value for self.main.interpolation + self.main.interpolation = False + return value + else: + # save reference to engine so we don't have to do this again + engine = self._interpolation_engine = class_(self) + # let the engine do the actual work + return engine.interpolate(key, value) + + + def __getitem__(self, key): + """Fetch the item and do string interpolation.""" + val = dict.__getitem__(self, key) + if self.main.interpolation: + if isinstance(val, basestring): + return self._interpolate(key, val) + if isinstance(val, list): + def _check(entry): + if isinstance(entry, basestring): + return self._interpolate(key, entry) + return entry + new = [_check(entry) for entry in val] + if new != val: + return new + return val + + + def __setitem__(self, key, value, unrepr=False): + """ + Correctly set a value. + + Making dictionary values Section instances. + (We have to special case 'Section' instances - which are also dicts) + + Keys must be strings. + Values need only be strings (or lists of strings) if + ``main.stringify`` is set. + + ``unrepr`` must be set when setting a value to a dictionary, without + creating a new sub-section. + """ + if not isinstance(key, basestring): + raise ValueError('The key "%s" is not a string.' 
% key) + + # add the comment + if key not in self.comments: + self.comments[key] = [] + self.inline_comments[key] = '' + # remove the entry from defaults + if key in self.defaults: + self.defaults.remove(key) + # + if isinstance(value, Section): + if key not in self: + self.sections.append(key) + dict.__setitem__(self, key, value) + elif isinstance(value, dict) and not unrepr: + # First create the new depth level, + # then create the section + if key not in self: + self.sections.append(key) + new_depth = self.depth + 1 + dict.__setitem__( + self, + key, + Section( + self, + new_depth, + self.main, + indict=value, + name=key)) + else: + if key not in self: + self.scalars.append(key) + if not self.main.stringify: + if isinstance(value, basestring): + pass + elif isinstance(value, (list, tuple)): + for entry in value: + if not isinstance(entry, basestring): + raise TypeError('Value is not a string "%s".' % entry) + else: + raise TypeError('Value is not a string "%s".' % value) + dict.__setitem__(self, key, value) + + + def __delitem__(self, key): + """Remove items from the sequence when deleting.""" + dict. __delitem__(self, key) + if key in self.scalars: + self.scalars.remove(key) + else: + self.sections.remove(key) + del self.comments[key] + del self.inline_comments[key] + + + def get(self, key, default=None): + """A version of ``get`` that doesn't bypass string interpolation.""" + try: + return self[key] + except KeyError: + return default + + + def update(self, indict): + """ + A version of update that uses our ``__setitem__``. + """ + for entry in indict: + self[entry] = indict[entry] + + + def pop(self, key, default=MISSING): + """ + 'D.pop(k[,d]) -> v, remove specified key and return the corresponding value. 
def popitem(self):
    """
    Remove and return the first ``(key, value)`` pair.

    "First" follows this section's own ordering: scalars come before
    sections. Raises ``KeyError`` when there is nothing left to pop.
    """
    ordered = self.scalars + self.sections
    try:
        first = ordered[0]
    except IndexError:
        raise KeyError(": 'popitem(): dictionary is empty'")
    pair = (first, self[first])
    del self[first]
    return pair
def merge(self, indict):
    """
    Update this section from ``indict``, descending into sub-mappings.

    When a key already holds a dictionary here and the incoming value is
    a dictionary too, the two are merged recursively; in every other case
    the incoming value simply replaces (or adds) the entry.

    >>> a = '''[section1]
    ...     option1 = True
    ...     [[subsection]]
    ...     more_options = False
    ...     # end of file'''.splitlines()
    >>> b = '''# File is user.ini
    ...     [section1]
    ...     option1 = False
    ...     # end of file'''.splitlines()
    >>> c1 = ConfigObj(b)
    >>> c2 = ConfigObj(a)
    >>> c2.merge(c1)
    >>> c2
    ConfigObj({'section1': {'option1': 'False', 'subsection': {'more_options': 'False'}}})
    """
    for key, incoming in indict.items():
        both_mappings = (
            key in self
            and isinstance(self[key], dict)
            and isinstance(incoming, dict)
        )
        if not both_mappings:
            self[key] = incoming
            continue
        self[key].merge(incoming)
def walk(self, function, raise_errors=True,
        call_on_sections=False, **keywargs):
    """
    Call ``function(section, key, **keywargs)`` for every member and
    return a dictionary of the results, keyed by member name.

    Scalars are visited first, then sections. Each subsection is walked
    recursively and its result dictionary is stored under its name.

    If the function raises an exception, the error is re-raised unless
    ``raise_errors=False``, in which case the entry is set to ``False``.

    Any unrecognised keyword arguments passed to walk are passed on to
    the function.

    Note: if ``call_on_sections`` is ``True`` then - on encountering a
    subsection - the function is *first* called for the *whole*
    subsection, and then walk recurses into its members. This means the
    function must be able to handle strings, dictionaries and lists.
    This allows the key of subsections to be changed as well as that of
    ordinary members. The return value of the call on the whole
    subsection is discarded.

    .. admonition:: caution

        You can use ``walk`` to transform the names of members of a
        section but you mustn't add or delete members.

    >>> config = '''[XXXXsection]
    ... XXXXkey = XXXXvalue'''.splitlines()
    >>> cfg = ConfigObj(config)
    >>> cfg
    ConfigObj({'XXXXsection': {'XXXXkey': 'XXXXvalue'}})
    >>> def transform(section, key):
    ...     val = section[key]
    ...     newkey = key.replace('XXXX', 'CLIENT1')
    ...     section.rename(key, newkey)
    ...     if isinstance(val, (tuple, list, dict)):
    ...         pass
    ...     else:
    ...         val = val.replace('XXXX', 'CLIENT1')
    ...         section[newkey] = val
    >>> cfg.walk(transform, call_on_sections=True)
    {'CLIENT1section': {'CLIENT1key': None}}
    >>> cfg
    ConfigObj({'CLIENT1section': {'CLIENT1key': 'CLIENT1value'}})
    """
    results = {}

    # Scalars first. Positions are used instead of names because the
    # callback is allowed to rename the member it is handed.
    for index in range(len(self.scalars)):
        name = self.scalars[index]
        try:
            outcome = function(self, name, **keywargs)
        except Exception:
            if raise_errors:
                raise
            outcome = False
        # read the name again in case the callback changed it
        results[self.scalars[index]] = outcome

    # Then sections.
    for index in range(len(self.sections)):
        name = self.sections[index]
        if call_on_sections:
            try:
                function(self, name, **keywargs)
            except Exception:
                if raise_errors:
                    raise
                results[self.sections[index]] = False
        # read the name again in case the callback changed it
        name = self.sections[index]
        # whatever was stored for the whole-section call is replaced
        results[name] = self[name].walk(
            function,
            raise_errors=raise_errors,
            call_on_sections=call_on_sections,
            **keywargs)
    return results
def as_list(self, key):
    """
    Fetch the value stored under ``key``, always as a list.

    A tuple or list is copied into a new list; any other value is
    wrapped in a single-element list.

    >>> a = ConfigObj()
    >>> a['a'] = 1
    >>> a.as_list('a')
    [1]
    >>> a['a'] = (1,)
    >>> a.as_list('a')
    [1]
    >>> a['a'] = [1]
    >>> a.as_list('a')
    [1]
    """
    value = self[key]
    if not isinstance(value, (tuple, list)):
        return [value]
    return list(value)
def restore_defaults(self):
    """
    Restore the default value of every member that has one, recursively.

    Each key in ``self.default_values`` is handed to ``restore_default``,
    then the same is done for every subsection. Entries without a
    recorded default are neither deleted nor modified.

    This method will only work for a ConfigObj that was created
    with a configspec and has been validated.
    """
    for name in self.default_values:
        self.restore_default(name)

    for child in (self[name] for name in self.sections):
        child.restore_defaults()
def _handle_bom(self, infile):
    """
    Handle any BOM, and decode if necessary.

    If an encoding is specified, that *must* be used - but the BOM should
    still be removed (and the BOM attribute set).

    (If the encoding is wrongly specified, then a BOM for an alternative
    encoding won't be discovered or removed.)

    If an encoding is not specified, UTF8 or UTF16 BOM will be detected and
    removed. The BOM attribute will be set. UTF16 will be decoded to
    unicode.

    NOTE: This method must not be called with an empty ``infile``.

    Specifying the *wrong* encoding is likely to cause a
    ``UnicodeDecodeError``.

    ``infile`` must always be returned as a list of lines, but may be
    passed in as a single string.
    """
    if ((self.encoding is not None) and
        (self.encoding.lower() not in BOM_LIST)):
        # No need to check for a BOM
        # the encoding specified doesn't have one
        # just decode
        return self._decode(infile, self.encoding)

    # A BOM can only sit at the very start, so only the first line
    # (or the whole string) needs to be inspected.
    if isinstance(infile, (list, tuple)):
        line = infile[0]
    else:
        line = infile
    if self.encoding is not None:
        # encoding explicitly supplied
        # And it could have an associated BOM
        enc = BOM_LIST[self.encoding.lower()]
        if enc == 'utf_16':
            # For UTF16 we try big endian and little endian
            for BOM, (encoding, final_encoding) in BOMS.items():
                if not final_encoding:
                    # skip UTF8
                    continue
                # Test the first line, not ``infile`` itself: ``infile``
                # may be a list of lines, which has no ``startswith``.
                if line.startswith(BOM):
                    # BOM discovered; it is deliberately left in place and
                    # the ``BOM`` attribute is not set on this path
                    return self._decode(infile, encoding)

            # If we get this far, will *probably* raise a DecodeError
            # As it doesn't appear to start with a BOM
            return self._decode(infile, self.encoding)

        # UTF8 or an explicit-endian UTF16: look for that encoding's BOM
        BOM = BOM_SET[enc]
        if not line.startswith(BOM):
            return self._decode(infile, self.encoding)

        newline = line[len(BOM):]

        # BOM removed
        if isinstance(infile, (list, tuple)):
            infile[0] = newline
        else:
            infile = newline
        self.BOM = True
        return self._decode(infile, self.encoding)

    # No encoding specified - so we need to check for UTF8/UTF16
    for BOM, (encoding, final_encoding) in BOMS.items():
        if not line.startswith(BOM):
            continue
        else:
            # BOM discovered
            self.encoding = final_encoding
            if not final_encoding:
                self.BOM = True
                # UTF8
                # remove BOM
                newline = line[len(BOM):]
                if isinstance(infile, (list, tuple)):
                    infile[0] = newline
                else:
                    infile = newline
                # UTF8 - don't decode
                if isinstance(infile, basestring):
                    return infile.splitlines(True)
                else:
                    return infile
            # UTF16 - have to decode
            return self._decode(infile, encoding)

    # No BOM discovered and no encoding specified, just return
    if isinstance(infile, basestring):
        # infile read from a file will be a single string
        return infile.splitlines(True)
    return infile
def _parse(self, infile):
    """
    Actually parse the config file.

    ``infile`` is a sequence of lines. Section markers create nested
    ``Section`` objects, ``key = value`` lines are stored in the current
    section, and blank/comment lines are collected and attached to the
    next entry (or kept as the initial/final comment).

    Problems are reported through ``_handle_error``; after a reported
    problem the offending line is skipped and parsing carries on.
    """
    # list syntax is meaningless in unrepr mode; switch it off while
    # parsing and restore the caller's setting at the end
    temp_list_values = self.list_values
    if self.unrepr:
        self.list_values = False

    comment_list = []
    done_start = False
    this_section = self
    maxline = len(infile) - 1
    cur_index = -1
    reset_comment = False

    while cur_index < maxline:
        if reset_comment:
            comment_list = []
        cur_index += 1
        line = infile[cur_index]
        sline = line.strip()
        # do we have anything on the line ?
        if not sline or sline.startswith('#'):
            reset_comment = False
            comment_list.append(line)
            continue

        if not done_start:
            # preserve initial comment
            self.initial_comment = comment_list
            comment_list = []
            done_start = True

        reset_comment = True
        # first we check if it's a section marker
        mat = self._sectionmarker.match(line)
        if mat is not None:
            # is a section line
            (indent, sect_open, sect_name, sect_close, comment) = mat.groups()
            if indent and (self.indent_type is None):
                self.indent_type = indent
            cur_depth = sect_open.count('[')
            if cur_depth != sect_close.count(']'):
                self._handle_error("Cannot compute the section depth at line %s.",
                                   NestingError, infile, cur_index)
                continue

            if cur_depth < this_section.depth:
                # the new section is dropping back to a previous level
                try:
                    parent = self._match_depth(this_section,
                                               cur_depth).parent
                except SyntaxError:
                    self._handle_error("Cannot compute nesting level at line %s.",
                                       NestingError, infile, cur_index)
                    continue
            elif cur_depth == this_section.depth:
                # the new section is a sibling of the current section
                parent = this_section.parent
            elif cur_depth == this_section.depth + 1:
                # the new section is a child the current section
                parent = this_section
            else:
                self._handle_error("Section too nested at line %s.",
                                   NestingError, infile, cur_index)
                # No usable parent exists for this marker. Without this
                # ``continue`` the code below would run with ``parent``
                # unbound (or left over from an earlier marker).
                continue

            sect_name = self._unquote(sect_name)
            if sect_name in parent:
                self._handle_error('Duplicate section name at line %s.',
                                   DuplicateError, infile, cur_index)
                continue

            # create the new section
            this_section = Section(
                parent,
                cur_depth,
                self,
                name=sect_name)
            parent[sect_name] = this_section
            parent.inline_comments[sect_name] = comment
            parent.comments[sect_name] = comment_list
            continue
        #
        # it's not a section marker,
        # so it should be a valid ``key = value`` line
        mat = self._keyword.match(line)
        if mat is None:
            # it neither matched as a keyword
            # or a section marker
            self._handle_error(
                'Invalid line at line "%s".',
                ParseError, infile, cur_index)
            continue

        # is a keyword value
        # value will include any inline comment
        (indent, key, value) = mat.groups()
        if indent and (self.indent_type is None):
            self.indent_type = indent
        # check for a multiline value
        multiline = value[:3] in ['"""', "'''"]
        if multiline:
            try:
                value, comment, cur_index = self._multiline(
                    value, infile, cur_index, maxline)
            except SyntaxError:
                self._handle_error(
                    'Parse error in value at line %s.',
                    ParseError, infile, cur_index)
                continue
        if self.unrepr:
            # unrepr mode: the value text is evaluated as a Python
            # literal and any inline comment is dropped
            comment = ''
            try:
                value = unrepr(value)
            except UnknownType:
                self._handle_error(
                    'Unknown name or type in value at line %s.',
                    UnreprError, infile, cur_index)
                continue
            except Exception:
                # anything else raised while evaluating the literal is
                # reported as a parse error for this line
                self._handle_error(
                    'Parse error in value at line %s.',
                    UnreprError, infile, cur_index)
                continue
        elif not multiline:
            # extract comment and lists
            try:
                (value, comment) = self._handle_value(value)
            except SyntaxError:
                self._handle_error(
                    'Parse error in value at line %s.',
                    ParseError, infile, cur_index)
                continue
        #
        key = self._unquote(key)
        if key in this_section:
            self._handle_error(
                'Duplicate keyword name at line %s.',
                DuplicateError, infile, cur_index)
            continue
        # add the key.
        # we set unrepr because if we have got this far we will never
        # be creating a new section
        this_section.__setitem__(key, value, unrepr=True)
        this_section.inline_comments[key] = comment
        this_section.comments[key] = comment_list
    #
    if self.indent_type is None:
        # no indentation used, set the type accordingly
        self.indent_type = ''

    # preserve the final comment
    if not self and not self.initial_comment:
        self.initial_comment = comment_list
    elif not reset_comment:
        self.final_comment = comment_list
    self.list_values = temp_list_values
+ The error will have occured at ``cur_index`` + """ + line = infile[cur_index] + cur_index += 1 + message = text % cur_index + error = ErrorClass(message, cur_index, line) + if self.raise_errors: + # raise the error - parsing stops here + raise error + # store the error + # reraise when parsing has finished + self._errors.append(error) + + + def _unquote(self, value): + """Return an unquoted version of a value""" + if not value: + # should only happen during parsing of lists + raise SyntaxError + if (value[0] == value[-1]) and (value[0] in ('"', "'")): + value = value[1:-1] + return value + + + def _quote(self, value, multiline=True): + """ + Return a safely quoted version of a value. + + Raise a ConfigObjError if the value cannot be safely quoted. + If multiline is ``True`` (default) then use triple quotes + if necessary. + + * Don't quote values that don't need it. + * Recursively quote members of a list and return a comma joined list. + * Multiline is ``False`` for lists. + * Obey list syntax for empty and single member lists. + + If ``list_values=False`` then the value is only quoted if it contains + a ``\\n`` (is multiline) or '#'. + + If ``write_empty_values`` is set, and the value is an empty string, it + won't be quoted. + """ + if multiline and self.write_empty_values and value == '': + # Only if multiline is set, so that it is used for values not + # keys, and not values that are part of a list + return '' + + if multiline and isinstance(value, (list, tuple)): + if not value: + return ',' + elif len(value) == 1: + return self._quote(value[0], multiline=False) + ',' + return ', '.join([self._quote(val, multiline=False) + for val in value]) + if not isinstance(value, basestring): + if self.stringify: + value = str(value) + else: + raise TypeError('Value "%s" is not a string.' 
def _get_single_quote(self, value):
    """
    Choose the quoting template for a value that fits on one line.

    Return ``squot`` when ``value`` contains a double quote and ``dquot``
    otherwise.  Both names are defined outside this method (presumably
    the single- and double-quote ``%s`` templates -- confirm against
    their definitions).

    Raise ``ConfigObjError`` when ``value`` contains both quote
    characters, because neither template can wrap it safely.
    """
    has_double = '"' in value
    if has_double and "'" in value:
        raise ConfigObjError('Value "%s" cannot be safely quoted.' % value)
    return squot if has_double else dquot
+ if not self.list_values: + mat = self._nolistvalue.match(value) + if mat is None: + raise SyntaxError() + # NOTE: we don't unquote here + return mat.groups() + # + mat = self._valueexp.match(value) + if mat is None: + # the value is badly constructed, probably badly quoted, + # or an invalid list + raise SyntaxError() + (list_values, single, empty_list, comment) = mat.groups() + if (list_values == '') and (single is None): + # change this if you want to accept empty values + raise SyntaxError() + # NOTE: note there is no error handling from here if the regex + # is wrong: then incorrect values will slip through + if empty_list is not None: + # the single comma - meaning an empty list + return ([], comment) + if single is not None: + # handle empty values + if list_values and not single: + # FIXME: the '' is a workaround because our regex now matches + # '' at the end of a list if it has a trailing comma + single = None + else: + single = single or '""' + single = self._unquote(single) + if list_values == '': + # not a list value + return (single, comment) + the_list = self._listvalueexp.findall(list_values) + the_list = [self._unquote(val) for val in the_list] + if single is not None: + the_list += [single] + return (the_list, comment) + + + def _multiline(self, value, infile, cur_index, maxline): + """Extract the value, where we are in a multiline situation.""" + quot = value[:3] + newvalue = value[3:] + single_line = self._triple_quote[quot][0] + multi_line = self._triple_quote[quot][1] + mat = single_line.match(value) + if mat is not None: + retval = list(mat.groups()) + retval.append(cur_index) + return retval + elif newvalue.find(quot) != -1: + # somehow the triple quote is missing + raise SyntaxError() + # + while cur_index < maxline: + cur_index += 1 + newvalue += '\n' + line = infile[cur_index] + if line.find(quot) == -1: + newvalue += line + else: + # end of multiline, process it + break + else: + # we've got to the end of the config, oops... 
+ raise SyntaxError() + mat = multi_line.match(line) + if mat is None: + # a badly formed line + raise SyntaxError() + (value, comment) = mat.groups() + return (newvalue + value, comment, cur_index) + + + def _handle_configspec(self, configspec): + """Parse the configspec.""" + # FIXME: Should we check that the configspec was created with the + # correct settings ? (i.e. ``list_values=False``) + if not isinstance(configspec, ConfigObj): + try: + configspec = ConfigObj(configspec, + raise_errors=True, + file_error=True, + _inspec=True) + except ConfigObjError, e: + # FIXME: Should these errors have a reference + # to the already parsed ConfigObj ? + raise ConfigspecError('Parsing configspec failed: %s' % e) + except IOError, e: + raise IOError('Reading configspec failed: %s' % e) + + self.configspec = configspec + + + + def _set_configspec(self, section, copy): + """ + Called by validate. Handles setting the configspec on subsections + including sections to be validated by __many__ + """ + configspec = section.configspec + many = configspec.get('__many__') + if isinstance(many, dict): + for entry in section.sections: + if entry not in configspec: + section[entry].configspec = many + + for entry in configspec.sections: + if entry == '__many__': + continue + if entry not in section: + section[entry] = {} + section[entry]._created = True + if copy: + # copy comments + section.comments[entry] = configspec.comments.get(entry, []) + section.inline_comments[entry] = configspec.inline_comments.get(entry, '') + + # Could be a scalar when we expect a section + if isinstance(section[entry], Section): + section[entry].configspec = configspec[entry] + + + def _write_line(self, indent_string, entry, this_entry, comment): + """Write an individual line, for the write method""" + # NOTE: the calls to self._quote here handles non-StringType values. 
+ if not self.unrepr: + val = self._decode_element(self._quote(this_entry)) + else: + val = repr(this_entry) + return '%s%s%s%s%s' % (indent_string, + self._decode_element(self._quote(entry, multiline=False)), + self._a_to_u(' = '), + val, + self._decode_element(comment)) + + + def _write_marker(self, indent_string, depth, entry, comment): + """Write a section marker line""" + return '%s%s%s%s%s' % (indent_string, + self._a_to_u('[' * depth), + self._quote(self._decode_element(entry), multiline=False), + self._a_to_u(']' * depth), + self._decode_element(comment)) + + + def _handle_comment(self, comment): + """Deal with a comment.""" + if not comment: + return '' + start = self.indent_type + if not comment.startswith('#'): + start += self._a_to_u(' # ') + return (start + comment) + + + # Public methods + + def write(self, outfile=None, section=None): + """ + Write the current ConfigObj as a file + + tekNico: FIXME: use StringIO instead of real files + + >>> filename = a.filename + >>> a.filename = 'test.ini' + >>> a.write() + >>> a.filename = filename + >>> a == ConfigObj('test.ini', raise_errors=True) + 1 + >>> import os + >>> os.remove('test.ini') + """ + if self.indent_type is None: + # this can be true if initialised from a dictionary + self.indent_type = DEFAULT_INDENT_TYPE + + out = [] + cs = self._a_to_u('#') + csp = self._a_to_u('# ') + if section is None: + int_val = self.interpolation + self.interpolation = False + section = self + for line in self.initial_comment: + line = self._decode_element(line) + stripped_line = line.strip() + if stripped_line and not stripped_line.startswith(cs): + line = csp + line + out.append(line) + + indent_string = self.indent_type * section.depth + for entry in (section.scalars + section.sections): + if entry in section.defaults: + # don't write out default values + continue + for comment_line in section.comments[entry]: + comment_line = self._decode_element(comment_line.lstrip()) + if comment_line and not 
comment_line.startswith(cs): + comment_line = csp + comment_line + out.append(indent_string + comment_line) + this_entry = section[entry] + comment = self._handle_comment(section.inline_comments[entry]) + + if isinstance(this_entry, dict): + # a section + out.append(self._write_marker( + indent_string, + this_entry.depth, + entry, + comment)) + out.extend(self.write(section=this_entry)) + else: + out.append(self._write_line( + indent_string, + entry, + this_entry, + comment)) + + if section is self: + for line in self.final_comment: + line = self._decode_element(line) + stripped_line = line.strip() + if stripped_line and not stripped_line.startswith(cs): + line = csp + line + out.append(line) + self.interpolation = int_val + + if section is not self: + return out + + if (self.filename is None) and (outfile is None): + # output a list of lines + # might need to encode + # NOTE: This will *screw* UTF16, each line will start with the BOM + if self.encoding: + out = [l.encode(self.encoding) for l in out] + if (self.BOM and ((self.encoding is None) or + (BOM_LIST.get(self.encoding.lower()) == 'utf_8'))): + # Add the UTF8 BOM + if not out: + out.append('') + out[0] = BOM_UTF8 + out[0] + return out + + # Turn the list to a string, joined with correct newlines + newline = self.newlines or os.linesep + if (getattr(outfile, 'mode', None) is not None and outfile.mode == 'w' + and sys.platform == 'win32' and newline == '\r\n'): + # Windows specific hack to avoid writing '\r\r\n' + newline = '\n' + output = self._a_to_u(newline).join(out) + if self.encoding: + output = output.encode(self.encoding) + if self.BOM and ((self.encoding is None) or match_utf8(self.encoding)): + # Add the UTF8 BOM + output = BOM_UTF8 + output + + if not output.endswith(newline): + output += newline + if outfile is not None: + outfile.write(output) + else: + h = open(self.filename, 'wb') + h.write(output) + h.close() + + + def validate(self, validator, preserve_errors=False, copy=False, + 
section=None): + """ + Test the ConfigObj against a configspec. + + It uses the ``validator`` object from *validate.py*. + + To run ``validate`` on the current ConfigObj, call: :: + + test = config.validate(validator) + + (Normally having previously passed in the configspec when the ConfigObj + was created - you can dynamically assign a dictionary of checks to the + ``configspec`` attribute of a section though). + + It returns ``True`` if everything passes, or a dictionary of + pass/fails (True/False). If every member of a subsection passes, it + will just have the value ``True``. (It also returns ``False`` if all + members fail). + + In addition, it converts the values from strings to their native + types if their checks pass (and ``stringify`` is set). + + If ``preserve_errors`` is ``True`` (``False`` is default) then instead + of a marking a fail with a ``False``, it will preserve the actual + exception object. This can contain info about the reason for failure. + For example the ``VdtValueTooSmallError`` indicates that the value + supplied was too small. If a value (or section) is missing it will + still be marked as ``False``. + + You must have the validate module to use ``preserve_errors=True``. + + You can then use the ``flatten_errors`` function to turn your nested + results dictionary into a flattened list of failures - useful for + displaying meaningful error messages. 
+ """ + if section is None: + if self.configspec is None: + raise ValueError('No configspec supplied.') + if preserve_errors: + # We do this once to remove a top level dependency on the validate module + # Which makes importing configobj faster + from validate import VdtMissingValue + self._vdtMissingValue = VdtMissingValue + + section = self + + if copy: + section.initial_comment = section.configspec.initial_comment + section.final_comment = section.configspec.final_comment + section.encoding = section.configspec.encoding + section.BOM = section.configspec.BOM + section.newlines = section.configspec.newlines + section.indent_type = section.configspec.indent_type + + # + # section.default_values.clear() #?? + configspec = section.configspec + self._set_configspec(section, copy) + + + def validate_entry(entry, spec, val, missing, ret_true, ret_false): + section.default_values.pop(entry, None) + + try: + section.default_values[entry] = validator.get_default_value(configspec[entry]) + except (KeyError, AttributeError, validator.baseErrorClass): + # No default, bad default or validator has no 'get_default_value' + # (e.g. 
SimpleVal) + pass + + try: + check = validator.check(spec, + val, + missing=missing + ) + except validator.baseErrorClass, e: + if not preserve_errors or isinstance(e, self._vdtMissingValue): + out[entry] = False + else: + # preserve the error + out[entry] = e + ret_false = False + ret_true = False + else: + ret_false = False + out[entry] = True + if self.stringify or missing: + # if we are doing type conversion + # or the value is a supplied default + if not self.stringify: + if isinstance(check, (list, tuple)): + # preserve lists + check = [self._str(item) for item in check] + elif missing and check is None: + # convert the None from a default to a '' + check = '' + else: + check = self._str(check) + if (check != val) or missing: + section[entry] = check + if not copy and missing and entry not in section.defaults: + section.defaults.append(entry) + return ret_true, ret_false + + # + out = {} + ret_true = True + ret_false = True + + unvalidated = [k for k in section.scalars if k not in configspec] + incorrect_sections = [k for k in configspec.sections if k in section.scalars] + incorrect_scalars = [k for k in configspec.scalars if k in section.sections] + + for entry in configspec.scalars: + if entry in ('__many__', '___many___'): + # reserved names + continue + if (not entry in section.scalars) or (entry in section.defaults): + # missing entries + # or entries from defaults + missing = True + val = None + if copy and entry not in section.scalars: + # copy comments + section.comments[entry] = ( + configspec.comments.get(entry, [])) + section.inline_comments[entry] = ( + configspec.inline_comments.get(entry, '')) + # + else: + missing = False + val = section[entry] + + ret_true, ret_false = validate_entry(entry, configspec[entry], val, + missing, ret_true, ret_false) + + many = None + if '__many__' in configspec.scalars: + many = configspec['__many__'] + elif '___many___' in configspec.scalars: + many = configspec['___many___'] + + if many is not None: + for entry 
in unvalidated: + val = section[entry] + ret_true, ret_false = validate_entry(entry, many, val, False, + ret_true, ret_false) + unvalidated = [] + + for entry in incorrect_scalars: + ret_true = False + if not preserve_errors: + out[entry] = False + else: + ret_false = False + msg = 'Value %r was provided as a section' % entry + out[entry] = validator.baseErrorClass(msg) + for entry in incorrect_sections: + ret_true = False + if not preserve_errors: + out[entry] = False + else: + ret_false = False + msg = 'Section %r was provided as a single value' % entry + out[entry] = validator.baseErrorClass(msg) + + # Missing sections will have been created as empty ones when the + # configspec was read. + for entry in section.sections: + # FIXME: this means DEFAULT is not copied in copy mode + if section is self and entry == 'DEFAULT': + continue + if section[entry].configspec is None: + unvalidated.append(entry) + continue + if copy: + section.comments[entry] = configspec.comments.get(entry, []) + section.inline_comments[entry] = configspec.inline_comments.get(entry, '') + check = self.validate(validator, preserve_errors=preserve_errors, copy=copy, section=section[entry]) + out[entry] = check + if check == False: + ret_true = False + elif check == True: + ret_false = False + else: + ret_true = False + + section.extra_values = unvalidated + if preserve_errors and not section._created: + # If the section wasn't created (i.e. it wasn't missing) + # then we can't return False, we need to preserve errors + ret_false = False + # + if ret_false and preserve_errors and out: + # If we are preserving errors, but all + # the failures are from missing sections / values + # then we can return False. Otherwise there is a + # real failure that we need to preserve. 
def flatten_errors(cfg, res, levels=None, results=None):
    """
    Flatten the nested results of ``ConfigObj.validate`` into a list.

    ``cfg`` is the ConfigObj instance that was checked and ``res`` is what
    ``validate`` returned for it.  ``levels`` and ``results`` carry state
    between recursive calls and should not be supplied by callers.

    Return a list of failures, each a tuple::

        ([list of sections...], key, result)

    *list of sections* is the path of section names leading to the key.
    ``key`` is ``None`` when a whole section failed (it was missing, or a
    section was expected where a scalar was given, or vice-versa).
    ``result`` is ``False``, or -- when ``validate`` was called with
    ``preserve_errors=True`` and a present value failed its check -- the
    exception object describing the failure.
    """
    if levels is None:
        # first time called
        levels = []
        results = []

    if res == True:
        return results

    if res == False or isinstance(res, Exception):
        # the section as a whole failed
        results.append((list(levels), None, res))
    else:
        for key, outcome in res.items():
            if outcome == True:
                continue
            if isinstance(cfg.get(key), dict):
                # go down one level; the recursive call pops it again
                levels.append(key)
                flatten_errors(cfg[key], outcome, levels, results)
            else:
                results.append((list(levels), key, outcome))

    # go up one level
    if levels:
        levels.pop()
    return results
For extra values in the top level + section the first member will be an empty tuple. For values in the 'foo' + section the first member will be ``('foo',)``. For members in the 'bar' + subsection of the 'foo' section the first member will be ``('foo', 'bar')``. + + NOTE: If you call ``get_extra_values`` on a ConfigObj instance that hasn't + been validated it will return an empty list. + """ + out = [] + + out.extend([(_prepend, name) for name in conf.extra_values]) + for name in conf.sections: + if name not in conf.extra_values: + out.extend(get_extra_values(conf[name], _prepend + (name,))) + return out + + +"""*A programming language is a medium of expression.* - Paul Graham""" diff --git a/lib/feedparser.py b/lib/feedparser.py new file mode 100644 index 00000000..b9144a9e --- /dev/null +++ b/lib/feedparser.py @@ -0,0 +1,3909 @@ +#!/usr/bin/env python +"""Universal feed parser + +Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds + +Visit http://feedparser.org/ for the latest version +Visit http://feedparser.org/docs/ for the latest documentation + +Required: Python 2.4 or later +Recommended: CJKCodecs and iconv_codec +""" + +__version__ = "5.0.1" +__license__ = """Copyright (c) 2002-2008, Mark Pilgrim, All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS' +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE.""" +__author__ = "Mark Pilgrim " +__contributors__ = ["Jason Diamond ", + "John Beimler ", + "Fazal Majid ", + "Aaron Swartz ", + "Kevin Marks ", + "Sam Ruby ", + "Ade Oshineye ", + "Martin Pool ", + "Kurt McKee "] +_debug = 0 + +# HTTP "User-Agent" header to send to servers when downloading feeds. +# If you are embedding feedparser in a larger application, you should +# change this to your application name and URL. +USER_AGENT = "UniversalFeedParser/%s +http://feedparser.org/" % __version__ + +# HTTP "Accept" header to send to servers when downloading feeds. If you don't +# want to send an Accept header, set this to None. +ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1" + +# List of preferred XML parsers, by SAX driver name. These will be tried first, +# but if they're not installed, Python will keep searching through its own list +# of pre-installed parsers until it finds one that supports everything we need. +PREFERRED_XML_PARSERS = ["drv_libxml2"] + +# If you want feedparser to automatically run HTML markup through HTML Tidy, set +# this to 1. Requires mxTidy +# or utidylib . +TIDY_MARKUP = 0 + +# List of Python interfaces for HTML Tidy, in order of preference. Only useful +# if TIDY_MARKUP = 1 +PREFERRED_TIDY_INTERFACES = ["uTidy", "mxTidy"] + +# If you want feedparser to automatically resolve all relative URIs, set this +# to 1. 
+RESOLVE_RELATIVE_URIS = 1 + +# If you want feedparser to automatically sanitize all potentially unsafe +# HTML content, set this to 1. +SANITIZE_HTML = 1 + +# ---------- Python 3 modules (make it work if possible) ---------- +try: + import rfc822 +except ImportError: + from email import _parseaddr as rfc822 + +try: + # Python 3.1 introduces bytes.maketrans and simultaneously + # deprecates string.maketrans; use bytes.maketrans if possible + _maketrans = bytes.maketrans +except (NameError, AttributeError): + import string + _maketrans = string.maketrans + +# base64 support for Atom feeds that contain embedded binary data +try: + import base64, binascii + # Python 3.1 deprecates decodestring in favor of decodebytes + _base64decode = getattr(base64, 'decodebytes', base64.decodestring) +except: + base64 = binascii = None + +def _s2bytes(s): + # Convert a UTF-8 str to bytes if the interpreter is Python 3 + try: + return bytes(s, 'utf8') + except (NameError, TypeError): + # In Python 2.5 and below, bytes doesn't exist (NameError) + # In Python 2.6 and above, bytes and str are the same (TypeError) + return s + +def _l2bytes(l): + # Convert a list of ints to bytes if the interpreter is Python 3 + try: + if bytes is not str: + # In Python 2.6 and above, this call won't raise an exception + # but it will return bytes([65]) as '[65]' instead of 'A' + return bytes(l) + raise NameError + except NameError: + return ''.join(map(chr, l)) + +# If you want feedparser to allow all URL schemes, set this to () +# List culled from Python's urlparse documentation at: +# http://docs.python.org/library/urlparse.html +# as well as from "URI scheme" at Wikipedia: +# https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme +# Many more will likely need to be added! 
ACCEPTABLE_URI_SCHEMES = (
    'file', 'ftp', 'gopher', 'h323', 'hdl', 'http', 'https', 'imap', 'mailto',
    'mms', 'news', 'nntp', 'prospero', 'rsync', 'rtsp', 'rtspu', 'sftp',
    'shttp', 'sip', 'sips', 'snews', 'svn', 'svn+ssh', 'telnet', 'wais',
    # Additional common-but-unofficial schemes
    'aim', 'callto', 'cvs', 'facetime', 'feed', 'git', 'gtalk', 'irc', 'ircs',
    'irc6', 'itms', 'mms', 'msnim', 'skype', 'ssh', 'smb', 'svn', 'ymsg',
)
#ACCEPTABLE_URI_SCHEMES = ()

# ---------- required modules (should come with any Python distribution) ----------
import sgmllib, re, sys, copy, urlparse, time, types, cgi, urllib, urllib2, datetime
try:
    from io import BytesIO as _StringIO
except ImportError:
    try:
        from cStringIO import StringIO as _StringIO
    except:
        from StringIO import StringIO as _StringIO

# ---------- optional modules (feedparser will work without these, but with reduced functionality) ----------

# gzip is included with most Python distributions, but may not be available if you compiled your own
try:
    import gzip
except:
    gzip = None
try:
    import zlib
except:
    zlib = None

# If a real XML parser is available, feedparser will attempt to use it.  feedparser has
# been tested with the built-in SAX parser, PyXML, and libxml2.  On platforms where the
# Python distribution does not come with an XML parser (such as Mac OS X 10.2 and some
# versions of FreeBSD), feedparser will quietly fall back on regex-based parsing.
try:
    import xml.sax
    xml.sax.make_parser(PREFERRED_XML_PARSERS) # test for valid parsers
    from xml.sax.saxutils import escape as _xmlescape
    _XML_AVAILABLE = 1
except:
    _XML_AVAILABLE = 0
    def _xmlescape(data, entities=None):
        """Stand-in for ``xml.sax.saxutils.escape`` when no XML parser loads.

        Escape ``&``, ``>`` and ``<`` in ``data``, then apply every
        ``{character: replacement}`` pair given in ``entities``.
        """
        # '&' goes first so the ampersands introduced by the next two
        # replacements are not escaped a second time.
        data = data.replace('&', '&amp;')
        data = data.replace('>', '&gt;')
        data = data.replace('<', '&lt;')
        # iterate the pairs, not the bare keys, of the mapping
        for char, entity in (entities or {}).items():
            data = data.replace(char, entity)
        return data

# cjkcodecs and iconv_codec provide support for more character encodings.
+# Both are available from http://cjkpython.i18n.org/ +try: + import cjkcodecs.aliases +except: + pass +try: + import iconv_codec +except: + pass + +# chardet library auto-detects character encodings +# Download from http://chardet.feedparser.org/ +try: + import chardet + if _debug: + import chardet.constants + chardet.constants._debug = 1 +except: + chardet = None + +# reversable htmlentitydefs mappings for Python 2.2 +try: + from htmlentitydefs import name2codepoint, codepoint2name +except: + import htmlentitydefs + name2codepoint={} + codepoint2name={} + for (name,codepoint) in htmlentitydefs.entitydefs.iteritems(): + if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1])) + name2codepoint[name]=ord(codepoint) + codepoint2name[ord(codepoint)]=name + +# BeautifulSoup parser used for parsing microformats from embedded HTML content +# http://www.crummy.com/software/BeautifulSoup/ +# feedparser is tested with BeautifulSoup 3.0.x, but it might work with the +# older 2.x series. If it doesn't, and you can figure out why, I'll accept a +# patch and modify the compatibility statement accordingly. +try: + import BeautifulSoup +except: + BeautifulSoup = None + +# ---------- don't touch these ---------- +class ThingsNobodyCaresAboutButMe(Exception): pass +class CharacterEncodingOverride(ThingsNobodyCaresAboutButMe): pass +class CharacterEncodingUnknown(ThingsNobodyCaresAboutButMe): pass +class NonXMLContentType(ThingsNobodyCaresAboutButMe): pass +class UndeclaredNamespace(Exception): pass + +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +sgmllib.special = re.compile(']|"[^"]*"(?=>|/|\s|\w+=)|'[^']*'(?=>|/|\s|\w+=))*(?=[<>])|.*?(?=[<>])''') + def search(self,string,index=0): + match = self.endbracket.match(string,index) + if match is not None: + # Returning a new object in the calling thread's context + # resolves a thread-safety. 
+ return EndBracketMatch(match) + return None + class EndBracketMatch: + def __init__(self, match): + self.match = match + def start(self, n): + return self.match.end(n) + sgmllib.endbracket = EndBracketRegEx() + +SUPPORTED_VERSIONS = {'': 'unknown', + 'rss090': 'RSS 0.90', + 'rss091n': 'RSS 0.91 (Netscape)', + 'rss091u': 'RSS 0.91 (Userland)', + 'rss092': 'RSS 0.92', + 'rss093': 'RSS 0.93', + 'rss094': 'RSS 0.94', + 'rss20': 'RSS 2.0', + 'rss10': 'RSS 1.0', + 'rss': 'RSS (unknown version)', + 'atom01': 'Atom 0.1', + 'atom02': 'Atom 0.2', + 'atom03': 'Atom 0.3', + 'atom10': 'Atom 1.0', + 'atom': 'Atom (unknown version)', + 'cdf': 'CDF', + 'hotrss': 'Hot RSS' + } + +try: + UserDict = dict +except NameError: + # Python 2.1 does not have dict + from UserDict import UserDict + def dict(aList): + rc = {} + for k, v in aList: + rc[k] = v + return rc + +class FeedParserDict(UserDict): + keymap = {'channel': 'feed', + 'items': 'entries', + 'guid': 'id', + 'date': 'updated', + 'date_parsed': 'updated_parsed', + 'description': ['summary', 'subtitle'], + 'url': ['href'], + 'modified': 'updated', + 'modified_parsed': 'updated_parsed', + 'issued': 'published', + 'issued_parsed': 'published_parsed', + 'copyright': 'rights', + 'copyright_detail': 'rights_detail', + 'tagline': 'subtitle', + 'tagline_detail': 'subtitle_detail'} + def __getitem__(self, key): + if key == 'category': + return UserDict.__getitem__(self, 'tags')[0]['term'] + if key == 'enclosures': + norel = lambda link: FeedParserDict([(name,value) for (name,value) in link.items() if name!='rel']) + return [norel(link) for link in UserDict.__getitem__(self, 'links') if link['rel']=='enclosure'] + if key == 'license': + for link in UserDict.__getitem__(self, 'links'): + if link['rel']=='license' and link.has_key('href'): + return link['href'] + if key == 'categories': + return [(tag['scheme'], tag['term']) for tag in UserDict.__getitem__(self, 'tags')] + realkey = self.keymap.get(key, key) + if type(realkey) == 
types.ListType: + for k in realkey: + if UserDict.__contains__(self, k): + return UserDict.__getitem__(self, k) + if UserDict.__contains__(self, key): + return UserDict.__getitem__(self, key) + return UserDict.__getitem__(self, realkey) + + def __setitem__(self, key, value): + for k in self.keymap.keys(): + if key == k: + key = self.keymap[k] + if type(key) == types.ListType: + key = key[0] + return UserDict.__setitem__(self, key, value) + + def get(self, key, default=None): + if self.has_key(key): + return self[key] + else: + return default + + def setdefault(self, key, value): + if not self.has_key(key): + self[key] = value + return self[key] + + def has_key(self, key): + try: + return hasattr(self, key) or UserDict.__contains__(self, key) + except AttributeError: + return False + # This alias prevents the 2to3 tool from changing the semantics of the + # __contains__ function below and exhausting the maximum recursion depth + __has_key = has_key + + def __getattr__(self, key): + try: + return self.__dict__[key] + except KeyError: + pass + try: + assert not key.startswith('_') + return self.__getitem__(key) + except: + raise AttributeError, "object has no attribute '%s'" % key + + def __setattr__(self, key, value): + if key.startswith('_') or key == 'data': + self.__dict__[key] = value + else: + return self.__setitem__(key, value) + + def __contains__(self, key): + return self.__has_key(key) + +def zopeCompatibilityHack(): + global FeedParserDict + del FeedParserDict + def FeedParserDict(aDict=None): + rc = {} + if aDict: + rc.update(aDict) + return rc + +_ebcdic_to_ascii_map = None +def _ebcdic_to_ascii(s): + global _ebcdic_to_ascii_map + if not _ebcdic_to_ascii_map: + emap = ( + 0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, + 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, + 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, + 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, + 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, + 
38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, + 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, + 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, + 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,201, + 202,106,107,108,109,110,111,112,113,114,203,204,205,206,207,208, + 209,126,115,116,117,118,119,120,121,122,210,211,212,213,214,215, + 216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231, + 123,65,66,67,68,69,70,71,72,73,232,233,234,235,236,237, + 125,74,75,76,77,78,79,80,81,82,238,239,240,241,242,243, + 92,159,83,84,85,86,87,88,89,90,244,245,246,247,248,249, + 48,49,50,51,52,53,54,55,56,57,250,251,252,253,254,255 + ) + _ebcdic_to_ascii_map = _maketrans( \ + _l2bytes(range(256)), _l2bytes(emap)) + return s.translate(_ebcdic_to_ascii_map) + +_cp1252 = { + unichr(128): unichr(8364), # euro sign + unichr(130): unichr(8218), # single low-9 quotation mark + unichr(131): unichr( 402), # latin small letter f with hook + unichr(132): unichr(8222), # double low-9 quotation mark + unichr(133): unichr(8230), # horizontal ellipsis + unichr(134): unichr(8224), # dagger + unichr(135): unichr(8225), # double dagger + unichr(136): unichr( 710), # modifier letter circumflex accent + unichr(137): unichr(8240), # per mille sign + unichr(138): unichr( 352), # latin capital letter s with caron + unichr(139): unichr(8249), # single left-pointing angle quotation mark + unichr(140): unichr( 338), # latin capital ligature oe + unichr(142): unichr( 381), # latin capital letter z with caron + unichr(145): unichr(8216), # left single quotation mark + unichr(146): unichr(8217), # right single quotation mark + unichr(147): unichr(8220), # left double quotation mark + unichr(148): unichr(8221), # right double quotation mark + unichr(149): unichr(8226), # bullet + unichr(150): unichr(8211), # en dash + unichr(151): unichr(8212), # em dash + unichr(152): unichr( 732), # small tilde + unichr(153): unichr(8482), # trade mark sign + unichr(154): 
unichr( 353), # latin small letter s with caron + unichr(155): unichr(8250), # single right-pointing angle quotation mark + unichr(156): unichr( 339), # latin small ligature oe + unichr(158): unichr( 382), # latin small letter z with caron + unichr(159): unichr( 376)} # latin capital letter y with diaeresis + +_urifixer = re.compile('^([A-Za-z][A-Za-z0-9+-.]*://)(/*)(.*?)') +def _urljoin(base, uri): + uri = _urifixer.sub(r'\1\3', uri) + try: + return urlparse.urljoin(base, uri) + except: + uri = urlparse.urlunparse([urllib.quote(part) for part in urlparse.urlparse(uri)]) + return urlparse.urljoin(base, uri) + +class _FeedParserMixin: + namespaces = {'': '', + 'http://backend.userland.com/rss': '', + 'http://blogs.law.harvard.edu/tech/rss': '', + 'http://purl.org/rss/1.0/': '', + 'http://my.netscape.com/rdf/simple/0.9/': '', + 'http://example.com/newformat#': '', + 'http://example.com/necho': '', + 'http://purl.org/echo/': '', + 'uri/of/echo/namespace#': '', + 'http://purl.org/pie/': '', + 'http://purl.org/atom/ns#': '', + 'http://www.w3.org/2005/Atom': '', + 'http://purl.org/rss/1.0/modules/rss091#': '', + + 'http://webns.net/mvcb/': 'admin', + 'http://purl.org/rss/1.0/modules/aggregation/': 'ag', + 'http://purl.org/rss/1.0/modules/annotate/': 'annotate', + 'http://media.tangent.org/rss/1.0/': 'audio', + 'http://backend.userland.com/blogChannelModule': 'blogChannel', + 'http://web.resource.org/cc/': 'cc', + 'http://backend.userland.com/creativeCommonsRssModule': 'creativeCommons', + 'http://purl.org/rss/1.0/modules/company': 'co', + 'http://purl.org/rss/1.0/modules/content/': 'content', + 'http://my.theinfo.org/changed/1.0/rss/': 'cp', + 'http://purl.org/dc/elements/1.1/': 'dc', + 'http://purl.org/dc/terms/': 'dcterms', + 'http://purl.org/rss/1.0/modules/email/': 'email', + 'http://purl.org/rss/1.0/modules/event/': 'ev', + 'http://rssnamespace.org/feedburner/ext/1.0': 'feedburner', + 'http://freshmeat.net/rss/fm/': 'fm', + 'http://xmlns.com/foaf/0.1/': 'foaf', + 
'http://www.w3.org/2003/01/geo/wgs84_pos#': 'geo', + 'http://postneo.com/icbm/': 'icbm', + 'http://purl.org/rss/1.0/modules/image/': 'image', + 'http://www.itunes.com/DTDs/PodCast-1.0.dtd': 'itunes', + 'http://example.com/DTDs/PodCast-1.0.dtd': 'itunes', + 'http://purl.org/rss/1.0/modules/link/': 'l', + 'http://search.yahoo.com/mrss': 'media', + #Version 1.1.2 of the Media RSS spec added the trailing slash on the namespace + 'http://search.yahoo.com/mrss/': 'media', + 'http://madskills.com/public/xml/rss/module/pingback/': 'pingback', + 'http://prismstandard.org/namespaces/1.2/basic/': 'prism', + 'http://www.w3.org/1999/02/22-rdf-syntax-ns#': 'rdf', + 'http://www.w3.org/2000/01/rdf-schema#': 'rdfs', + 'http://purl.org/rss/1.0/modules/reference/': 'ref', + 'http://purl.org/rss/1.0/modules/richequiv/': 'reqv', + 'http://purl.org/rss/1.0/modules/search/': 'search', + 'http://purl.org/rss/1.0/modules/slash/': 'slash', + 'http://schemas.xmlsoap.org/soap/envelope/': 'soap', + 'http://purl.org/rss/1.0/modules/servicestatus/': 'ss', + 'http://hacks.benhammersley.com/rss/streaming/': 'str', + 'http://purl.org/rss/1.0/modules/subscription/': 'sub', + 'http://purl.org/rss/1.0/modules/syndication/': 'sy', + 'http://schemas.pocketsoap.com/rss/myDescModule/': 'szf', + 'http://purl.org/rss/1.0/modules/taxonomy/': 'taxo', + 'http://purl.org/rss/1.0/modules/threading/': 'thr', + 'http://purl.org/rss/1.0/modules/textinput/': 'ti', + 'http://madskills.com/public/xml/rss/module/trackback/':'trackback', + 'http://wellformedweb.org/commentAPI/': 'wfw', + 'http://purl.org/rss/1.0/modules/wiki/': 'wiki', + 'http://www.w3.org/1999/xhtml': 'xhtml', + 'http://www.w3.org/1999/xlink': 'xlink', + 'http://www.w3.org/XML/1998/namespace': 'xml' +} + _matchnamespaces = {} + + can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'icon', 'logo'] + can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 
'rights', 'description'] + can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description'] + html_types = ['text/html', 'application/xhtml+xml'] + + def __init__(self, baseuri=None, baselang=None, encoding='utf-8'): + if _debug: sys.stderr.write('initializing FeedParser\n') + if not self._matchnamespaces: + for k, v in self.namespaces.items(): + self._matchnamespaces[k.lower()] = v + self.feeddata = FeedParserDict() # feed-level data + self.encoding = encoding # character encoding + self.entries = [] # list of entry-level data + self.version = '' # feed type/version, see SUPPORTED_VERSIONS + self.namespacesInUse = {} # dictionary of namespaces defined by the feed + + # the following are used internally to track state; + # this is really out of control and should be refactored + self.infeed = 0 + self.inentry = 0 + self.incontent = 0 + self.intextinput = 0 + self.inimage = 0 + self.inauthor = 0 + self.incontributor = 0 + self.inpublisher = 0 + self.insource = 0 + self.sourcedata = FeedParserDict() + self.contentparams = FeedParserDict() + self._summaryKey = None + self.namespacemap = {} + self.elementstack = [] + self.basestack = [] + self.langstack = [] + self.baseuri = baseuri or '' + self.lang = baselang or None + self.svgOK = 0 + self.hasTitle = 0 + if baselang: + self.feeddata['language'] = baselang.replace('_','-') + + def unknown_starttag(self, tag, attrs): + if _debug: sys.stderr.write('start %s with %s\n' % (tag, attrs)) + # normalize attrs + attrs = [(k.lower(), v) for k, v in attrs] + attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs] + # the sgml parser doesn't handle entities in attributes, but + # strict xml parsers do -- account for this difference + if isinstance(self, _LooseFeedParser): + attrs = [(k, v.replace('&', '&')) for k, v in attrs] + + # track xml:base and xml:lang + attrsD = dict(attrs) + baseuri = attrsD.get('xml:base', attrsD.get('base')) or 
self.baseuri + if type(baseuri) != type(u''): + try: + baseuri = unicode(baseuri, self.encoding) + except: + baseuri = unicode(baseuri, 'iso-8859-1') + # ensure that self.baseuri is always an absolute URI that + # uses a whitelisted URI scheme (e.g. not `javscript:`) + if self.baseuri: + self.baseuri = _makeSafeAbsoluteURI(self.baseuri, baseuri) or self.baseuri + else: + self.baseuri = _urljoin(self.baseuri, baseuri) + lang = attrsD.get('xml:lang', attrsD.get('lang')) + if lang == '': + # xml:lang could be explicitly set to '', we need to capture that + lang = None + elif lang is None: + # if no xml:lang is specified, use parent lang + lang = self.lang + if lang: + if tag in ('feed', 'rss', 'rdf:RDF'): + self.feeddata['language'] = lang.replace('_','-') + self.lang = lang + self.basestack.append(self.baseuri) + self.langstack.append(lang) + + # track namespaces + for prefix, uri in attrs: + if prefix.startswith('xmlns:'): + self.trackNamespace(prefix[6:], uri) + elif prefix == 'xmlns': + self.trackNamespace(None, uri) + + # track inline content + if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): + if tag in ['xhtml:div', 'div']: return # typepad does this 10/2007 + # element declared itself as escaped markup, but it isn't really + self.contentparams['type'] = 'application/xhtml+xml' + if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml': + if tag.find(':') <> -1: + prefix, tag = tag.split(':', 1) + namespace = self.namespacesInUse.get(prefix, '') + if tag=='math' and namespace=='http://www.w3.org/1998/Math/MathML': + attrs.append(('xmlns',namespace)) + if tag=='svg' and namespace=='http://www.w3.org/2000/svg': + attrs.append(('xmlns',namespace)) + if tag == 'svg': self.svgOK += 1 + return self.handle_data('<%s%s>' % (tag, self.strattrs(attrs)), escape=0) + + # match namespaces + if tag.find(':') <> -1: + prefix, suffix = tag.split(':', 1) + else: + prefix, suffix = '', 
tag + prefix = self.namespacemap.get(prefix, prefix) + if prefix: + prefix = prefix + '_' + + # special hack for better tracking of empty textinput/image elements in illformed feeds + if (not prefix) and tag not in ('title', 'link', 'description', 'name'): + self.intextinput = 0 + if (not prefix) and tag not in ('title', 'link', 'description', 'url', 'href', 'width', 'height'): + self.inimage = 0 + + # call special handler (if defined) or default handler + methodname = '_start_' + prefix + suffix + try: + method = getattr(self, methodname) + return method(attrsD) + except AttributeError: + # Since there's no handler or something has gone wrong we explicitly add the element and its attributes + unknown_tag = prefix + suffix + if len(attrsD) == 0: + # No attributes so merge it into the encosing dictionary + return self.push(unknown_tag, 1) + else: + # Has attributes so create it in its own dictionary + context = self._getContext() + context[unknown_tag] = attrsD + + def unknown_endtag(self, tag): + if _debug: sys.stderr.write('end %s\n' % tag) + # match namespaces + if tag.find(':') <> -1: + prefix, suffix = tag.split(':', 1) + else: + prefix, suffix = '', tag + prefix = self.namespacemap.get(prefix, prefix) + if prefix: + prefix = prefix + '_' + if suffix == 'svg' and self.svgOK: self.svgOK -= 1 + + # call special handler (if defined) or default handler + methodname = '_end_' + prefix + suffix + try: + if self.svgOK: raise AttributeError() + method = getattr(self, methodname) + method() + except AttributeError: + self.pop(prefix + suffix) + + # track inline content + if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): + # element declared itself as escaped markup, but it isn't really + if tag in ['xhtml:div', 'div']: return # typepad does this 10/2007 + self.contentparams['type'] = 'application/xhtml+xml' + if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml': + tag = 
tag.split(':')[-1] + self.handle_data('' % tag, escape=0) + + # track xml:base and xml:lang going out of scope + if self.basestack: + self.basestack.pop() + if self.basestack and self.basestack[-1]: + self.baseuri = self.basestack[-1] + if self.langstack: + self.langstack.pop() + if self.langstack: # and (self.langstack[-1] is not None): + self.lang = self.langstack[-1] + + def handle_charref(self, ref): + # called for each character reference, e.g. for ' ', ref will be '160' + if not self.elementstack: return + ref = ref.lower() + if ref in ('34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e'): + text = '&#%s;' % ref + else: + if ref[0] == 'x': + c = int(ref[1:], 16) + else: + c = int(ref) + text = unichr(c).encode('utf-8') + self.elementstack[-1][2].append(text) + + def handle_entityref(self, ref): + # called for each entity reference, e.g. for '©', ref will be 'copy' + if not self.elementstack: return + if _debug: sys.stderr.write('entering handle_entityref with %s\n' % ref) + if ref in ('lt', 'gt', 'quot', 'amp', 'apos'): + text = '&%s;' % ref + elif ref in self.entities.keys(): + text = self.entities[ref] + if text.startswith('&#') and text.endswith(';'): + return self.handle_entityref(text) + else: + try: name2codepoint[ref] + except KeyError: text = '&%s;' % ref + else: text = unichr(name2codepoint[ref]).encode('utf-8') + self.elementstack[-1][2].append(text) + + def handle_data(self, text, escape=1): + # called for each block of plain text, i.e. outside of any tag and + # not containing any character or entity references + if not self.elementstack: return + if escape and self.contentparams.get('type') == 'application/xhtml+xml': + text = _xmlescape(text) + self.elementstack[-1][2].append(text) + + def handle_comment(self, text): + # called for each comment, e.g. + pass + + def handle_pi(self, text): + # called for each processing instruction, e.g. 
+ pass + + def handle_decl(self, text): + pass + + def parse_declaration(self, i): + # override internal declaration handler to handle CDATA blocks + if _debug: sys.stderr.write('entering parse_declaration\n') + if self.rawdata[i:i+9] == '', i) + if k == -1: + # CDATA block began but didn't finish + k = len(self.rawdata) + return k + self.handle_data(_xmlescape(self.rawdata[i+9:k]), 0) + return k+3 + else: + k = self.rawdata.find('>', i) + if k >= 0: + return k+1 + else: + # We have an incomplete CDATA block. + return k + + def mapContentType(self, contentType): + contentType = contentType.lower() + if contentType == 'text' or contentType == 'plain': + contentType = 'text/plain' + elif contentType == 'html': + contentType = 'text/html' + elif contentType == 'xhtml': + contentType = 'application/xhtml+xml' + return contentType + + def trackNamespace(self, prefix, uri): + loweruri = uri.lower() + if (prefix, loweruri) == (None, 'http://my.netscape.com/rdf/simple/0.9/') and not self.version: + self.version = 'rss090' + if loweruri == 'http://purl.org/rss/1.0/' and not self.version: + self.version = 'rss10' + if loweruri == 'http://www.w3.org/2005/atom' and not self.version: + self.version = 'atom10' + if loweruri.find('backend.userland.com/rss') <> -1: + # match any backend.userland.com namespace + uri = 'http://backend.userland.com/rss' + loweruri = uri + if self._matchnamespaces.has_key(loweruri): + self.namespacemap[prefix] = self._matchnamespaces[loweruri] + self.namespacesInUse[self._matchnamespaces[loweruri]] = uri + else: + self.namespacesInUse[prefix or ''] = uri + + def resolveURI(self, uri): + return _urljoin(self.baseuri or '', uri) + + def decodeEntities(self, element, data): + return data + + def strattrs(self, attrs): + return ''.join([' %s="%s"' % (t[0],_xmlescape(t[1],{'"':'"'})) for t in attrs]) + + def push(self, element, expectingText): + self.elementstack.append([element, expectingText, []]) + + def pop(self, element, stripWhitespace=1): + if not 
self.elementstack: return + if self.elementstack[-1][0] != element: return + + element, expectingText, pieces = self.elementstack.pop() + + if self.version == 'atom10' and self.contentparams.get('type','text') == 'application/xhtml+xml': + # remove enclosing child element, but only if it is a
and + # only if all the remaining content is nested underneath it. + # This means that the divs would be retained in the following: + #
foo
bar
+ while pieces and len(pieces)>1 and not pieces[-1].strip(): + del pieces[-1] + while pieces and len(pieces)>1 and not pieces[0].strip(): + del pieces[0] + if pieces and (pieces[0] == '
' or pieces[0].startswith('
': + depth = 0 + for piece in pieces[:-1]: + if piece.startswith(''): + depth += 1 + else: + pieces = pieces[1:-1] + + # Ensure each piece is a str for Python 3 + for (i, v) in enumerate(pieces): + if not isinstance(v, basestring): + pieces[i] = v.decode('utf-8') + + output = ''.join(pieces) + if stripWhitespace: + output = output.strip() + if not expectingText: return output + + # decode base64 content + if base64 and self.contentparams.get('base64', 0): + try: + output = _base64decode(output) + except binascii.Error: + pass + except binascii.Incomplete: + pass + except TypeError: + # In Python 3, base64 takes and outputs bytes, not str + # This may not be the most correct way to accomplish this + output = _base64decode(output.encode('utf-8')).decode('utf-8') + + # resolve relative URIs + if (element in self.can_be_relative_uri) and output: + output = self.resolveURI(output) + + # decode entities within embedded markup + if not self.contentparams.get('base64', 0): + output = self.decodeEntities(element, output) + + if self.lookslikehtml(output): + self.contentparams['type']='text/html' + + # remove temporary cruft from contentparams + try: + del self.contentparams['mode'] + except KeyError: + pass + try: + del self.contentparams['base64'] + except KeyError: + pass + + is_htmlish = self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types + # resolve relative URIs within embedded markup + if is_htmlish and RESOLVE_RELATIVE_URIS: + if element in self.can_contain_relative_uris: + output = _resolveRelativeURIs(output, self.baseuri, self.encoding, self.contentparams.get('type', 'text/html')) + + # parse microformats + # (must do this before sanitizing because some microformats + # rely on elements that we sanitize) + if is_htmlish and element in ['content', 'description', 'summary']: + mfresults = _parseMicroformats(output, self.baseuri, self.encoding) + if mfresults: + for tag in mfresults.get('tags', []): + self._addTag(tag['term'], 
tag['scheme'], tag['label']) + for enclosure in mfresults.get('enclosures', []): + self._start_enclosure(enclosure) + for xfn in mfresults.get('xfn', []): + self._addXFN(xfn['relationships'], xfn['href'], xfn['name']) + vcard = mfresults.get('vcard') + if vcard: + self._getContext()['vcard'] = vcard + + # sanitize embedded markup + if is_htmlish and SANITIZE_HTML: + if element in self.can_contain_dangerous_markup: + output = _sanitizeHTML(output, self.encoding, self.contentparams.get('type', 'text/html')) + + if self.encoding and type(output) != type(u''): + try: + output = unicode(output, self.encoding) + except: + pass + + # address common error where people take data that is already + # utf-8, presume that it is iso-8859-1, and re-encode it. + if self.encoding in ('utf-8', 'utf-8_INVALID_PYTHON_3') and type(output) == type(u''): + try: + output = unicode(output.encode('iso-8859-1'), 'utf-8') + except: + pass + + # map win-1252 extensions to the proper code points + if type(output) == type(u''): + output = u''.join([c in _cp1252.keys() and _cp1252[c] or c for c in output]) + + # categories/tags/keywords/whatever are handled in _end_category + if element == 'category': + return output + + if element == 'title' and self.hasTitle: + return output + + # store output in appropriate place(s) + if self.inentry and not self.insource: + if element == 'content': + self.entries[-1].setdefault(element, []) + contentparams = copy.deepcopy(self.contentparams) + contentparams['value'] = output + self.entries[-1][element].append(contentparams) + elif element == 'link': + if not self.inimage: + # query variables in urls in link elements are improperly + # converted from `?a=1&b=2` to `?a=1&b;=2` as if they're + # unhandled character references. fix this special case. 
+ output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output) + self.entries[-1][element] = output + if output: + self.entries[-1]['links'][-1]['href'] = output + else: + if element == 'description': + element = 'summary' + self.entries[-1][element] = output + if self.incontent: + contentparams = copy.deepcopy(self.contentparams) + contentparams['value'] = output + self.entries[-1][element + '_detail'] = contentparams + elif (self.infeed or self.insource):# and (not self.intextinput) and (not self.inimage): + context = self._getContext() + if element == 'description': + element = 'subtitle' + context[element] = output + if element == 'link': + # fix query variables; see above for the explanation + output = re.sub("&([A-Za-z0-9_]+);", "&\g<1>", output) + context[element] = output + context['links'][-1]['href'] = output + elif self.incontent: + contentparams = copy.deepcopy(self.contentparams) + contentparams['value'] = output + context[element + '_detail'] = contentparams + return output + + def pushContent(self, tag, attrsD, defaultContentType, expectingText): + self.incontent += 1 + if self.lang: self.lang=self.lang.replace('_','-') + self.contentparams = FeedParserDict({ + 'type': self.mapContentType(attrsD.get('type', defaultContentType)), + 'language': self.lang, + 'base': self.baseuri}) + self.contentparams['base64'] = self._isBase64(attrsD, self.contentparams) + self.push(tag, expectingText) + + def popContent(self, tag): + value = self.pop(tag) + self.incontent -= 1 + self.contentparams.clear() + return value + + # a number of elements in a number of RSS variants are nominally plain + # text, but this is routinely ignored. This is an attempt to detect + # the most common cases. As false positives often result in silent + # data loss, this function errs on the conservative side. 
+ def lookslikehtml(self, s): + if self.version.startswith('atom'): return + if self.contentparams.get('type','text/html') != 'text/plain': return + + # must have a close tag or a entity reference to qualify + if not (re.search(r'',s) or re.search("&#?\w+;",s)): return + + # all tags must be in a restricted subset of valid HTML tags + if filter(lambda t: t.lower() not in _HTMLSanitizer.acceptable_elements, + re.findall(r' -1: + prefix = name[:colonpos] + suffix = name[colonpos+1:] + prefix = self.namespacemap.get(prefix, prefix) + name = prefix + ':' + suffix + return name + + def _getAttribute(self, attrsD, name): + return attrsD.get(self._mapToStandardPrefix(name)) + + def _isBase64(self, attrsD, contentparams): + if attrsD.get('mode', '') == 'base64': + return 1 + if self.contentparams['type'].startswith('text/'): + return 0 + if self.contentparams['type'].endswith('+xml'): + return 0 + if self.contentparams['type'].endswith('/xml'): + return 0 + return 1 + + def _itsAnHrefDamnIt(self, attrsD): + href = attrsD.get('url', attrsD.get('uri', attrsD.get('href', None))) + if href: + try: + del attrsD['url'] + except KeyError: + pass + try: + del attrsD['uri'] + except KeyError: + pass + attrsD['href'] = href + return attrsD + + def _save(self, key, value, overwrite=False): + context = self._getContext() + if overwrite: + context[key] = value + else: + context.setdefault(key, value) + + def _start_rss(self, attrsD): + versionmap = {'0.91': 'rss091u', + '0.92': 'rss092', + '0.93': 'rss093', + '0.94': 'rss094'} + #If we're here then this is an RSS feed. + #If we don't have a version or have a version that starts with something + #other than RSS then there's been a mistake. Correct it. 
+ if not self.version or not self.version.startswith('rss'): + attr_version = attrsD.get('version', '') + version = versionmap.get(attr_version) + if version: + self.version = version + elif attr_version.startswith('2.'): + self.version = 'rss20' + else: + self.version = 'rss' + + def _start_dlhottitles(self, attrsD): + self.version = 'hotrss' + + def _start_channel(self, attrsD): + self.infeed = 1 + self._cdf_common(attrsD) + _start_feedinfo = _start_channel + + def _cdf_common(self, attrsD): + if attrsD.has_key('lastmod'): + self._start_modified({}) + self.elementstack[-1][-1] = attrsD['lastmod'] + self._end_modified() + if attrsD.has_key('href'): + self._start_link({}) + self.elementstack[-1][-1] = attrsD['href'] + self._end_link() + + def _start_feed(self, attrsD): + self.infeed = 1 + versionmap = {'0.1': 'atom01', + '0.2': 'atom02', + '0.3': 'atom03'} + if not self.version: + attr_version = attrsD.get('version') + version = versionmap.get(attr_version) + if version: + self.version = version + else: + self.version = 'atom' + + def _end_channel(self): + self.infeed = 0 + _end_feed = _end_channel + + def _start_image(self, attrsD): + context = self._getContext() + if not self.inentry: + context.setdefault('image', FeedParserDict()) + self.inimage = 1 + self.hasTitle = 0 + self.push('image', 0) + + def _end_image(self): + self.pop('image') + self.inimage = 0 + + def _start_textinput(self, attrsD): + context = self._getContext() + context.setdefault('textinput', FeedParserDict()) + self.intextinput = 1 + self.hasTitle = 0 + self.push('textinput', 0) + _start_textInput = _start_textinput + + def _end_textinput(self): + self.pop('textinput') + self.intextinput = 0 + _end_textInput = _end_textinput + + def _start_author(self, attrsD): + self.inauthor = 1 + self.push('author', 1) + # Append a new FeedParserDict when expecting an author + context = self._getContext() + context.setdefault('authors', []) + context['authors'].append(FeedParserDict()) + 
# Element handlers for author/contributor/person data, image dimensions,
# context lookup and item boundaries.  Every function takes `self`, so these
# appear to be methods of a parser class whose header lies outside this
# chunk (presumably the mixin the parser classes later in the file inherit
# from -- confirm).  NOTE(review): the `_start_<tag>` / `_end_<tag>` naming
# suggests dispatch by element name; the dispatcher is not visible here.

# Other element names that share the author start handler.
_start_managingeditor = _start_author
_start_dc_author = _start_author
_start_dc_creator = _start_author
_start_itunes_author = _start_author

def _end_author(self):
    # Close the author element, clear the in-author flag, then reconcile the
    # 'author' string with 'author_detail' in the current context.
    self.pop('author')
    self.inauthor = 0
    self._sync_author_detail()
_end_managingeditor = _end_author
_end_dc_author = _end_author
_end_dc_creator = _end_author
_end_itunes_author = _end_author

def _start_itunes_owner(self, attrsD):
    # The iTunes owner is recorded under the 'publisher' key.
    self.inpublisher = 1
    self.push('publisher', 0)

def _end_itunes_owner(self):
    self.pop('publisher')
    self.inpublisher = 0
    self._sync_author_detail('publisher')

def _start_contributor(self, attrsD):
    # Append a fresh, empty contributor record to the current context; the
    # nested name/email/url handlers fill it in through _save_contributor.
    self.incontributor = 1
    context = self._getContext()
    context.setdefault('contributors', [])
    context['contributors'].append(FeedParserDict())
    self.push('contributor', 0)

def _end_contributor(self):
    self.pop('contributor')
    self.incontributor = 0

def _start_dc_contributor(self, attrsD):
    # Same bookkeeping as _start_contributor, but the element is pushed as
    # 'name' so that _end_dc_contributor can reuse _end_name for its text.
    self.incontributor = 1
    context = self._getContext()
    context.setdefault('contributors', [])
    context['contributors'].append(FeedParserDict())
    self.push('name', 0)

def _end_dc_contributor(self):
    # _end_name stores the text as the contributor's name because
    # self.incontributor is still set at this point.
    self._end_name()
    self.incontributor = 0

def _start_name(self, attrsD):
    self.push('name', 0)
_start_itunes_name = _start_name

def _end_name(self):
    # Route the popped name to whichever construct is currently open.
    # The order of the checks gives the precedence:
    # publisher, author, contributor, textinput.
    value = self.pop('name')
    if self.inpublisher:
        self._save_author('name', value, 'publisher')
    elif self.inauthor:
        self._save_author('name', value)
    elif self.incontributor:
        self._save_contributor('name', value)
    elif self.intextinput:
        context = self._getContext()
        context['name'] = value
_end_itunes_name = _end_name

def _start_width(self, attrsD):
    self.push('width', 0)

def _end_width(self):
    # Convert the popped text to an int; anything that fails to convert
    # becomes 0.  The result is stored only while inside an image element.
    value = self.pop('width')
    try:
        value = int(value)
    except:
        # NOTE(review): bare except -- also swallows non-conversion errors.
        value = 0
    if self.inimage:
        context = self._getContext()
        context['width'] = value

def _start_height(self, attrsD):
    self.push('height', 0)

def _end_height(self):
    # Mirror of _end_width for the 'height' key.
    value = self.pop('height')
    try:
        value = int(value)
    except:
        # NOTE(review): bare except -- also swallows non-conversion errors.
        value = 0
    if self.inimage:
        context = self._getContext()
        context['height'] = value

def _start_url(self, attrsD):
    # url/homepage/uri elements are all pushed under the key 'href'.
    self.push('href', 1)
_start_homepage = _start_url
_start_uri = _start_url

def _end_url(self):
    # The value is kept only inside an author or contributor construct;
    # otherwise it is popped and dropped by this handler.
    value = self.pop('href')
    if self.inauthor:
        self._save_author('href', value)
    elif self.incontributor:
        self._save_contributor('href', value)
_end_homepage = _end_url
_end_uri = _end_url

def _start_email(self, attrsD):
    self.push('email', 0)
_start_itunes_email = _start_email

def _end_email(self):
    # Same routing as _end_name, without the textinput case.
    value = self.pop('email')
    if self.inpublisher:
        self._save_author('email', value, 'publisher')
    elif self.inauthor:
        self._save_author('email', value)
    elif self.incontributor:
        self._save_contributor('email', value)
_end_itunes_email = _end_email

def _getContext(self):
    # Return the dictionary that parsed values should currently be written
    # to.  First match wins: source data, the feed's image (only once
    # feeddata has an 'image' key), the feed's textinput, the most recent
    # entry, and finally the feed-level data.
    if self.insource:
        context = self.sourcedata
    elif self.inimage and self.feeddata.has_key('image'):
        context = self.feeddata['image']
    elif self.intextinput:
        context = self.feeddata['textinput']
    elif self.inentry:
        context = self.entries[-1]
    else:
        context = self.feeddata
    return context

def _save_author(self, key, value, prefix='author'):
    # Store one field (e.g. 'name', 'email', 'href') in '<prefix>_detail'
    # of the current context and mirror it into the last 'authors' record.
    context = self._getContext()
    context.setdefault(prefix + '_detail', FeedParserDict())
    context[prefix + '_detail'][key] = value
    # NOTE(review): called without an argument, so this always syncs the
    # 'author' key, even when prefix is 'publisher' -- confirm intended.
    self._sync_author_detail()
    context.setdefault('authors', [FeedParserDict()])
    context['authors'][-1][key] = value

def _save_contributor(self, key, value):
    # Store one field on the most recently added contributor record,
    # creating a first record if the list does not exist yet.
    context = self._getContext()
    context.setdefault('contributors', [FeedParserDict()])
    context['contributors'][-1][key] = value

def _sync_author_detail(self, key='author'):
    # Keep context[key] (a display string) and context['<key>_detail']
    # (a dict with 'name'/'email') consistent with each other.
    context = self._getContext()
    detail = context.get('%s_detail' % key)
    if detail:
        # Detail exists: rebuild the display string from it.
        name = detail.get('name')
        email = detail.get('email')
        if name and email:
            context[key] = '%s (%s)' % (name, email)
        elif name:
            context[key] = name
        elif email:
            context[key] = email
    else:
        # No detail yet: derive name and email from the display string.
        author, email = context.get(key), None
        if not author: return
        emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))(\?subject=\S+)?''', author)
        if emailmatch:
            email = emailmatch.group(0)
            # probably a better way to do the following, but it passes all the tests
            author = author.replace(email, '')
            author = author.replace('()', '')
            author = author.replace('<>', '')
            # NOTE(review): identical to the previous line as written here;
            # possibly an escaped variant was lost in extraction -- confirm.
            author = author.replace('<>', '')
            author = author.strip()
            # Strip one leading '(' and one trailing ')' left around the name.
            if author and (author[0] == '('):
                author = author[1:]
            if author and (author[-1] == ')'):
                author = author[:-1]
            author = author.strip()
        if author or email:
            context.setdefault('%s_detail' % key, FeedParserDict())
        if author:
            context['%s_detail' % key]['name'] = author
        if email:
            context['%s_detail' % key]['email'] = email

def _start_subtitle(self, attrsD):
    # Content element with a default type of text/plain.
    self.pushContent('subtitle', attrsD, 'text/plain', 1)
_start_tagline = _start_subtitle
_start_itunes_subtitle = _start_subtitle

def _end_subtitle(self):
    self.popContent('subtitle')
_end_tagline = _end_subtitle
_end_itunes_subtitle = _end_subtitle

def _start_rights(self, attrsD):
    self.pushContent('rights', attrsD, 'text/plain', 1)
_start_dc_rights = _start_rights
_start_copyright = _start_rights

def _end_rights(self):
    self.popContent('rights')
_end_dc_rights = _end_rights
_end_copyright = _end_rights

def _start_item(self, attrsD):
    # Begin a new entry: append an empty record, reset the per-entry flags,
    # and use the rdf:about attribute (when present) as the entry id.
    self.entries.append(FeedParserDict())
    self.push('item', 0)
    self.inentry = 1
    self.guidislink = 0
    self.hasTitle = 0
    id = self._getAttribute(attrsD, 'rdf:about')  # local name shadows builtin id()
    if id:
        context = self._getContext()
        context['id'] = id
    self._cdf_common(attrsD)
_start_entry = _start_item
_start_product = _start_item

def _end_item(self):
    self.pop('item')
    self.inentry = 0
_end_entry = _end_item

def _start_dc_language(self, attrsD):
    self.push('language', 1)
# Element handlers for language, publisher, dates, licenses, tags, links,
# guid and title.  Every function takes `self`, so these appear to be
# methods of a parser class whose header lies outside this chunk -- confirm.

_start_language = _start_dc_language

def _end_dc_language(self):
    # The popped language value also becomes the parser's current language.
    self.lang = self.pop('language')
_end_language = _end_dc_language

def _start_dc_publisher(self, attrsD):
    self.push('publisher', 1)
_start_webmaster = _start_dc_publisher

def _end_dc_publisher(self):
    self.pop('publisher')
    self._sync_author_detail('publisher')
_end_webmaster = _end_dc_publisher

def _start_published(self, attrsD):
    self.push('published', 1)
_start_dcterms_issued = _start_published
_start_issued = _start_published

def _end_published(self):
    # Save the parsed form of the date text under 'published_parsed'.
    value = self.pop('published')
    self._save('published_parsed', _parse_date(value), overwrite=True)
_end_dcterms_issued = _end_published
_end_issued = _end_published

def _start_updated(self, attrsD):
    self.push('updated', 1)
_start_modified = _start_updated
_start_dcterms_modified = _start_updated
_start_pubdate = _start_updated
_start_dc_date = _start_updated
_start_lastbuilddate = _start_updated

def _end_updated(self):
    # Save the parsed form of the date text under 'updated_parsed'.
    value = self.pop('updated')
    parsed_value = _parse_date(value)
    self._save('updated_parsed', parsed_value, overwrite=True)
_end_modified = _end_updated
_end_dcterms_modified = _end_updated
_end_pubdate = _end_updated
_end_dc_date = _end_updated
_end_lastbuilddate = _end_updated

def _start_created(self, attrsD):
    self.push('created', 1)
_start_dcterms_created = _start_created

def _end_created(self):
    value = self.pop('created')
    self._save('created_parsed', _parse_date(value), overwrite=True)
_end_dcterms_created = _end_created

def _start_expirationdate(self, attrsD):
    # Note the key change: the element is stored under 'expired'.
    self.push('expired', 1)

def _end_expirationdate(self):
    self._save('expired_parsed', _parse_date(self.pop('expired')), overwrite=True)

def _start_cc_license(self, attrsD):
    # Record the license as a link with rel='license'; href is set only
    # when the element carries an rdf:resource attribute.
    context = self._getContext()
    value = self._getAttribute(attrsD, 'rdf:resource')
    attrsD = FeedParserDict()
    attrsD['rel']='license'
    if value: attrsD['href']=value
    context.setdefault('links', []).append(attrsD)

def _start_creativecommons_license(self, attrsD):
    self.push('license', 1)
_start_creativeCommons_license = _start_creativecommons_license

def _end_creativecommons_license(self):
    # Here the license URL is the element text.  It is recorded as a
    # rel='license' link, and the 'license' key is then removed from the
    # context (presumably stored there by pop -- confirm).
    value = self.pop('license')
    context = self._getContext()
    attrsD = FeedParserDict()
    attrsD['rel']='license'
    if value: attrsD['href']=value
    context.setdefault('links', []).append(attrsD)
    del context['license']
_end_creativeCommons_license = _end_creativecommons_license

def _addXFN(self, relationships, href, name):
    # Append an XFN record to context['xfn'], skipping exact duplicates.
    context = self._getContext()
    xfn = context.setdefault('xfn', [])
    value = FeedParserDict({'relationships': relationships, 'href': href, 'name': name})
    if value not in xfn:
        xfn.append(value)

def _addTag(self, term, scheme, label):
    # Append a tag record to context['tags'], skipping exact duplicates.
    # The 'tags' list is created even when the tag itself is entirely
    # empty and therefore not added.
    context = self._getContext()
    tags = context.setdefault('tags', [])
    if (not term) and (not scheme) and (not label): return
    value = FeedParserDict({'term': term, 'scheme': scheme, 'label': label})
    if value not in tags:
        tags.append(value)

def _start_category(self, attrsD):
    # Add a tag from the attributes; 'domain' is the fallback for 'scheme'.
    # The element text, if any, is handled later by _end_category.
    if _debug: sys.stderr.write('entering _start_category with %s\n' % repr(attrsD))
    term = attrsD.get('term')
    scheme = attrsD.get('scheme', attrsD.get('domain'))
    label = attrsD.get('label')
    self._addTag(term, scheme, label)
    self.push('category', 1)
_start_dc_subject = _start_category
_start_keywords = _start_category

def _start_media_category(self, attrsD):
    # Supply the default scheme before delegating to _start_category.
    attrsD.setdefault('scheme', 'http://search.yahoo.com/mrss/category_schema')
    self._start_category(attrsD)

def _end_itunes_keywords(self):
    # Each whitespace-separated keyword becomes its own tag.
    for term in self.pop('itunes_keywords').split():
        self._addTag(term, 'http://www.itunes.com/', None)

def _start_itunes_category(self, attrsD):
    self._addTag(attrsD.get('text'), 'http://www.itunes.com/', None)
    self.push('category', 1)

def _end_category(self):
    # Use the element text as the term of the last tag if that tag has no
    # term yet; otherwise add the text as a new tag.  Assumes a 'tags' list
    # already exists in the context (the start handlers above create it
    # through _addTag).
    value = self.pop('category')
    if not value: return
    context = self._getContext()
    tags = context['tags']
    if value and len(tags) and not tags[-1]['term']:
        tags[-1]['term'] = value
    else:
        self._addTag(value, None, None)
_end_dc_subject = _end_category
_end_keywords = _end_category
_end_itunes_category = _end_category
_end_media_category = _end_category

def _start_cloud(self, attrsD):
    self._getContext()['cloud'] = FeedParserDict(attrsD)

def _start_link(self, attrsD):
    # Default rel to 'alternate'.  The default type depends on rel:
    # 'self' links get application/atom+xml, all others get text/html.
    attrsD.setdefault('rel', 'alternate')
    if attrsD['rel'] == 'self':
        attrsD.setdefault('type', 'application/atom+xml')
    else:
        attrsD.setdefault('type', 'text/html')
    context = self._getContext()
    attrsD = self._itsAnHrefDamnIt(attrsD)
    if attrsD.has_key('href'):
        attrsD['href'] = self.resolveURI(attrsD['href'])
    expectingText = self.infeed or self.inentry or self.insource
    context.setdefault('links', [])
    # Links found inside an image that is itself inside an entry are not
    # added to the links list.
    if not (self.inentry and self.inimage):
        context['links'].append(FeedParserDict(attrsD))
    if attrsD.has_key('href'):
        # The link is fully described by its attributes, so the element is
        # not pushed.  An 'alternate' link of an HTML type also becomes the
        # context's main 'link'.
        expectingText = 0
        if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
            context['link'] = attrsD['href']
    else:
        # No href attribute: push the element so its text can be collected.
        self.push('link', expectingText)
_start_producturl = _start_link

def _end_link(self):
    # NOTE(review): both locals are unused; only the pop itself (and the
    # context lookup) have any effect here.
    value = self.pop('link')
    context = self._getContext()
_end_producturl = _end_link

def _start_guid(self, attrsD):
    # A missing 'ispermalink' attribute counts as 'true'.
    self.guidislink = (attrsD.get('ispermalink', 'true') == 'true')
    self.push('id', 1)

def _end_guid(self):
    value = self.pop('id')
    # The saved 'guidislink' flag is true only when the guid is a permalink
    # and the context has no 'link' key at this moment.
    self._save('guidislink', self.guidislink and not self._getContext().has_key('link'))
    if self.guidislink:
        # guid acts as link, but only if 'ispermalink' is not present or is 'true',
        # and only if the item doesn't already have a link element
        self._save('link', value)

def _start_title(self, attrsD):
    # While self.svgOK is set, the element is handed to unknown_starttag
    # rather than being treated as a feed/entry title.
    if self.svgOK: return self.unknown_starttag('title', attrsD.items())
    self.pushContent('title', attrsD, 'text/plain', self.infeed or self.inentry or self.insource)
_start_dc_title = _start_title
# Element handlers for title, description/abstract, info and generator.
# Every function takes `self`, so these appear to be methods of a parser
# class whose header lies outside this chunk -- confirm.

_start_media_title = _start_title

def _end_title(self):
    # Nothing is done while self.svgOK is set.  Otherwise the title content
    # is popped, and hasTitle is set only when that content is non-empty.
    if self.svgOK: return
    value = self.popContent('title')
    if not value: return
    context = self._getContext()  # NOTE(review): result is unused
    self.hasTitle = 1
_end_dc_title = _end_title

def _end_media_title(self):
    # Run the normal title handling, then restore hasTitle to what it was
    # beforehand, so this element never changes the flag.
    hasTitle = self.hasTitle
    self._end_title()
    self.hasTitle = hasTitle

def _start_description(self, attrsD):
    # If the context already has a 'summary', handle this element as
    # 'content' instead; _summaryKey remembers that choice for
    # _end_description.
    context = self._getContext()
    if context.has_key('summary'):
        self._summaryKey = 'content'
        self._start_content(attrsD)
    else:
        self.pushContent('description', attrsD, 'text/html', self.infeed or self.inentry or self.insource)
_start_dc_description = _start_description

def _start_abstract(self, attrsD):
    # Same target key as a description, but with a text/plain default type
    # and no redirect to 'content'.
    self.pushContent('description', attrsD, 'text/plain', self.infeed or self.inentry or self.insource)

def _end_description(self):
    # Close whichever element _start_description opened, then reset the
    # routing marker.
    if self._summaryKey == 'content':
        self._end_content()
    else:
        value = self.popContent('description')  # NOTE(review): value is unused
    self._summaryKey = None
_end_abstract = _end_description
_end_dc_description = _end_description

def _start_info(self, attrsD):
    self.pushContent('info', attrsD, 'text/plain', 1)
_start_feedburner_browserfriendly = _start_info

def _end_info(self):
    self.popContent('info')
_end_feedburner_browserfriendly = _end_info

def _start_generator(self, attrsD):
    # Store the attributes as 'generator_detail'.  When attributes are
    # present they are first passed through _itsAnHrefDamnIt, and any
    # 'href' is resolved with resolveURI.
    if attrsD:
        attrsD = self._itsAnHrefDamnIt(attrsD)
        if attrsD.has_key('href'):
            attrsD['href'] = self.resolveURI(attrsD['href'])
    self._getContext()['generator_detail'] = FeedParserDict(attrsD)
    self.push('generator', 1)

def _end_generator(self):
    # The element text becomes the 'name' field of generator_detail.
    value = self.pop('generator')
    context = self._getContext()
    if context.has_key('generator_detail'):
        context['generator_detail']['name'] = value

def _start_admin_generatoragent(self, attrsD):
    # The generator is given by the rdf:resource attribute, not by element
    # text.  The value is appended to the third field of the top
    # element-stack entry before popping (presumably the list of collected
    # text pieces -- confirm), and the same value is stored as the
    # generator's href.
    self.push('generator', 1)
    value = self._getAttribute(attrsD, 'rdf:resource')
    if value:
        self.elementstack[-1][2].append(value)
    self.pop('generator')
    self._getContext()['generator_detail'] = FeedParserDict({'href': value})
+ def _start_admin_errorreportsto(self, attrsD): + self.push('errorreportsto', 1) + value = self._getAttribute(attrsD, 'rdf:resource') + if value: + self.elementstack[-1][2].append(value) + self.pop('errorreportsto') + + def _start_summary(self, attrsD): + context = self._getContext() + if context.has_key('summary'): + self._summaryKey = 'content' + self._start_content(attrsD) + else: + self._summaryKey = 'summary' + self.pushContent(self._summaryKey, attrsD, 'text/plain', 1) + _start_itunes_summary = _start_summary + + def _end_summary(self): + if self._summaryKey == 'content': + self._end_content() + else: + self.popContent(self._summaryKey or 'summary') + self._summaryKey = None + _end_itunes_summary = _end_summary + + def _start_enclosure(self, attrsD): + attrsD = self._itsAnHrefDamnIt(attrsD) + context = self._getContext() + attrsD['rel']='enclosure' + context.setdefault('links', []).append(FeedParserDict(attrsD)) + + def _start_source(self, attrsD): + if 'url' in attrsD: + # This means that we're processing a source element from an RSS 2.0 feed + self.sourcedata['href'] = attrsD[u'url'] + self.push('source', 1) + self.insource = 1 + self.hasTitle = 0 + + def _end_source(self): + self.insource = 0 + value = self.pop('source') + if value: + self.sourcedata['title'] = value + self._getContext()['source'] = copy.deepcopy(self.sourcedata) + self.sourcedata.clear() + + def _start_content(self, attrsD): + self.pushContent('content', attrsD, 'text/plain', 1) + src = attrsD.get('src') + if src: + self.contentparams['src'] = src + self.push('content', 1) + + def _start_prodlink(self, attrsD): + self.pushContent('content', attrsD, 'text/html', 1) + + def _start_body(self, attrsD): + self.pushContent('content', attrsD, 'application/xhtml+xml', 1) + _start_xhtml_body = _start_body + + def _start_content_encoded(self, attrsD): + self.pushContent('content', attrsD, 'text/html', 1) + _start_fullitem = _start_content_encoded + + def _end_content(self): + copyToSummary = 
self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types) + value = self.popContent('content') + if copyToSummary: + self._save('summary', value) + + _end_body = _end_content + _end_xhtml_body = _end_content + _end_content_encoded = _end_content + _end_fullitem = _end_content + _end_prodlink = _end_content + + def _start_itunes_image(self, attrsD): + self.push('itunes_image', 0) + if attrsD.get('href'): + self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')}) + _start_itunes_link = _start_itunes_image + + def _end_itunes_block(self): + value = self.pop('itunes_block', 0) + self._getContext()['itunes_block'] = (value == 'yes') and 1 or 0 + + def _end_itunes_explicit(self): + value = self.pop('itunes_explicit', 0) + # Convert 'yes' -> True, 'clean' to False, and any other value to None + # False and None both evaluate as False, so the difference can be ignored + # by applications that only need to know if the content is explicit. + self._getContext()['itunes_explicit'] = (None, False, True)[(value == 'yes' and 2) or value == 'clean' or 0] + + def _start_media_content(self, attrsD): + context = self._getContext() + context.setdefault('media_content', []) + context['media_content'].append(attrsD) + + def _start_media_thumbnail(self, attrsD): + context = self._getContext() + context.setdefault('media_thumbnail', []) + self.push('url', 1) # new + context['media_thumbnail'].append(attrsD) + + def _end_media_thumbnail(self): + url = self.pop('url') + context = self._getContext() + if url != None and len(url.strip()) != 0: + if not context['media_thumbnail'][-1].has_key('url'): + context['media_thumbnail'][-1]['url'] = url + + def _start_media_player(self, attrsD): + self.push('media_player', 0) + self._getContext()['media_player'] = FeedParserDict(attrsD) + + def _end_media_player(self): + value = self.pop('media_player') + context = self._getContext() + context['media_player']['content'] = value + + def 
_start_newlocation(self, attrsD): + self.push('newlocation', 1) + + def _end_newlocation(self): + url = self.pop('newlocation') + context = self._getContext() + # don't set newlocation if the context isn't right + if context is not self.feeddata: + return + context['newlocation'] = _makeSafeAbsoluteURI(self.baseuri, url.strip()) + +if _XML_AVAILABLE: + class _StrictFeedParser(_FeedParserMixin, xml.sax.handler.ContentHandler): + def __init__(self, baseuri, baselang, encoding): + if _debug: sys.stderr.write('trying StrictFeedParser\n') + xml.sax.handler.ContentHandler.__init__(self) + _FeedParserMixin.__init__(self, baseuri, baselang, encoding) + self.bozo = 0 + self.exc = None + self.decls = {} + + def startPrefixMapping(self, prefix, uri): + self.trackNamespace(prefix, uri) + if uri == 'http://www.w3.org/1999/xlink': + self.decls['xmlns:'+prefix] = uri + + def startElementNS(self, name, qname, attrs): + namespace, localname = name + lowernamespace = str(namespace or '').lower() + if lowernamespace.find('backend.userland.com/rss') <> -1: + # match any backend.userland.com namespace + namespace = 'http://backend.userland.com/rss' + lowernamespace = namespace + if qname and qname.find(':') > 0: + givenprefix = qname.split(':')[0] + else: + givenprefix = None + prefix = self._matchnamespaces.get(lowernamespace, givenprefix) + if givenprefix and (prefix == None or (prefix == '' and lowernamespace == '')) and not self.namespacesInUse.has_key(givenprefix): + raise UndeclaredNamespace, "'%s' is not associated with a namespace" % givenprefix + localname = str(localname).lower() + + # qname implementation is horribly broken in Python 2.1 (it + # doesn't report any), and slightly broken in Python 2.2 (it + # doesn't report the xml: namespace). So we match up namespaces + # with a known list first, and then possibly override them with + # the qnames the SAX parser gives us (if indeed it gives us any + # at all). 
Thanks to MatejC for helping me test this and + # tirelessly telling me that it didn't work yet. + attrsD, self.decls = self.decls, {} + if localname=='math' and namespace=='http://www.w3.org/1998/Math/MathML': + attrsD['xmlns']=namespace + if localname=='svg' and namespace=='http://www.w3.org/2000/svg': + attrsD['xmlns']=namespace + + if prefix: + localname = prefix.lower() + ':' + localname + elif namespace and not qname: #Expat + for name,value in self.namespacesInUse.items(): + if name and value == namespace: + localname = name + ':' + localname + break + if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname)) + + for (namespace, attrlocalname), attrvalue in attrs._attrs.items(): + lowernamespace = (namespace or '').lower() + prefix = self._matchnamespaces.get(lowernamespace, '') + if prefix: + attrlocalname = prefix + ':' + attrlocalname + attrsD[str(attrlocalname).lower()] = attrvalue + for qname in attrs.getQNames(): + attrsD[str(qname).lower()] = attrs.getValueByQName(qname) + self.unknown_starttag(localname, attrsD.items()) + + def characters(self, text): + self.handle_data(text) + + def endElementNS(self, name, qname): + namespace, localname = name + lowernamespace = str(namespace or '').lower() + if qname and qname.find(':') > 0: + givenprefix = qname.split(':')[0] + else: + givenprefix = '' + prefix = self._matchnamespaces.get(lowernamespace, givenprefix) + if prefix: + localname = prefix + ':' + localname + elif namespace and not qname: #Expat + for name,value in self.namespacesInUse.items(): + if name and value == namespace: + localname = name + ':' + localname + break + localname = str(localname).lower() + self.unknown_endtag(localname) + + def error(self, exc): + self.bozo = 1 + self.exc = exc + + def fatalError(self, exc): + self.error(exc) + raise exc + +class _BaseHTMLProcessor(sgmllib.SGMLParser): + 
special = re.compile('''[<>'"]''') + bare_ampersand = re.compile("&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)") + elements_no_end_tag = [ + 'area', 'base', 'basefont', 'br', 'col', 'command', 'embed', 'frame', + 'hr', 'img', 'input', 'isindex', 'keygen', 'link', 'meta', 'param', + 'source', 'track', 'wbr' + ] + + def __init__(self, encoding, _type): + self.encoding = encoding + self._type = _type + if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding) + sgmllib.SGMLParser.__init__(self) + + def reset(self): + self.pieces = [] + sgmllib.SGMLParser.reset(self) + + def _shorttag_replace(self, match): + tag = match.group(1) + if tag in self.elements_no_end_tag: + return '<' + tag + ' />' + else: + return '<' + tag + '>' + + def parse_starttag(self,i): + j=sgmllib.SGMLParser.parse_starttag(self, i) + if self._type == 'application/xhtml+xml': + if j>2 and self.rawdata[j-2:j]=='/>': + self.unknown_endtag(self.lasttag) + return j + + def feed(self, data): + data = re.compile(r'', self._shorttag_replace, data) # bug [ 1399464 ] Bad regexp for _shorttag_replace + data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data) + data = data.replace(''', "'") + data = data.replace('"', '"') + try: + bytes + if bytes is str: + raise NameError + self.encoding = self.encoding + '_INVALID_PYTHON_3' + except NameError: + if self.encoding and type(data) == type(u''): + data = data.encode(self.encoding) + sgmllib.SGMLParser.feed(self, data) + sgmllib.SGMLParser.close(self) + + def normalize_attrs(self, attrs): + if not attrs: return attrs + # utility method to be called by descendants + attrs = dict([(k.lower(), v) for k, v in attrs]).items() + attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs] + attrs.sort() + return attrs + + def unknown_starttag(self, tag, attrs): + # called for each start tag + # attrs is a list of (attr, value) tuples + # e.g. for
, tag='pre', attrs=[('class', 'screen')]
+        if _debug: sys.stderr.write('_BaseHTMLProcessor, unknown_starttag, tag=%s\n' % tag)
+        uattrs = []
+        strattrs=''
+        if attrs:
+            for key, value in attrs:
+                value=value.replace('>','>').replace('<','<').replace('"','"')
+                value = self.bare_ampersand.sub("&", value)
+                # thanks to Kevin Marks for this breathtaking hack to deal with (valid) high-bit attribute values in UTF-8 feeds
+                if type(value) != type(u''):
+                    try:
+                        value = unicode(value, self.encoding)
+                    except:
+                        value = unicode(value, 'iso-8859-1')
+                try:
+                    # Currently, in Python 3 the key is already a str, and cannot be decoded again
+                    uattrs.append((unicode(key, self.encoding), value))
+                except TypeError:
+                    uattrs.append((key, value))
+            strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs])
+            if self.encoding:
+                try:
+                    strattrs=strattrs.encode(self.encoding)
+                except:
+                    pass
+        if tag in self.elements_no_end_tag:
+            self.pieces.append('<%(tag)s%(strattrs)s />' % locals())
+        else:
+            self.pieces.append('<%(tag)s%(strattrs)s>' % locals())
+
+    def unknown_endtag(self, tag):
+        # called for each end tag, e.g. for 
, tag will be 'pre' + # Reconstruct the original end tag. + if tag not in self.elements_no_end_tag: + self.pieces.append("" % locals()) + + def handle_charref(self, ref): + # called for each character reference, e.g. for ' ', ref will be '160' + # Reconstruct the original character reference. + if ref.startswith('x'): + value = unichr(int(ref[1:],16)) + else: + value = unichr(int(ref)) + + if value in _cp1252.keys(): + self.pieces.append('&#%s;' % hex(ord(_cp1252[value]))[1:]) + else: + self.pieces.append('&#%(ref)s;' % locals()) + + def handle_entityref(self, ref): + # called for each entity reference, e.g. for '©', ref will be 'copy' + # Reconstruct the original entity reference. + if name2codepoint.has_key(ref): + self.pieces.append('&%(ref)s;' % locals()) + else: + self.pieces.append('&%(ref)s' % locals()) + + def handle_data(self, text): + # called for each block of plain text, i.e. outside of any tag and + # not containing any character or entity references + # Store the original text verbatim. + if _debug: sys.stderr.write('_BaseHTMLProcessor, handle_data, text=%s\n' % text) + self.pieces.append(text) + + def handle_comment(self, text): + # called for each HTML comment, e.g. + # Reconstruct the original comment. + self.pieces.append('' % locals()) + + def handle_pi(self, text): + # called for each processing instruction, e.g. + # Reconstruct original processing instruction. + self.pieces.append('' % locals()) + + def handle_decl(self, text): + # called for the DOCTYPE, if present, e.g. 
+ # + # Reconstruct original DOCTYPE + self.pieces.append('' % locals()) + + _new_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9:]*\s*').match + def _scan_name(self, i, declstartpos): + rawdata = self.rawdata + n = len(rawdata) + if i == n: + return None, -1 + m = self._new_declname_match(rawdata, i) + if m: + s = m.group() + name = s.strip() + if (i + len(s)) == n: + return None, -1 # end of buffer + return name.lower(), m.end() + else: + self.handle_data(rawdata) +# self.updatepos(declstartpos, i) + return None, -1 + + def convert_charref(self, name): + return '&#%s;' % name + + def convert_entityref(self, name): + return '&%s;' % name + + def output(self): + '''Return processed HTML as a single string''' + return ''.join([str(p) for p in self.pieces]) + + def parse_declaration(self, i): + try: + return sgmllib.SGMLParser.parse_declaration(self, i) + except sgmllib.SGMLParseError: + # escape the doctype declaration and continue parsing + self.handle_data('<') + return i+1 + +class _LooseFeedParser(_FeedParserMixin, _BaseHTMLProcessor): + def __init__(self, baseuri, baselang, encoding, entities): + sgmllib.SGMLParser.__init__(self) + _FeedParserMixin.__init__(self, baseuri, baselang, encoding) + _BaseHTMLProcessor.__init__(self, encoding, 'application/xhtml+xml') + self.entities=entities + + def decodeEntities(self, element, data): + data = data.replace('<', '<') + data = data.replace('<', '<') + data = data.replace('<', '<') + data = data.replace('>', '>') + data = data.replace('>', '>') + data = data.replace('>', '>') + data = data.replace('&', '&') + data = data.replace('&', '&') + data = data.replace('"', '"') + data = data.replace('"', '"') + data = data.replace(''', ''') + data = data.replace(''', ''') + if self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'): + data = data.replace('<', '<') + data = data.replace('>', '>') + data = data.replace('&', '&') + data = data.replace('"', '"') + data = 
data.replace(''', "'") + return data + + def strattrs(self, attrs): + return ''.join([' %s="%s"' % (n,v.replace('"','"')) for n,v in attrs]) + +class _MicroformatsParser: + STRING = 1 + DATE = 2 + URI = 3 + NODE = 4 + EMAIL = 5 + + known_xfn_relationships = ['contact', 'acquaintance', 'friend', 'met', 'co-worker', 'coworker', 'colleague', 'co-resident', 'coresident', 'neighbor', 'child', 'parent', 'sibling', 'brother', 'sister', 'spouse', 'wife', 'husband', 'kin', 'relative', 'muse', 'crush', 'date', 'sweetheart', 'me'] + known_binary_extensions = ['zip','rar','exe','gz','tar','tgz','tbz2','bz2','z','7z','dmg','img','sit','sitx','hqx','deb','rpm','bz2','jar','rar','iso','bin','msi','mp2','mp3','ogg','ogm','mp4','m4v','m4a','avi','wma','wmv'] + + def __init__(self, data, baseuri, encoding): + self.document = BeautifulSoup.BeautifulSoup(data) + self.baseuri = baseuri + self.encoding = encoding + if type(data) == type(u''): + data = data.encode(encoding) + self.tags = [] + self.enclosures = [] + self.xfn = [] + self.vcard = None + + def vcardEscape(self, s): + if type(s) in (type(''), type(u'')): + s = s.replace(',', '\\,').replace(';', '\\;').replace('\n', '\\n') + return s + + def vcardFold(self, s): + s = re.sub(';+$', '', s) + sFolded = '' + iMax = 75 + sPrefix = '' + while len(s) > iMax: + sFolded += sPrefix + s[:iMax] + '\n' + s = s[iMax:] + sPrefix = ' ' + iMax = 74 + sFolded += sPrefix + s + return sFolded + + def normalize(self, s): + return re.sub(r'\s+', ' ', s).strip() + + def unique(self, aList): + results = [] + for element in aList: + if element not in results: + results.append(element) + return results + + def toISO8601(self, dt): + return time.strftime('%Y-%m-%dT%H:%M:%SZ', dt) + + def getPropertyValue(self, elmRoot, sProperty, iPropertyType=4, bAllowMultiple=0, bAutoEscape=0): + all = lambda x: 1 + sProperty = sProperty.lower() + bFound = 0 + bNormalize = 1 + propertyMatch = {'class': re.compile(r'\b%s\b' % sProperty)} + if bAllowMultiple and 
(iPropertyType != self.NODE): + snapResults = [] + containers = elmRoot(['ul', 'ol'], propertyMatch) + for container in containers: + snapResults.extend(container('li')) + bFound = (len(snapResults) != 0) + if not bFound: + snapResults = elmRoot(all, propertyMatch) + bFound = (len(snapResults) != 0) + if (not bFound) and (sProperty == 'value'): + snapResults = elmRoot('pre') + bFound = (len(snapResults) != 0) + bNormalize = not bFound + if not bFound: + snapResults = [elmRoot] + bFound = (len(snapResults) != 0) + arFilter = [] + if sProperty == 'vcard': + snapFilter = elmRoot(all, propertyMatch) + for node in snapFilter: + if node.findParent(all, propertyMatch): + arFilter.append(node) + arResults = [] + for node in snapResults: + if node not in arFilter: + arResults.append(node) + bFound = (len(arResults) != 0) + if not bFound: + if bAllowMultiple: return [] + elif iPropertyType == self.STRING: return '' + elif iPropertyType == self.DATE: return None + elif iPropertyType == self.URI: return '' + elif iPropertyType == self.NODE: return None + else: return None + arValues = [] + for elmResult in arResults: + sValue = None + if iPropertyType == self.NODE: + if bAllowMultiple: + arValues.append(elmResult) + continue + else: + return elmResult + sNodeName = elmResult.name.lower() + if (iPropertyType == self.EMAIL) and (sNodeName == 'a'): + sValue = (elmResult.get('href') or '').split('mailto:').pop().split('?')[0] + if sValue: + sValue = bNormalize and self.normalize(sValue) or sValue.strip() + if (not sValue) and (sNodeName == 'abbr'): + sValue = elmResult.get('title') + if sValue: + sValue = bNormalize and self.normalize(sValue) or sValue.strip() + if (not sValue) and (iPropertyType == self.URI): + if sNodeName == 'a': sValue = elmResult.get('href') + elif sNodeName == 'img': sValue = elmResult.get('src') + elif sNodeName == 'object': sValue = elmResult.get('data') + if sValue: + sValue = bNormalize and self.normalize(sValue) or sValue.strip() + if (not sValue) and 
(sNodeName == 'img'): + sValue = elmResult.get('alt') + if sValue: + sValue = bNormalize and self.normalize(sValue) or sValue.strip() + if not sValue: + sValue = elmResult.renderContents() + sValue = re.sub(r'<\S[^>]*>', '', sValue) + sValue = sValue.replace('\r\n', '\n') + sValue = sValue.replace('\r', '\n') + if sValue: + sValue = bNormalize and self.normalize(sValue) or sValue.strip() + if not sValue: continue + if iPropertyType == self.DATE: + sValue = _parse_date_iso8601(sValue) + if bAllowMultiple: + arValues.append(bAutoEscape and self.vcardEscape(sValue) or sValue) + else: + return bAutoEscape and self.vcardEscape(sValue) or sValue + return arValues + + def findVCards(self, elmRoot, bAgentParsing=0): + sVCards = '' + + if not bAgentParsing: + arCards = self.getPropertyValue(elmRoot, 'vcard', bAllowMultiple=1) + else: + arCards = [elmRoot] + + for elmCard in arCards: + arLines = [] + + def processSingleString(sProperty): + sValue = self.getPropertyValue(elmCard, sProperty, self.STRING, bAutoEscape=1).decode(self.encoding) + if sValue: + arLines.append(self.vcardFold(sProperty.upper() + ':' + sValue)) + return sValue or u'' + + def processSingleURI(sProperty): + sValue = self.getPropertyValue(elmCard, sProperty, self.URI) + if sValue: + sContentType = '' + sEncoding = '' + sValueKey = '' + if sValue.startswith('data:'): + sEncoding = ';ENCODING=b' + sContentType = sValue.split(';')[0].split('/').pop() + sValue = sValue.split(',', 1).pop() + else: + elmValue = self.getPropertyValue(elmCard, sProperty) + if elmValue: + if sProperty != 'url': + sValueKey = ';VALUE=uri' + sContentType = elmValue.get('type', '').strip().split('/').pop().strip() + sContentType = sContentType.upper() + if sContentType == 'OCTET-STREAM': + sContentType = '' + if sContentType: + sContentType = ';TYPE=' + sContentType.upper() + arLines.append(self.vcardFold(sProperty.upper() + sEncoding + sContentType + sValueKey + ':' + sValue)) + + def processTypeValue(sProperty, arDefaultType, 
arForceType=None): + arResults = self.getPropertyValue(elmCard, sProperty, bAllowMultiple=1) + for elmResult in arResults: + arType = self.getPropertyValue(elmResult, 'type', self.STRING, 1, 1) + if arForceType: + arType = self.unique(arForceType + arType) + if not arType: + arType = arDefaultType + sValue = self.getPropertyValue(elmResult, 'value', self.EMAIL, 0) + if sValue: + arLines.append(self.vcardFold(sProperty.upper() + ';TYPE=' + ','.join(arType) + ':' + sValue)) + + # AGENT + # must do this before all other properties because it is destructive + # (removes nested class="vcard" nodes so they don't interfere with + # this vcard's other properties) + arAgent = self.getPropertyValue(elmCard, 'agent', bAllowMultiple=1) + for elmAgent in arAgent: + if re.compile(r'\bvcard\b').search(elmAgent.get('class')): + sAgentValue = self.findVCards(elmAgent, 1) + '\n' + sAgentValue = sAgentValue.replace('\n', '\\n') + sAgentValue = sAgentValue.replace(';', '\\;') + if sAgentValue: + arLines.append(self.vcardFold('AGENT:' + sAgentValue)) + # Completely remove the agent element from the parse tree + elmAgent.extract() + else: + sAgentValue = self.getPropertyValue(elmAgent, 'value', self.URI, bAutoEscape=1); + if sAgentValue: + arLines.append(self.vcardFold('AGENT;VALUE=uri:' + sAgentValue)) + + # FN (full name) + sFN = processSingleString('fn') + + # N (name) + elmName = self.getPropertyValue(elmCard, 'n') + if elmName: + sFamilyName = self.getPropertyValue(elmName, 'family-name', self.STRING, bAutoEscape=1) + sGivenName = self.getPropertyValue(elmName, 'given-name', self.STRING, bAutoEscape=1) + arAdditionalNames = self.getPropertyValue(elmName, 'additional-name', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'additional-names', self.STRING, 1, 1) + arHonorificPrefixes = self.getPropertyValue(elmName, 'honorific-prefix', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'honorific-prefixes', self.STRING, 1, 1) + arHonorificSuffixes = self.getPropertyValue(elmName, 
'honorific-suffix', self.STRING, 1, 1) + self.getPropertyValue(elmName, 'honorific-suffixes', self.STRING, 1, 1) + arLines.append(self.vcardFold('N:' + sFamilyName + ';' + + sGivenName + ';' + + ','.join(arAdditionalNames) + ';' + + ','.join(arHonorificPrefixes) + ';' + + ','.join(arHonorificSuffixes))) + elif sFN: + # implied "N" optimization + # http://microformats.org/wiki/hcard#Implied_.22N.22_Optimization + arNames = self.normalize(sFN).split() + if len(arNames) == 2: + bFamilyNameFirst = (arNames[0].endswith(',') or + len(arNames[1]) == 1 or + ((len(arNames[1]) == 2) and (arNames[1].endswith('.')))) + if bFamilyNameFirst: + arLines.append(self.vcardFold('N:' + arNames[0] + ';' + arNames[1])) + else: + arLines.append(self.vcardFold('N:' + arNames[1] + ';' + arNames[0])) + + # SORT-STRING + sSortString = self.getPropertyValue(elmCard, 'sort-string', self.STRING, bAutoEscape=1) + if sSortString: + arLines.append(self.vcardFold('SORT-STRING:' + sSortString)) + + # NICKNAME + arNickname = self.getPropertyValue(elmCard, 'nickname', self.STRING, 1, 1) + if arNickname: + arLines.append(self.vcardFold('NICKNAME:' + ','.join(arNickname))) + + # PHOTO + processSingleURI('photo') + + # BDAY + dtBday = self.getPropertyValue(elmCard, 'bday', self.DATE) + if dtBday: + arLines.append(self.vcardFold('BDAY:' + self.toISO8601(dtBday))) + + # ADR (address) + arAdr = self.getPropertyValue(elmCard, 'adr', bAllowMultiple=1) + for elmAdr in arAdr: + arType = self.getPropertyValue(elmAdr, 'type', self.STRING, 1, 1) + if not arType: + arType = ['intl','postal','parcel','work'] # default adr types, see RFC 2426 section 3.2.1 + sPostOfficeBox = self.getPropertyValue(elmAdr, 'post-office-box', self.STRING, 0, 1) + sExtendedAddress = self.getPropertyValue(elmAdr, 'extended-address', self.STRING, 0, 1) + sStreetAddress = self.getPropertyValue(elmAdr, 'street-address', self.STRING, 0, 1) + sLocality = self.getPropertyValue(elmAdr, 'locality', self.STRING, 0, 1) + sRegion = 
self.getPropertyValue(elmAdr, 'region', self.STRING, 0, 1) + sPostalCode = self.getPropertyValue(elmAdr, 'postal-code', self.STRING, 0, 1) + sCountryName = self.getPropertyValue(elmAdr, 'country-name', self.STRING, 0, 1) + arLines.append(self.vcardFold('ADR;TYPE=' + ','.join(arType) + ':' + + sPostOfficeBox + ';' + + sExtendedAddress + ';' + + sStreetAddress + ';' + + sLocality + ';' + + sRegion + ';' + + sPostalCode + ';' + + sCountryName)) + + # LABEL + processTypeValue('label', ['intl','postal','parcel','work']) + + # TEL (phone number) + processTypeValue('tel', ['voice']) + + # EMAIL + processTypeValue('email', ['internet'], ['internet']) + + # MAILER + processSingleString('mailer') + + # TZ (timezone) + processSingleString('tz') + + # GEO (geographical information) + elmGeo = self.getPropertyValue(elmCard, 'geo') + if elmGeo: + sLatitude = self.getPropertyValue(elmGeo, 'latitude', self.STRING, 0, 1) + sLongitude = self.getPropertyValue(elmGeo, 'longitude', self.STRING, 0, 1) + arLines.append(self.vcardFold('GEO:' + sLatitude + ';' + sLongitude)) + + # TITLE + processSingleString('title') + + # ROLE + processSingleString('role') + + # LOGO + processSingleURI('logo') + + # ORG (organization) + elmOrg = self.getPropertyValue(elmCard, 'org') + if elmOrg: + sOrganizationName = self.getPropertyValue(elmOrg, 'organization-name', self.STRING, 0, 1) + if not sOrganizationName: + # implied "organization-name" optimization + # http://microformats.org/wiki/hcard#Implied_.22organization-name.22_Optimization + sOrganizationName = self.getPropertyValue(elmCard, 'org', self.STRING, 0, 1) + if sOrganizationName: + arLines.append(self.vcardFold('ORG:' + sOrganizationName)) + else: + arOrganizationUnit = self.getPropertyValue(elmOrg, 'organization-unit', self.STRING, 1, 1) + arLines.append(self.vcardFold('ORG:' + sOrganizationName + ';' + ';'.join(arOrganizationUnit))) + + # CATEGORY + arCategory = self.getPropertyValue(elmCard, 'category', self.STRING, 1, 1) + 
self.getPropertyValue(elmCard, 'categories', self.STRING, 1, 1) + if arCategory: + arLines.append(self.vcardFold('CATEGORIES:' + ','.join(arCategory))) + + # NOTE + processSingleString('note') + + # REV + processSingleString('rev') + + # SOUND + processSingleURI('sound') + + # UID + processSingleString('uid') + + # URL + processSingleURI('url') + + # CLASS + processSingleString('class') + + # KEY + processSingleURI('key') + + if arLines: + arLines = [u'BEGIN:vCard',u'VERSION:3.0'] + arLines + [u'END:vCard'] + sVCards += u'\n'.join(arLines) + u'\n' + + return sVCards.strip() + + def isProbablyDownloadable(self, elm): + attrsD = elm.attrMap + if not attrsD.has_key('href'): return 0 + linktype = attrsD.get('type', '').strip() + if linktype.startswith('audio/') or \ + linktype.startswith('video/') or \ + (linktype.startswith('application/') and not linktype.endswith('xml')): + return 1 + path = urlparse.urlparse(attrsD['href'])[2] + if path.find('.') == -1: return 0 + fileext = path.split('.').pop().lower() + return fileext in self.known_binary_extensions + + def findTags(self): + all = lambda x: 1 + for elm in self.document(all, {'rel': re.compile(r'\btag\b')}): + href = elm.get('href') + if not href: continue + urlscheme, domain, path, params, query, fragment = \ + urlparse.urlparse(_urljoin(self.baseuri, href)) + segments = path.split('/') + tag = segments.pop() + if not tag: + tag = segments.pop() + tagscheme = urlparse.urlunparse((urlscheme, domain, '/'.join(segments), '', '', '')) + if not tagscheme.endswith('/'): + tagscheme += '/' + self.tags.append(FeedParserDict({"term": tag, "scheme": tagscheme, "label": elm.string or ''})) + + def findEnclosures(self): + all = lambda x: 1 + enclosure_match = re.compile(r'\benclosure\b') + for elm in self.document(all, {'href': re.compile(r'.+')}): + if not enclosure_match.search(elm.get('rel', '')) and not self.isProbablyDownloadable(elm): continue + if elm.attrMap not in self.enclosures: + 
self.enclosures.append(elm.attrMap) + if elm.string and not elm.get('title'): + self.enclosures[-1]['title'] = elm.string + + def findXFN(self): + all = lambda x: 1 + for elm in self.document(all, {'rel': re.compile('.+'), 'href': re.compile('.+')}): + rels = elm.get('rel', '').split() + xfn_rels = [] + for rel in rels: + if rel in self.known_xfn_relationships: + xfn_rels.append(rel) + if xfn_rels: + self.xfn.append({"relationships": xfn_rels, "href": elm.get('href', ''), "name": elm.string}) + +def _parseMicroformats(htmlSource, baseURI, encoding): + if not BeautifulSoup: return + if _debug: sys.stderr.write('entering _parseMicroformats\n') + try: + p = _MicroformatsParser(htmlSource, baseURI, encoding) + except UnicodeEncodeError: + # sgmllib throws this exception when performing lookups of tags + # with non-ASCII characters in them. + return + p.vcard = p.findVCards(p.document) + p.findTags() + p.findEnclosures() + p.findXFN() + return {"tags": p.tags, "enclosures": p.enclosures, "xfn": p.xfn, "vcard": p.vcard} + +class _RelativeURIResolver(_BaseHTMLProcessor): + relative_uris = [('a', 'href'), + ('applet', 'codebase'), + ('area', 'href'), + ('blockquote', 'cite'), + ('body', 'background'), + ('del', 'cite'), + ('form', 'action'), + ('frame', 'longdesc'), + ('frame', 'src'), + ('iframe', 'longdesc'), + ('iframe', 'src'), + ('head', 'profile'), + ('img', 'longdesc'), + ('img', 'src'), + ('img', 'usemap'), + ('input', 'src'), + ('input', 'usemap'), + ('ins', 'cite'), + ('link', 'href'), + ('object', 'classid'), + ('object', 'codebase'), + ('object', 'data'), + ('object', 'usemap'), + ('q', 'cite'), + ('script', 'src')] + + def __init__(self, baseuri, encoding, _type): + _BaseHTMLProcessor.__init__(self, encoding, _type) + self.baseuri = baseuri + + def resolveURI(self, uri): + return _makeSafeAbsoluteURI(_urljoin(self.baseuri, uri.strip())) + + def unknown_starttag(self, tag, attrs): + if _debug: + sys.stderr.write('tag: [%s] with attributes: [%s]\n' % (tag, 
str(attrs))) + attrs = self.normalize_attrs(attrs) + attrs = [(key, ((tag, key) in self.relative_uris) and self.resolveURI(value) or value) for key, value in attrs] + _BaseHTMLProcessor.unknown_starttag(self, tag, attrs) + +def _resolveRelativeURIs(htmlSource, baseURI, encoding, _type): + if _debug: + sys.stderr.write('entering _resolveRelativeURIs\n') + + p = _RelativeURIResolver(baseURI, encoding, _type) + p.feed(htmlSource) + return p.output() + +def _makeSafeAbsoluteURI(base, rel=None): + # bail if ACCEPTABLE_URI_SCHEMES is empty + if not ACCEPTABLE_URI_SCHEMES: + return _urljoin(base, rel or u'') + if not base: + return rel or u'' + if not rel: + scheme = urlparse.urlparse(base)[0] + if not scheme or scheme in ACCEPTABLE_URI_SCHEMES: + return base + return u'' + uri = _urljoin(base, rel) + if uri.strip().split(':', 1)[0] not in ACCEPTABLE_URI_SCHEMES: + return u'' + return uri + +class _HTMLSanitizer(_BaseHTMLProcessor): + acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', + 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button', + 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', + 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', + 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset', + 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1', + 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', + 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter', + 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option', + 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select', + 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong', + 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', + 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video', 'noscript'] + + acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey', + 'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis', + 'background', 
'balance', 'bgcolor', 'bgproperties', 'border', + 'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding', + 'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff', + 'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color', 'cols', + 'colspan', 'compact', 'contenteditable', 'controls', 'coords', 'data', + 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default', 'delay', + 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end', 'face', 'for', + 'form', 'frame', 'galleryimg', 'gutter', 'headers', 'height', 'hidefocus', + 'hidden', 'high', 'href', 'hreflang', 'hspace', 'icon', 'id', 'inputmode', + 'ismap', 'keytype', 'label', 'leftspacing', 'lang', 'list', 'longdesc', + 'loop', 'loopcount', 'loopend', 'loopstart', 'low', 'lowsrc', 'max', + 'maxlength', 'media', 'method', 'min', 'multiple', 'name', 'nohref', + 'noshade', 'nowrap', 'open', 'optimum', 'pattern', 'ping', 'point-size', + 'prompt', 'pqg', 'radiogroup', 'readonly', 'rel', 'repeat-max', + 'repeat-min', 'replace', 'required', 'rev', 'rightspacing', 'rows', + 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', + 'start', 'step', 'summary', 'suppress', 'tabindex', 'target', 'template', + 'title', 'toppadding', 'type', 'unselectable', 'usemap', 'urn', 'valign', + 'value', 'variable', 'volume', 'vspace', 'vrml', 'width', 'wrap', + 'xml:lang'] + + unacceptable_elements_with_end_tag = ['script', 'applet', 'style'] + + acceptable_css_properties = ['azimuth', 'background-color', + 'border-bottom-color', 'border-collapse', 'border-color', + 'border-left-color', 'border-right-color', 'border-top-color', 'clear', + 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font', + 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight', + 'height', 'letter-spacing', 'line-height', 'overflow', 'pause', + 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness', + 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation', + 
'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent', + 'unicode-bidi', 'vertical-align', 'voice-family', 'volume', + 'white-space', 'width'] + + # survey of common keywords found in feeds + acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue', + 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed', + 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left', + 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive', + 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top', + 'transparent', 'underline', 'white', 'yellow'] + + valid_css_values = re.compile('^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|' + + '\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$') + + mathml_elements = ['annotation', 'annotation-xml', 'maction', 'math', + 'merror', 'mfenced', 'mfrac', 'mi', 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', + 'mphantom', 'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', + 'msub', 'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', + 'munderover', 'none', 'semantics'] + + mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign', + 'columnalign', 'close', 'columnlines', 'columnspacing', 'columnspan', 'depth', + 'display', 'displaystyle', 'encoding', 'equalcolumns', 'equalrows', + 'fence', 'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', + 'lspace', 'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', + 'maxsize', 'minsize', 'open', 'other', 'rowalign', 'rowalign', 'rowalign', + 'rowlines', 'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection', + 'separator', 'separators', 'stretchy', 'width', 'width', 'xlink:href', + 'xlink:show', 'xlink:type', 'xmlns', 'xmlns:xlink'] + + # svgtiny - foreignObject + linearGradient + radialGradient + stop + svg_elements = ['a', 'animate', 'animateColor', 'animateMotion', + 'animateTransform', 'circle', 'defs', 'desc', 'ellipse', 'foreignObject', + 'font-face', 
'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', + 'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph', 'mpath', + 'path', 'polygon', 'polyline', 'radialGradient', 'rect', 'set', 'stop', + 'svg', 'switch', 'text', 'title', 'tspan', 'use'] + + # svgtiny + class + opacity + offset + xmlns + xmlns:xlink + svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic', + 'arabic-form', 'ascent', 'attributeName', 'attributeType', + 'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height', + 'class', 'color', 'color-rendering', 'content', 'cx', 'cy', 'd', 'dx', + 'dy', 'descent', 'display', 'dur', 'end', 'fill', 'fill-opacity', + 'fill-rule', 'font-family', 'font-size', 'font-stretch', 'font-style', + 'font-variant', 'font-weight', 'from', 'fx', 'fy', 'g1', 'g2', + 'glyph-name', 'gradientUnits', 'hanging', 'height', 'horiz-adv-x', + 'horiz-origin-x', 'id', 'ideographic', 'k', 'keyPoints', 'keySplines', + 'keyTimes', 'lang', 'mathematical', 'marker-end', 'marker-mid', + 'marker-start', 'markerHeight', 'markerUnits', 'markerWidth', 'max', + 'min', 'name', 'offset', 'opacity', 'orient', 'origin', + 'overline-position', 'overline-thickness', 'panose-1', 'path', + 'pathLength', 'points', 'preserveAspectRatio', 'r', 'refX', 'refY', + 'repeatCount', 'repeatDur', 'requiredExtensions', 'requiredFeatures', + 'restart', 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', + 'stop-color', 'stop-opacity', 'strikethrough-position', + 'strikethrough-thickness', 'stroke', 'stroke-dasharray', + 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin', + 'stroke-miterlimit', 'stroke-opacity', 'stroke-width', 'systemLanguage', + 'target', 'text-anchor', 'to', 'transform', 'type', 'u1', 'u2', + 'underline-position', 'underline-thickness', 'unicode', 'unicode-range', + 'units-per-em', 'values', 'version', 'viewBox', 'visibility', 'width', + 'widths', 'x', 'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole', + 'xlink:href', 'xlink:role', 'xlink:show', 
'xlink:title', 'xlink:type', + 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y', 'y1', + 'y2', 'zoomAndPan'] + + svg_attr_map = None + svg_elem_map = None + + acceptable_svg_properties = [ 'fill', 'fill-opacity', 'fill-rule', + 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin', + 'stroke-opacity'] + + def reset(self): + _BaseHTMLProcessor.reset(self) + self.unacceptablestack = 0 + self.mathmlOK = 0 + self.svgOK = 0 + + def unknown_starttag(self, tag, attrs): + acceptable_attributes = self.acceptable_attributes + keymap = {} + if not tag in self.acceptable_elements or self.svgOK: + if tag in self.unacceptable_elements_with_end_tag: + self.unacceptablestack += 1 + + # add implicit namespaces to html5 inline svg/mathml + if self._type.endswith('html'): + if not dict(attrs).get('xmlns'): + if tag=='svg': + attrs.append( ('xmlns','http://www.w3.org/2000/svg') ) + if tag=='math': + attrs.append( ('xmlns','http://www.w3.org/1998/Math/MathML') ) + + # not otherwise acceptable, perhaps it is MathML or SVG? + if tag=='math' and ('xmlns','http://www.w3.org/1998/Math/MathML') in attrs: + self.mathmlOK += 1 + if tag=='svg' and ('xmlns','http://www.w3.org/2000/svg') in attrs: + self.svgOK += 1 + + # chose acceptable attributes based on tag class, else bail + if self.mathmlOK and tag in self.mathml_elements: + acceptable_attributes = self.mathml_attributes + elif self.svgOK and tag in self.svg_elements: + # for most vocabularies, lowercasing is a good idea. 
Many + # svg elements, however, are camel case + if not self.svg_attr_map: + lower=[attr.lower() for attr in self.svg_attributes] + mix=[a for a in self.svg_attributes if a not in lower] + self.svg_attributes = lower + self.svg_attr_map = dict([(a.lower(),a) for a in mix]) + + lower=[attr.lower() for attr in self.svg_elements] + mix=[a for a in self.svg_elements if a not in lower] + self.svg_elements = lower + self.svg_elem_map = dict([(a.lower(),a) for a in mix]) + acceptable_attributes = self.svg_attributes + tag = self.svg_elem_map.get(tag,tag) + keymap = self.svg_attr_map + elif not tag in self.acceptable_elements: + return + + # declare xlink namespace, if needed + if self.mathmlOK or self.svgOK: + if filter(lambda (n,v): n.startswith('xlink:'),attrs): + if not ('xmlns:xlink','http://www.w3.org/1999/xlink') in attrs: + attrs.append(('xmlns:xlink','http://www.w3.org/1999/xlink')) + + clean_attrs = [] + for key, value in self.normalize_attrs(attrs): + if key in acceptable_attributes: + key=keymap.get(key,key) + # make sure the uri uses an acceptable uri scheme + if key == u'href': + value = _makeSafeAbsoluteURI(value) + clean_attrs.append((key,value)) + elif key=='style': + clean_value = self.sanitize_style(value) + if clean_value: clean_attrs.append((key,clean_value)) + _BaseHTMLProcessor.unknown_starttag(self, tag, clean_attrs) + + def unknown_endtag(self, tag): + if not tag in self.acceptable_elements: + if tag in self.unacceptable_elements_with_end_tag: + self.unacceptablestack -= 1 + if self.mathmlOK and tag in self.mathml_elements: + if tag == 'math' and self.mathmlOK: self.mathmlOK -= 1 + elif self.svgOK and tag in self.svg_elements: + tag = self.svg_elem_map.get(tag,tag) + if tag == 'svg' and self.svgOK: self.svgOK -= 1 + else: + return + _BaseHTMLProcessor.unknown_endtag(self, tag) + + def handle_pi(self, text): + pass + + def handle_decl(self, text): + pass + + def handle_data(self, text): + if not self.unacceptablestack: + 
_BaseHTMLProcessor.handle_data(self, text) + + def sanitize_style(self, style): + # disallow urls + style=re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ',style) + + # gauntlet + if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return '' + # This replaced a regexp that used re.match and was prone to pathological back-tracking. + if re.sub("\s*[-\w]+\s*:\s*[^:;]*;?", '', style).strip(): return '' + + clean = [] + for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style): + if not value: continue + if prop.lower() in self.acceptable_css_properties: + clean.append(prop + ': ' + value + ';') + elif prop.split('-')[0].lower() in ['background','border','margin','padding']: + for keyword in value.split(): + if not keyword in self.acceptable_css_keywords and \ + not self.valid_css_values.match(keyword): + break + else: + clean.append(prop + ': ' + value + ';') + elif self.svgOK and prop.lower() in self.acceptable_svg_properties: + clean.append(prop + ': ' + value + ';') + + return ' '.join(clean) + + def parse_comment(self, i, report=1): + ret = _BaseHTMLProcessor.parse_comment(self, i, report) + if ret >= 0: + return ret + # if ret == -1, this may be a malicious attempt to circumvent + # sanitization, or a page-destroying unclosed comment + match = re.compile(r'--[^>]*>').search(self.rawdata, i+4) + if match: + return match.end() + # unclosed comment; deliberately fail to handle_data() + return len(self.rawdata) + + +def _sanitizeHTML(htmlSource, encoding, _type): + p = _HTMLSanitizer(encoding, _type) + htmlSource = htmlSource.replace(''): + data = data.split('>', 1)[1] + if data.count('= '2.3.3' + assert base64 != None + user, passw = _base64decode(req.headers['Authorization'].split(' ')[1]).split(':') + realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0] + self.add_password(realm, host, user, passw) + retry = self.http_error_auth_reqed('www-authenticate', host, req, headers) + self.reset_retry_count() 
+ return retry + except: + return self.http_error_default(req, fp, code, msg, headers) + +def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers): + """URL, filename, or string --> stream + + This function lets you define parsers that take any input source + (URL, pathname to local or network file, or actual data as a string) + and deal with it in a uniform manner. Returned object is guaranteed + to have all the basic stdio read methods (read, readline, readlines). + Just .close() the object when you're done with it. + + If the etag argument is supplied, it will be used as the value of an + If-None-Match request header. + + If the modified argument is supplied, it can be a tuple of 9 integers + (as returned by gmtime() in the standard Python time module) or a date + string in any format supported by feedparser. Regardless, it MUST + be in GMT (Greenwich Mean Time). It will be reformatted into an + RFC 1123-compliant date and used as the value of an If-Modified-Since + request header. + + If the agent argument is supplied, it will be used as the value of a + User-Agent request header. + + If the referrer argument is supplied, it will be used as the value of a + Referer[sic] request header. + + If handlers is supplied, it is a list of handlers used to build a + urllib2 opener. + + if request_headers is supplied it is a dictionary of HTTP request headers + that will override the values generated by FeedParser. 
+ """ + + if hasattr(url_file_stream_or_string, 'read'): + return url_file_stream_or_string + + if url_file_stream_or_string == '-': + return sys.stdin + + if urlparse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp', 'file', 'feed'): + # Deal with the feed URI scheme + if url_file_stream_or_string.startswith('feed:http'): + url_file_stream_or_string = url_file_stream_or_string[5:] + elif url_file_stream_or_string.startswith('feed:'): + url_file_stream_or_string = 'http:' + url_file_stream_or_string[5:] + if not agent: + agent = USER_AGENT + # test for inline user:password for basic auth + auth = None + if base64: + urltype, rest = urllib.splittype(url_file_stream_or_string) + realhost, rest = urllib.splithost(rest) + if realhost: + user_passwd, realhost = urllib.splituser(realhost) + if user_passwd: + url_file_stream_or_string = '%s://%s%s' % (urltype, realhost, rest) + auth = base64.standard_b64encode(user_passwd).strip() + + # iri support + try: + if isinstance(url_file_stream_or_string,unicode): + url_file_stream_or_string = url_file_stream_or_string.encode('idna').decode('utf-8') + else: + url_file_stream_or_string = url_file_stream_or_string.decode('utf-8').encode('idna').decode('utf-8') + except: + pass + + # try to open with urllib2 (to use optional headers) + request = _build_urllib2_request(url_file_stream_or_string, agent, etag, modified, referrer, auth, request_headers) + opener = apply(urllib2.build_opener, tuple(handlers + [_FeedURLHandler()])) + opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent + try: + return opener.open(request) + finally: + opener.close() # JohnD + + # try to open with native open function (if url_file_stream_or_string is a filename) + try: + return open(url_file_stream_or_string, 'rb') + except: + pass + + # treat url_file_stream_or_string as string + return _StringIO(str(url_file_stream_or_string)) + +def _build_urllib2_request(url, agent, etag, modified, referrer, auth, 
request_headers): + request = urllib2.Request(url) + request.add_header('User-Agent', agent) + if etag: + request.add_header('If-None-Match', etag) + if type(modified) == type(''): + modified = _parse_date(modified) + elif isinstance(modified, datetime.datetime): + modified = modified.utctimetuple() + if modified: + # format into an RFC 1123-compliant timestamp. We can't use + # time.strftime() since the %a and %b directives can be affected + # by the current locale, but RFC 2616 states that dates must be + # in English. + short_weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + request.add_header('If-Modified-Since', '%s, %02d %s %04d %02d:%02d:%02d GMT' % (short_weekdays[modified[6]], modified[2], months[modified[1] - 1], modified[0], modified[3], modified[4], modified[5])) + if referrer: + request.add_header('Referer', referrer) + if gzip and zlib: + request.add_header('Accept-encoding', 'gzip, deflate') + elif gzip: + request.add_header('Accept-encoding', 'gzip') + elif zlib: + request.add_header('Accept-encoding', 'deflate') + else: + request.add_header('Accept-encoding', '') + if auth: + request.add_header('Authorization', 'Basic %s' % auth) + if ACCEPT_HEADER: + request.add_header('Accept', ACCEPT_HEADER) + # use this for whatever -- cookies, special headers, etc + # [('Cookie','Something'),('x-special-header','Another Value')] + for header_name, header_value in request_headers.items(): + request.add_header(header_name, header_value) + request.add_header('A-IM', 'feed') # RFC 3229 support + return request + +_date_handlers = [] +def registerDateHandler(func): + '''Register a date handler function (takes string, returns 9-tuple date in GMT)''' + _date_handlers.insert(0, func) + +# ISO-8601 date parsing routines written by Fazal Majid. 
+# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601 +# parser is beyond the scope of feedparser and would be a worthwhile addition +# to the Python library. +# A single regular expression cannot parse ISO 8601 date formats into groups +# as the standard is highly irregular (for instance is 030104 2003-01-04 or +# 0301-04-01), so we use templates instead. +# Please note the order in templates is significant because we need a +# greedy match. +_iso8601_tmpl = ['YYYY-?MM-?DD', 'YYYY-0MM?-?DD', 'YYYY-MM', 'YYYY-?OOO', + 'YY-?MM-?DD', 'YY-?OOO', 'YYYY', + '-YY-?MM', '-OOO', '-YY', + '--MM-?DD', '--MM', + '---DD', + 'CC', ''] +_iso8601_re = [ + tmpl.replace( + 'YYYY', r'(?P\d{4})').replace( + 'YY', r'(?P\d\d)').replace( + 'MM', r'(?P[01]\d)').replace( + 'DD', r'(?P[0123]\d)').replace( + 'OOO', r'(?P[0123]\d\d)').replace( + 'CC', r'(?P\d\d$)') + + r'(T?(?P\d{2}):(?P\d{2})' + + r'(:(?P\d{2}))?' + + r'(\.(?P\d+))?' + + r'(?P[+-](?P\d{2})(:(?P\d{2}))?|Z)?)?' + for tmpl in _iso8601_tmpl] +try: + del tmpl +except NameError: + pass +_iso8601_matches = [re.compile(regex).match for regex in _iso8601_re] +try: + del regex +except NameError: + pass +def _parse_date_iso8601(dateString): + '''Parse a variety of ISO-8601-compatible formats like 20040105''' + m = None + for _iso8601_match in _iso8601_matches: + m = _iso8601_match(dateString) + if m: break + if not m: return + if m.span() == (0, 0): return + params = m.groupdict() + ordinal = params.get('ordinal', 0) + if ordinal: + ordinal = int(ordinal) + else: + ordinal = 0 + year = params.get('year', '--') + if not year or year == '--': + year = time.gmtime()[0] + elif len(year) == 2: + # ISO 8601 assumes current century, i.e. 
93 -> 2093, NOT 1993 + year = 100 * int(time.gmtime()[0] / 100) + int(year) + else: + year = int(year) + month = params.get('month', '-') + if not month or month == '-': + # ordinals are NOT normalized by mktime, we simulate them + # by setting month=1, day=ordinal + if ordinal: + month = 1 + else: + month = time.gmtime()[1] + month = int(month) + day = params.get('day', 0) + if not day: + # see above + if ordinal: + day = ordinal + elif params.get('century', 0) or \ + params.get('year', 0) or params.get('month', 0): + day = 1 + else: + day = time.gmtime()[2] + else: + day = int(day) + # special case of the century - is the first year of the 21st century + # 2000 or 2001 ? The debate goes on... + if 'century' in params.keys(): + year = (int(params['century']) - 1) * 100 + 1 + # in ISO 8601 most fields are optional + for field in ['hour', 'minute', 'second', 'tzhour', 'tzmin']: + if not params.get(field, None): + params[field] = 0 + hour = int(params.get('hour', 0)) + minute = int(params.get('minute', 0)) + second = int(float(params.get('second', 0))) + # weekday is normalized by mktime(), we can ignore it + weekday = 0 + daylight_savings_flag = -1 + tm = [year, month, day, hour, minute, second, weekday, + ordinal, daylight_savings_flag] + # ISO 8601 time zone adjustments + tz = params.get('tz') + if tz and tz != 'Z': + if tz[0] == '-': + tm[3] += int(params.get('tzhour', 0)) + tm[4] += int(params.get('tzmin', 0)) + elif tz[0] == '+': + tm[3] -= int(params.get('tzhour', 0)) + tm[4] -= int(params.get('tzmin', 0)) + else: + return None + # Python's time.mktime() is a wrapper around the ANSI C mktime(3c) + # which is guaranteed to normalize d/m/y/h/m/s. + # Many implementations have bugs, but we'll pretend they don't. + return time.localtime(time.mktime(tuple(tm))) +registerDateHandler(_parse_date_iso8601) + +# 8-bit date handling routines written by ytrewq1. 
# Unicode strings for Korean date strings
_korean_year  = u'\ub144' # b3e2 in euc-kr
_korean_month = u'\uc6d4' # bff9 in euc-kr
_korean_day   = u'\uc77c' # c0cf in euc-kr
_korean_am    = u'\uc624\uc804' # bfc0 c0fc in euc-kr
_korean_pm    = u'\uc624\ud6c4' # bfc0 c8c4 in euc-kr

# OnBlog: "<yyyy><year-char> <mm><month-char> <dd><day-char> hh:mm:ss"
_korean_onblog_date_re = \
    re.compile('(\d{4})%s\s+(\d{2})%s\s+(\d{2})%s\s+(\d{2}):(\d{2}):(\d{2})' % \
               (_korean_year, _korean_month, _korean_day))
# Nate: "yyyy-mm-dd <am|pm marker> h:m:s"
_korean_nate_date_re = \
    re.compile(u'(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})' % \
               (_korean_am, _korean_pm))

def _parse_date_onblog(dateString):
    '''Parse a string according to the OnBlog 8-bit date format

    The matched fields are rewritten as a W3DTF string with a fixed
    +09:00 zone offset and handed to _parse_date_w3dtf.  Returns None
    when dateString does not match the OnBlog pattern.
    '''
    m = _korean_onblog_date_re.match(dateString)
    if not m: return
    w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \
                {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\
                 'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\
                 'zonediff': '+09:00'}
    if _debug: sys.stderr.write('OnBlog date parsed as: %s\n' % w3dtfdate)
    return _parse_date_w3dtf(w3dtfdate)
registerDateHandler(_parse_date_onblog)

def _parse_date_nate(dateString):
    '''Parse a string according to the Nate 8-bit date format

    The hour is given on a 12-hour clock together with an AM/PM marker;
    it is converted to a 24-hour value before the fields are rewritten
    as a W3DTF string (fixed +09:00 offset) for _parse_date_w3dtf.
    Returns None when dateString does not match the Nate pattern.
    '''
    m = _korean_nate_date_re.match(dateString)
    if not m: return
    hour = int(m.group(5))
    ampm = m.group(4)
    # 12-hour clock conversion: 12 PM is noon (stays 12) and 12 AM is
    # midnight (becomes 0).  Unconditionally adding 12 for PM would turn
    # noon into hour 24, which mktime() normalizes to the *next* day.
    if ampm == _korean_pm:
        if hour < 12:
            hour += 12
    elif hour == 12:
        hour = 0
    hour = '%02d' % hour
    w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \
                {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\
                 'hour': hour, 'minute': m.group(6), 'second': m.group(7),\
                 'zonediff': '+09:00'}
    if _debug: sys.stderr.write('Nate date parsed as: %s\n' % w3dtfdate)
    return _parse_date_w3dtf(w3dtfdate)
registerDateHandler(_parse_date_nate)

_mssql_date_re = \
    re.compile('(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})(\.\d+)?')
def _parse_date_mssql(dateString):
    '''Parse a string according to the MS SQL date format

    Fractional seconds are matched but discarded.  Returns None when
    dateString does not match.
    '''
    m = _mssql_date_re.match(dateString)
    if not m: return
    # NOTE(review): the input carries no zone information; the fixed +09:00
    # offset mirrors the Korean handlers above -- confirm this is intended
    # before changing it, since callers may rely on the resulting times.
    w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \
                {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\
                 'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\
                 'zonediff': '+09:00'}
    if _debug: sys.stderr.write('MS SQL date parsed as: %s\n' % w3dtfdate)
    return _parse_date_w3dtf(w3dtfdate)
registerDateHandler(_parse_date_mssql)

# Unicode strings for Greek date strings
_greek_months = {
    u'\u0399\u03b1\u03bd': u'Jan',       # c9e1ed in iso-8859-7
    u'\u03a6\u03b5\u03b2': u'Feb',       # d6e5e2 in iso-8859-7
    u'\u039c\u03ac\u03ce': u'Mar',       # ccdcfe in iso-8859-7
    u'\u039c\u03b1\u03ce': u'Mar',       # cce1fe in iso-8859-7
    u'\u0391\u03c0\u03c1': u'Apr',       # c1f0f1 in iso-8859-7
    u'\u039c\u03ac\u03b9': u'May',       # ccdce9 in iso-8859-7
    u'\u039c\u03b1\u03ca': u'May',       # cce1fa in iso-8859-7
    u'\u039c\u03b1\u03b9': u'May',       # cce1e9 in iso-8859-7
    u'\u0399\u03bf\u03cd\u03bd': u'Jun', # c9effded in iso-8859-7
    u'\u0399\u03bf\u03bd': u'Jun',       # c9efed in iso-8859-7
    u'\u0399\u03bf\u03cd\u03bb': u'Jul', # c9effdeb in iso-8859-7
    u'\u0399\u03bf\u03bb': u'Jul',       # c9f9eb in iso-8859-7
    u'\u0391\u03cd\u03b3': u'Aug',       # c1fde3 in iso-8859-7
    u'\u0391\u03c5\u03b3': u'Aug',       # c1f5e3 in iso-8859-7
    u'\u03a3\u03b5\u03c0': u'Sep',       # d3e5f0 in iso-8859-7
    u'\u039f\u03ba\u03c4': u'Oct',       # cfeaf4 in iso-8859-7
    u'\u039d\u03bf\u03ad': u'Nov',       # cdefdd in iso-8859-7
    u'\u039d\u03bf\u03b5': u'Nov',       # cdefe5 in iso-8859-7
    u'\u0394\u03b5\u03ba': u'Dec',       # c4e5ea in iso-8859-7
}

_greek_wdays = {
    u'\u039a\u03c5\u03c1': u'Sun', # caf5f1 in iso-8859-7
    u'\u0394\u03b5\u03c5': u'Mon', # c4e5f5 in iso-8859-7
    u'\u03a4\u03c1\u03b9': u'Tue', # d4f1e9 in iso-8859-7
    u'\u03a4\u03b5\u03c4': u'Wed', # d4e5f4 in iso-8859-7
    u'\u03a0\u03b5\u03bc': u'Thu', # d0e5ec in iso-8859-7
    u'\u03a0\u03b1\u03c1': u'Fri', # d0e1f1 in iso-8859-7
    u'\u03a3\u03b1\u03b2': u'Sat', # d3e1e2 in iso-8859-7
}

_greek_date_format_re = \
    re.compile(u'([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)')

def _parse_date_greek(dateString):
    '''Parse a string according to a Greek 8-bit date format.

    The Greek weekday and month abbreviations are translated to their
    English equivalents and the result is parsed as an RFC 822 date.
    Returns None when dateString does not match or uses an abbreviation
    missing from the lookup tables.
    '''
    m = _greek_date_format_re.match(dateString)
    if not m: return
    try:
        wday = _greek_wdays[m.group(1)]
        month = _greek_months[m.group(3)]
    except KeyError:
        # unknown weekday/month abbreviation: not a date we understand
        return
    rfc822date = '%(wday)s, %(day)s %(month)s %(year)s %(hour)s:%(minute)s:%(second)s %(zonediff)s' % \
                 {'wday': wday, 'day': m.group(2), 'month': month, 'year': m.group(4),\
                  'hour': m.group(5), 'minute': m.group(6), 'second': m.group(7),\
                  'zonediff': m.group(8)}
    if _debug: sys.stderr.write('Greek date parsed as: %s\n' % rfc822date)
    return _parse_date_rfc822(rfc822date)
registerDateHandler(_parse_date_greek)

# Unicode strings for Hungarian date strings
_hungarian_months = {
    u'janu\u00e1r':   u'01',  # e1 in iso-8859-2
    u'febru\u00e1r':  u'02',  # e1 in iso-8859-2 (standard spelling)
    u'febru\u00e1ri': u'02',  # e1 in iso-8859-2
    u'm\u00e1rcius':  u'03',  # e1 in iso-8859-2
    u'\u00e1prilis':  u'04',  # e1 in iso-8859-2
    u'm\u00e1jus':    u'05',  # e1 in iso-8859-2 (standard spelling)
    u'm\u00e1ujus':   u'05',  # e1 in iso-8859-2
    u'j\u00fanius':   u'06',  # fa in iso-8859-2
    u'j\u00falius':   u'07',  # fa in iso-8859-2
    u'augusztus':     u'08',
    u'szeptember':    u'09',
    u'okt\u00f3ber':  u'10',  # f3 in iso-8859-2
    u'november':      u'11',
    u'december':      u'12',
}

_hungarian_date_format_re = \
    re.compile(u'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})((\+|-)(\d{,2}:\d{2}))')

def _parse_date_hungarian(dateString):
    '''Parse a string according to a Hungarian 8-bit date format.

    The month name is mapped to its two-digit number, single-digit day
    and hour values are zero-padded, and the result is parsed as W3DTF.
    Returns None when dateString does not match or the month name is
    not in _hungarian_months.
    '''
    m = _hungarian_date_format_re.match(dateString)
    if not m: return
    try:
        month = _hungarian_months[m.group(2)]
    except KeyError:
        # unknown month name: not a date we understand
        return
    day = m.group(3)
    if len(day) == 1:
        day = '0' + day
    hour = m.group(4)
    if len(hour) == 1:
        hour = '0' + hour
    w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s%(zonediff)s' % \
                {'year': m.group(1), 'month': month, 'day': day,\
                 'hour': hour, 'minute': m.group(5),\
                 'zonediff': m.group(6)}
    if _debug: sys.stderr.write('Hungarian date parsed as: %s\n' % w3dtfdate)
    return _parse_date_w3dtf(w3dtfdate)
registerDateHandler(_parse_date_hungarian)

# W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by
# Drake and licensed under the Python license.  Removed all range checking
# for month, day, hour, minute, and second, since mktime will normalize
# these later
def _parse_date_w3dtf(dateString):
    '''Parse a W3DTF-style date string

    Accepts a date (year, optionally followed by month and day, or by a
    three-digit day-of-year) optionally followed by "T", a time and a
    time zone designator.  Returns the result of time.gmtime() for the
    parsed instant, or None when the whole of dateString does not match
    or the year is rejected by __extract_date.
    '''
    def __extract_date(m):
        '''Return (year, month, day); (0, 0, 0) flags an unusable year.'''
        year = int(m.group('year'))
        if year < 100:
            year = 100 * int(time.gmtime()[0] / 100) + int(year)
        if year < 1000:
            return 0, 0, 0
        julian = m.group('julian')
        if julian:
            julian = int(julian)
            # first guess assuming 30-day months (integer division), then
            # nudge month/day until mktime agrees on the day of the year
            month = julian // 30 + 1
            day = julian % 30 + 1
            jday = None
            while jday != julian:
                t = time.mktime((year, month, day, 0, 0, 0, 0, 0, 0))
                jday = time.gmtime(t)[-2]
                diff = abs(jday - julian)
                if jday > julian:
                    if diff < day:
                        day = day - diff
                    else:
                        month = month - 1
                        day = 31
                elif jday < julian:
                    if day + diff < 28:
                        day = day + diff
                    else:
                        month = month + 1
            return year, month, day
        month = m.group('month')
        day = 1
        if month is None:
            month = 1
        else:
            month = int(month)
            day = m.group('day')
            if day:
                day = int(day)
            else:
                day = 1
        return year, month, day

    def __extract_time(m):
        '''Return (hours, minutes, seconds); zeros when no time was given.'''
        if not m:
            return 0, 0, 0
        hours = m.group('hours')
        if not hours:
            return 0, 0, 0
        hours = int(hours)
        minutes = int(m.group('minutes'))
        seconds = m.group('seconds')
        if seconds:
            seconds = int(seconds)
        else:
            seconds = 0
        return hours, minutes, seconds

    def __extract_tzd(m):
        '''Return the Time Zone Designator as an offset in seconds from UTC.'''
        if not m:
            return 0
        tzd = m.group('tzd')
        if not tzd:
            return 0
        if tzd == 'Z':
            return 0
        hours = int(m.group('tzdhours'))
        minutes = m.group('tzdminutes')
        if minutes:
            minutes = int(minutes)
        else:
            minutes = 0
        offset = (hours*60 + minutes) * 60
        if tzd[0] == '+':
            return -offset
        return offset

    # The group names below are required: the helpers above read them with
    # m.group('year') etc., and (?P=dsep)/(?P=tsep) are backreferences that
    # force the same separator to be used throughout the date / the time.
    __date_re = ('(?P<year>\d\d\d\d)'
                 '(?:(?P<dsep>-|)'
                 '(?:(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?'
                 '|(?P<julian>\d\d\d)))?')
    __tzd_re = '(?P<tzd>[-+](?P<tzdhours>\d\d)(?::?(?P<tzdminutes>\d\d))|Z)'
    __time_re = ('(?P<hours>\d\d)(?P<tsep>:|)(?P<minutes>\d\d)'
                 '(?:(?P=tsep)(?P<seconds>\d\d)(?:[.,]\d+)?)?'
                 + __tzd_re)
    __datetime_re = '%s(?:T%s)?' % (__date_re, __time_re)
    __datetime_rx = re.compile(__datetime_re)
    m = __datetime_rx.match(dateString)
    # require the pattern to consume the entire string
    if (m is None) or (m.group() != dateString): return
    gmt = __extract_date(m) + __extract_time(m) + (0, 0, 0)
    if gmt[0] == 0: return
    return time.gmtime(time.mktime(gmt) + __extract_tzd(m) - time.timezone)
registerDateHandler(_parse_date_w3dtf)

def _parse_date_rfc822(dateString):
    '''Parse an RFC822, RFC1123, RFC2822, or asctime-style date

    A leading weekday token is dropped, a time with a "+" offset glued
    onto it is split apart, an "Etc/" zone prefix is stripped, and a
    missing time defaults to 00:00:00 GMT before the string is handed to
    rfc822.parsedate_tz.  Returns the result of time.gmtime(), or None
    when the input is empty or rfc822 cannot parse it.
    '''
    data = dateString.split()
    if not data:
        # empty or whitespace-only input: nothing to parse
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in rfc822._daynames:
        del data[0]
    if len(data) == 4:
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('')
        dateString = " ".join(data)
    # Account for the Etc/GMT timezone by stripping 'Etc/'
    elif len(data) == 5 and data[4].lower().startswith('etc/'):
        data[4] = data[4][4:]
        dateString = " ".join(data)
    if len(data) < 5:
        dateString += ' 00:00:00 GMT'
    tm = rfc822.parsedate_tz(dateString)
    if tm:
        return time.gmtime(rfc822.mktime_tz(tm))
# rfc822.py defines several time zones, but we define some extra ones.
# 'ET' is equivalent to 'EST', etc.
+_additional_timezones = {'AT': -400, 'ET': -500, 'CT': -600, 'MT': -700, 'PT': -800} +rfc822._timezones.update(_additional_timezones) +registerDateHandler(_parse_date_rfc822) + +def _parse_date_perforce(aDateString): + """parse a date in yyyy/mm/dd hh:mm:ss TTT format""" + # Fri, 2006/09/15 08:19:53 EDT + _my_date_pattern = re.compile( \ + r'(\w{,3}), (\d{,4})/(\d{,2})/(\d{2}) (\d{,2}):(\d{2}):(\d{2}) (\w{,3})') + + dow, year, month, day, hour, minute, second, tz = \ + _my_date_pattern.search(aDateString).groups() + months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + dateString = "%s, %s %s %s %s:%s:%s %s" % (dow, day, months[int(month) - 1], year, hour, minute, second, tz) + tm = rfc822.parsedate_tz(dateString) + if tm: + return time.gmtime(rfc822.mktime_tz(tm)) +registerDateHandler(_parse_date_perforce) + +def _parse_date(dateString): + '''Parses a variety of date formats into a 9-tuple in GMT''' + for handler in _date_handlers: + try: + date9tuple = handler(dateString) + if not date9tuple: continue + if len(date9tuple) != 9: + if _debug: sys.stderr.write('date handler function must return 9-tuple\n') + raise ValueError + map(int, date9tuple) + return date9tuple + except Exception, e: + if _debug: sys.stderr.write('%s raised %s\n' % (handler.__name__, repr(e))) + pass + return None + +def _getCharacterEncoding(http_headers, xml_data): + '''Get the character encoding of the XML document + + http_headers is a dictionary + xml_data is a raw string (not Unicode) + + This is so much trickier than it sounds, it's not even funny. + According to RFC 3023 ('XML Media Types'), if the HTTP Content-Type + is application/xml, application/*+xml, + application/xml-external-parsed-entity, or application/xml-dtd, + the encoding given in the charset parameter of the HTTP Content-Type + takes precedence over the encoding given in the XML prefix within the + document, and defaults to 'utf-8' if neither are specified. 
But, if + the HTTP Content-Type is text/xml, text/*+xml, or + text/xml-external-parsed-entity, the encoding given in the XML prefix + within the document is ALWAYS IGNORED and only the encoding given in + the charset parameter of the HTTP Content-Type header should be + respected, and it defaults to 'us-ascii' if not specified. + + Furthermore, discussion on the atom-syntax mailing list with the + author of RFC 3023 leads me to the conclusion that any document + served with a Content-Type of text/* and no charset parameter + must be treated as us-ascii. (We now do this.) And also that it + must always be flagged as non-well-formed. (We now do this too.) + + If Content-Type is unspecified (input was local file or non-HTTP source) + or unrecognized (server just got it totally wrong), then go by the + encoding given in the XML prefix of the document and default to + 'iso-8859-1' as per the HTTP specification (RFC 2616). + + Then, assuming we didn't find a character encoding in the HTTP headers + (and the HTTP Content-type allowed us to look in the body), we need + to sniff the first few bytes of the XML data and try to determine + whether the encoding is ASCII-compatible. Section F of the XML + specification shows the way here: + http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info + + If the sniffed encoding is not ASCII-compatible, we need to make it + ASCII compatible so that we can sniff further into the XML declaration + to find the encoding attribute, which will tell us the true encoding. + + Of course, none of this guarantees that we will be able to parse the + feed in the declared character encoding (assuming it was declared + correctly, which many are not). CJKCodecs and iconv_codec help a lot; + you should definitely install them if you can. 
+ http://cjkpython.i18n.org/ + ''' + + def _parseHTTPContentType(content_type): + '''takes HTTP Content-Type header and returns (content type, charset) + + If no charset is specified, returns (content type, '') + If no content type is specified, returns ('', '') + Both return parameters are guaranteed to be lowercase strings + ''' + content_type = content_type or '' + content_type, params = cgi.parse_header(content_type) + return content_type, params.get('charset', '').replace("'", '') + + sniffed_xml_encoding = '' + xml_encoding = '' + true_encoding = '' + http_content_type, http_encoding = _parseHTTPContentType(http_headers.get('content-type', http_headers.get('Content-type'))) + # Must sniff for non-ASCII-compatible character encodings before + # searching for XML declaration. This heuristic is defined in + # section F of the XML specification: + # http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info + try: + if xml_data[:4] == _l2bytes([0x4c, 0x6f, 0xa7, 0x94]): + # EBCDIC + xml_data = _ebcdic_to_ascii(xml_data) + elif xml_data[:4] == _l2bytes([0x00, 0x3c, 0x00, 0x3f]): + # UTF-16BE + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xfe, 0xff])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])): + # UTF-16BE with BOM + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') + elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x3f, 0x00]): + # UTF-16LE + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == _l2bytes([0xff, 0xfe])) and (xml_data[2:4] != _l2bytes([0x00, 0x00])): + # UTF-16LE with BOM + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') + elif xml_data[:4] == _l2bytes([0x00, 0x00, 0x00, 0x3c]): + # UTF-32BE + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data, 
'utf-32be').encode('utf-8') + elif xml_data[:4] == _l2bytes([0x3c, 0x00, 0x00, 0x00]): + # UTF-32LE + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') + elif xml_data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]): + # UTF-32BE with BOM + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') + elif xml_data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]): + # UTF-32LE with BOM + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') + elif xml_data[:3] == _l2bytes([0xef, 0xbb, 0xbf]): + # UTF-8 with BOM + sniffed_xml_encoding = 'utf-8' + xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') + else: + # ASCII-compatible + pass + xml_encoding_match = re.compile(_s2bytes('^<\?.*encoding=[\'"](.*?)[\'"].*\?>')).match(xml_data) + except: + xml_encoding_match = None + if xml_encoding_match: + xml_encoding = xml_encoding_match.groups()[0].decode('utf-8').lower() + if sniffed_xml_encoding and (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', 'iso-10646-ucs-4', 'ucs-4', 'csucs4', 'utf-16', 'utf-32', 'utf_16', 'utf_32', 'utf16', 'u16')): + xml_encoding = sniffed_xml_encoding + acceptable_content_type = 0 + application_content_types = ('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity') + text_content_types = ('text/xml', 'text/xml-external-parsed-entity') + if (http_content_type in application_content_types) or \ + (http_content_type.startswith('application/') and http_content_type.endswith('+xml')): + acceptable_content_type = 1 + true_encoding = http_encoding or xml_encoding or 'utf-8' + elif (http_content_type in text_content_types) or \ + (http_content_type.startswith('text/')) and http_content_type.endswith('+xml'): + acceptable_content_type = 1 + true_encoding = http_encoding or 'us-ascii' + elif http_content_type.startswith('text/'): + true_encoding = http_encoding or 'us-ascii' + elif http_headers and (not 
(http_headers.has_key('content-type') or http_headers.has_key('Content-type'))): + true_encoding = xml_encoding or 'iso-8859-1' + else: + true_encoding = xml_encoding or 'utf-8' + # some feeds claim to be gb2312 but are actually gb18030. + # apparently MSIE and Firefox both do the following switch: + if true_encoding.lower() == 'gb2312': + true_encoding = 'gb18030' + return true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type + +def _toUTF8(data, encoding): + '''Changes an XML data stream on the fly to specify a new encoding + + data is a raw sequence of bytes (not Unicode) that is presumed to be in %encoding already + encoding is a string recognized by encodings.aliases + ''' + if _debug: sys.stderr.write('entering _toUTF8, trying encoding %s\n' % encoding) + # strip Byte Order Mark (if present) + if (len(data) >= 4) and (data[:2] == _l2bytes([0xfe, 0xff])) and (data[2:4] != _l2bytes([0x00, 0x00])): + if _debug: + sys.stderr.write('stripping BOM\n') + if encoding != 'utf-16be': + sys.stderr.write('trying utf-16be instead\n') + encoding = 'utf-16be' + data = data[2:] + elif (len(data) >= 4) and (data[:2] == _l2bytes([0xff, 0xfe])) and (data[2:4] != _l2bytes([0x00, 0x00])): + if _debug: + sys.stderr.write('stripping BOM\n') + if encoding != 'utf-16le': + sys.stderr.write('trying utf-16le instead\n') + encoding = 'utf-16le' + data = data[2:] + elif data[:3] == _l2bytes([0xef, 0xbb, 0xbf]): + if _debug: + sys.stderr.write('stripping BOM\n') + if encoding != 'utf-8': + sys.stderr.write('trying utf-8 instead\n') + encoding = 'utf-8' + data = data[3:] + elif data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]): + if _debug: + sys.stderr.write('stripping BOM\n') + if encoding != 'utf-32be': + sys.stderr.write('trying utf-32be instead\n') + encoding = 'utf-32be' + data = data[4:] + elif data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]): + if _debug: + sys.stderr.write('stripping BOM\n') + if encoding != 'utf-32le': + sys.stderr.write('trying 
utf-32le instead\n') + encoding = 'utf-32le' + data = data[4:] + newdata = unicode(data, encoding) + if _debug: sys.stderr.write('successfully converted %s data to unicode\n' % encoding) + declmatch = re.compile('^<\?xml[^>]*?>') + newdecl = '''''' + if declmatch.search(newdata): + newdata = declmatch.sub(newdecl, newdata) + else: + newdata = newdecl + u'\n' + newdata + return newdata.encode('utf-8') + +def _stripDoctype(data): + '''Strips DOCTYPE from XML document, returns (rss_version, stripped_data) + + rss_version may be 'rss091n' or None + stripped_data is the same XML document, minus the DOCTYPE + ''' + start = re.search(_s2bytes('<\w'), data) + start = start and start.start() or -1 + head,data = data[:start+1], data[start+1:] + + entity_pattern = re.compile(_s2bytes(r'^\s*]*?)>'), re.MULTILINE) + entity_results=entity_pattern.findall(head) + head = entity_pattern.sub(_s2bytes(''), head) + doctype_pattern = re.compile(_s2bytes(r'^\s*]*?)>'), re.MULTILINE) + doctype_results = doctype_pattern.findall(head) + doctype = doctype_results and doctype_results[0] or _s2bytes('') + if doctype.lower().count(_s2bytes('netscape')): + version = 'rss091n' + else: + version = None + + # only allow in 'safe' inline entity definitions + replacement=_s2bytes('') + if len(doctype_results)==1 and entity_results: + safe_pattern=re.compile(_s2bytes('\s+(\w+)\s+"(&#\w+;|[^&"]*)"')) + safe_entities=filter(lambda e: safe_pattern.match(e),entity_results) + if safe_entities: + replacement=_s2bytes('\n \n]>') + data = doctype_pattern.sub(replacement, head) + data + + return version, data, dict(replacement and [(k.decode('utf-8'), v.decode('utf-8')) for k, v in safe_pattern.findall(replacement)]) + +def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={}): + '''Parse a feed from a URL, file, stream, or string. 
+ + request_headers, if given, is a dict from http header name to value to add + to the request; this overrides internally generated values. + ''' + result = FeedParserDict() + result['feed'] = FeedParserDict() + result['entries'] = [] + if _XML_AVAILABLE: + result['bozo'] = 0 + if not isinstance(handlers, list): + handlers = [handlers] + try: + f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers) + data = f.read() + except Exception, e: + result['bozo'] = 1 + result['bozo_exception'] = e + data = None + f = None + + if hasattr(f, 'headers'): + result['headers'] = dict(f.headers) + # overwrite existing headers using response_headers + if 'headers' in result: + result['headers'].update(response_headers) + elif response_headers: + result['headers'] = copy.deepcopy(response_headers) + + # if feed is gzip-compressed, decompress it + if f and data and 'headers' in result: + if gzip and result['headers'].get('content-encoding') == 'gzip': + try: + data = gzip.GzipFile(fileobj=_StringIO(data)).read() + except Exception, e: + # Some feeds claim to be gzipped but they're not, so + # we get garbage. Ideally, we should re-request the + # feed without the 'Accept-encoding: gzip' header, + # but we don't. 
+ result['bozo'] = 1 + result['bozo_exception'] = e + data = '' + elif zlib and result['headers'].get('content-encoding') == 'deflate': + try: + data = zlib.decompress(data, -zlib.MAX_WBITS) + except Exception, e: + result['bozo'] = 1 + result['bozo_exception'] = e + data = '' + + # save HTTP headers + if 'headers' in result: + if 'etag' in result['headers'] or 'ETag' in result['headers']: + etag = result['headers'].get('etag', result['headers'].get('ETag')) + if etag: + result['etag'] = etag + if 'last-modified' in result['headers'] or 'Last-Modified' in result['headers']: + modified = result['headers'].get('last-modified', result['headers'].get('Last-Modified')) + if modified: + result['modified'] = _parse_date(modified) + if hasattr(f, 'url'): + result['href'] = f.url + result['status'] = 200 + if hasattr(f, 'status'): + result['status'] = f.status + if hasattr(f, 'close'): + f.close() + + # there are four encodings to keep track of: + # - http_encoding is the encoding declared in the Content-Type HTTP header + # - xml_encoding is the encoding declared in the }, L{Release }, or + L{Track } + + 2. L{webservice}: An interface to the MusicBrainz XML web service. + + 3. L{wsxml}: A parser for the web service XML format (MMD). + + 4. L{disc}: Functions for creating and submitting DiscIDs. + + 5. L{utils}: Utilities for working with URIs and other commonly needed tools. + +@author: Matthias Friedrich +""" +__revision__ = '$Id: __init__.py 12974 2011-05-01 08:43:54Z luks $' +__version__ = '0.7.3' + +# EOF diff --git a/lib/musicbrainz2/data/__init__.py b/lib/musicbrainz2/data/__init__.py new file mode 100644 index 00000000..3067fabc --- /dev/null +++ b/lib/musicbrainz2/data/__init__.py @@ -0,0 +1,10 @@ +"""Support data for the musicbrainz2 package. + +This package is I{not} part of the public API, it has been added to work +around shortcomings in python and may thus be removed at any time. + +Please use the L{musicbrainz2.utils} module instead. 
+""" +__revision__ = '$Id: __init__.py 7386 2006-04-30 11:12:55Z matt $' + +# EOF diff --git a/lib/musicbrainz2/data/countrynames.py b/lib/musicbrainz2/data/countrynames.py new file mode 100644 index 00000000..7c4ab023 --- /dev/null +++ b/lib/musicbrainz2/data/countrynames.py @@ -0,0 +1,253 @@ +# -*- coding: utf-8 -*- + +__revision__ = '$Id: countrynames.py 7386 2006-04-30 11:12:55Z matt $' + +countryNames = { + u'BD': u'Bangladesh', + u'BE': u'Belgium', + u'BF': u'Burkina Faso', + u'BG': u'Bulgaria', + u'BB': u'Barbados', + u'WF': u'Wallis and Futuna Islands', + u'BM': u'Bermuda', + u'BN': u'Brunei Darussalam', + u'BO': u'Bolivia', + u'BH': u'Bahrain', + u'BI': u'Burundi', + u'BJ': u'Benin', + u'BT': u'Bhutan', + u'JM': u'Jamaica', + u'BV': u'Bouvet Island', + u'BW': u'Botswana', + u'WS': u'Samoa', + u'BR': u'Brazil', + u'BS': u'Bahamas', + u'BY': u'Belarus', + u'BZ': u'Belize', + u'RU': u'Russian Federation', + u'RW': u'Rwanda', + u'RE': u'Reunion', + u'TM': u'Turkmenistan', + u'TJ': u'Tajikistan', + u'RO': u'Romania', + u'TK': u'Tokelau', + u'GW': u'Guinea-Bissau', + u'GU': u'Guam', + u'GT': u'Guatemala', + u'GR': u'Greece', + u'GQ': u'Equatorial Guinea', + u'GP': u'Guadeloupe', + u'JP': u'Japan', + u'GY': u'Guyana', + u'GF': u'French Guiana', + u'GE': u'Georgia', + u'GD': u'Grenada', + u'GB': u'United Kingdom', + u'GA': u'Gabon', + u'SV': u'El Salvador', + u'GN': u'Guinea', + u'GM': u'Gambia', + u'GL': u'Greenland', + u'GI': u'Gibraltar', + u'GH': u'Ghana', + u'OM': u'Oman', + u'TN': u'Tunisia', + u'JO': u'Jordan', + u'HT': u'Haiti', + u'HU': u'Hungary', + u'HK': u'Hong Kong', + u'HN': u'Honduras', + u'HM': u'Heard and Mc Donald Islands', + u'VE': u'Venezuela', + u'PR': u'Puerto Rico', + u'PW': u'Palau', + u'PT': u'Portugal', + u'SJ': u'Svalbard and Jan Mayen Islands', + u'PY': u'Paraguay', + u'IQ': u'Iraq', + u'PA': u'Panama', + u'PF': u'French Polynesia', + u'PG': u'Papua New Guinea', + u'PE': u'Peru', + u'PK': u'Pakistan', + u'PH': u'Philippines', + u'PN': 
u'Pitcairn', + u'PL': u'Poland', + u'PM': u'St. Pierre and Miquelon', + u'ZM': u'Zambia', + u'EH': u'Western Sahara', + u'EE': u'Estonia', + u'EG': u'Egypt', + u'ZA': u'South Africa', + u'EC': u'Ecuador', + u'IT': u'Italy', + u'VN': u'Viet Nam', + u'SB': u'Solomon Islands', + u'ET': u'Ethiopia', + u'SO': u'Somalia', + u'ZW': u'Zimbabwe', + u'SA': u'Saudi Arabia', + u'ES': u'Spain', + u'ER': u'Eritrea', + u'MD': u'Moldova, Republic of', + u'MG': u'Madagascar', + u'MA': u'Morocco', + u'MC': u'Monaco', + u'UZ': u'Uzbekistan', + u'MM': u'Myanmar', + u'ML': u'Mali', + u'MO': u'Macau', + u'MN': u'Mongolia', + u'MH': u'Marshall Islands', + u'MK': u'Macedonia, The Former Yugoslav Republic of', + u'MU': u'Mauritius', + u'MT': u'Malta', + u'MW': u'Malawi', + u'MV': u'Maldives', + u'MQ': u'Martinique', + u'MP': u'Northern Mariana Islands', + u'MS': u'Montserrat', + u'MR': u'Mauritania', + u'UG': u'Uganda', + u'MY': u'Malaysia', + u'MX': u'Mexico', + u'IL': u'Israel', + u'FR': u'France', + u'IO': u'British Indian Ocean Territory', + u'SH': u'St. 
Helena', + u'FI': u'Finland', + u'FJ': u'Fiji', + u'FK': u'Falkland Islands (Malvinas)', + u'FM': u'Micronesia, Federated States of', + u'FO': u'Faroe Islands', + u'NI': u'Nicaragua', + u'NL': u'Netherlands', + u'NO': u'Norway', + u'NA': u'Namibia', + u'VU': u'Vanuatu', + u'NC': u'New Caledonia', + u'NE': u'Niger', + u'NF': u'Norfolk Island', + u'NG': u'Nigeria', + u'NZ': u'New Zealand', + u'ZR': u'Zaire', + u'NP': u'Nepal', + u'NR': u'Nauru', + u'NU': u'Niue', + u'CK': u'Cook Islands', + u'CI': u'Cote d\'Ivoire', + u'CH': u'Switzerland', + u'CO': u'Colombia', + u'CN': u'China', + u'CM': u'Cameroon', + u'CL': u'Chile', + u'CC': u'Cocos (Keeling) Islands', + u'CA': u'Canada', + u'CG': u'Congo', + u'CF': u'Central African Republic', + u'CZ': u'Czech Republic', + u'CY': u'Cyprus', + u'CX': u'Christmas Island', + u'CR': u'Costa Rica', + u'CV': u'Cape Verde', + u'CU': u'Cuba', + u'SZ': u'Swaziland', + u'SY': u'Syrian Arab Republic', + u'KG': u'Kyrgyzstan', + u'KE': u'Kenya', + u'SR': u'Suriname', + u'KI': u'Kiribati', + u'KH': u'Cambodia', + u'KN': u'Saint Kitts and Nevis', + u'KM': u'Comoros', + u'ST': u'Sao Tome and Principe', + u'SI': u'Slovenia', + u'KW': u'Kuwait', + u'SN': u'Senegal', + u'SM': u'San Marino', + u'SL': u'Sierra Leone', + u'SC': u'Seychelles', + u'KZ': u'Kazakhstan', + u'KY': u'Cayman Islands', + u'SG': u'Singapore', + u'SE': u'Sweden', + u'SD': u'Sudan', + u'DO': u'Dominican Republic', + u'DM': u'Dominica', + u'DJ': u'Djibouti', + u'DK': u'Denmark', + u'VG': u'Virgin Islands (British)', + u'DE': u'Germany', + u'YE': u'Yemen', + u'DZ': u'Algeria', + u'US': u'United States', + u'UY': u'Uruguay', + u'YT': u'Mayotte', + u'UM': u'United States Minor Outlying Islands', + u'LB': u'Lebanon', + u'LC': u'Saint Lucia', + u'LA': u'Lao People\'s Democratic Republic', + u'TV': u'Tuvalu', + u'TW': u'Taiwan', + u'TT': u'Trinidad and Tobago', + u'TR': u'Turkey', + u'LK': u'Sri Lanka', + u'LI': u'Liechtenstein', + u'LV': u'Latvia', + u'TO': u'Tonga', + u'LT': 
u'Lithuania', + u'LU': u'Luxembourg', + u'LR': u'Liberia', + u'LS': u'Lesotho', + u'TH': u'Thailand', + u'TF': u'French Southern Territories', + u'TG': u'Togo', + u'TD': u'Chad', + u'TC': u'Turks and Caicos Islands', + u'LY': u'Libyan Arab Jamahiriya', + u'VA': u'Vatican City State (Holy See)', + u'VC': u'Saint Vincent and The Grenadines', + u'AE': u'United Arab Emirates', + u'AD': u'Andorra', + u'AG': u'Antigua and Barbuda', + u'AF': u'Afghanistan', + u'AI': u'Anguilla', + u'VI': u'Virgin Islands (U.S.)', + u'IS': u'Iceland', + u'IR': u'Iran (Islamic Republic of)', + u'AM': u'Armenia', + u'AL': u'Albania', + u'AO': u'Angola', + u'AN': u'Netherlands Antilles', + u'AQ': u'Antarctica', + u'AS': u'American Samoa', + u'AR': u'Argentina', + u'AU': u'Australia', + u'AT': u'Austria', + u'AW': u'Aruba', + u'IN': u'India', + u'TZ': u'Tanzania, United Republic of', + u'AZ': u'Azerbaijan', + u'IE': u'Ireland', + u'ID': u'Indonesia', + u'UA': u'Ukraine', + u'QA': u'Qatar', + u'MZ': u'Mozambique', + u'BA': u'Bosnia and Herzegovina', + u'CD': u'Congo, The Democratic Republic of the', + u'CS': u'Serbia and Montenegro', + u'HR': u'Croatia', + u'KP': u'Korea (North), Democratic People\'s Republic of', + u'KR': u'Korea (South), Republic of', + u'SK': u'Slovakia', + u'SU': u'Soviet Union (historical, 1922-1991)', + u'TL': u'East Timor', + u'XC': u'Czechoslovakia (historical, 1918-1992)', + u'XE': u'Europe', + u'XG': u'East Germany (historical, 1949-1990)', + u'XU': u'[Unknown Country]', + u'XW': u'[Worldwide]', + u'YU': u'Yugoslavia (historical, 1918-1992)', +} + +# EOF diff --git a/lib/musicbrainz2/data/languagenames.py b/lib/musicbrainz2/data/languagenames.py new file mode 100644 index 00000000..7f4252dc --- /dev/null +++ b/lib/musicbrainz2/data/languagenames.py @@ -0,0 +1,400 @@ +# -*- coding: utf-8 -*- + +__revision__ = '$Id: languagenames.py 8725 2006-12-17 22:39:07Z luks $' + +languageNames = { + u'ART': u'Artificial (Other)', + u'ROH': u'Raeto-Romance', + u'SCO': u'Scots', + 
u'SCN': u'Sicilian', + u'ROM': u'Romany', + u'RON': u'Romanian', + u'OSS': u'Ossetian; Ossetic', + u'ALE': u'Aleut', + u'MNI': u'Manipuri', + u'NWC': u'Classical Newari; Old Newari; Classical Nepal Bhasa', + u'OSA': u'Osage', + u'MNC': u'Manchu', + u'MWR': u'Marwari', + u'VEN': u'Venda', + u'MWL': u'Mirandese', + u'FAS': u'Persian', + u'FAT': u'Fanti', + u'FAN': u'Fang', + u'FAO': u'Faroese', + u'DIN': u'Dinka', + u'HYE': u'Armenian', + u'DSB': u'Lower Sorbian', + u'CAR': u'Carib', + u'DIV': u'Divehi', + u'TEL': u'Telugu', + u'TEM': u'Timne', + u'NBL': u'Ndebele, South; South Ndebele', + u'TER': u'Tereno', + u'TET': u'Tetum', + u'SUN': u'Sundanese', + u'KUT': u'Kutenai', + u'SUK': u'Sukuma', + u'KUR': u'Kurdish', + u'KUM': u'Kumyk', + u'SUS': u'Susu', + u'NEW': u'Newari; Nepal Bhasa', + u'KUA': u'Kuanyama; Kwanyama', + u'MEN': u'Mende', + u'LEZ': u'Lezghian', + u'GLA': u'Gaelic; Scottish Gaelic', + u'BOS': u'Bosnian', + u'GLE': u'Irish', + u'EKA': u'Ekajuk', + u'GLG': u'Gallegan', + u'AKA': u'Akan', + u'BOD': u'Tibetan', + u'GLV': u'Manx', + u'JRB': u'Judeo-Arabic', + u'VIE': u'Vietnamese', + u'IPK': u'Inupiaq', + u'UZB': u'Uzbek', + u'BRE': u'Breton', + u'BRA': u'Braj', + u'AYM': u'Aymara', + u'CHA': u'Chamorro', + u'CHB': u'Chibcha', + u'CHE': u'Chechen', + u'CHG': u'Chagatai', + u'CHK': u'Chuukese', + u'CHM': u'Mari', + u'CHN': u'Chinook jargon', + u'CHO': u'Choctaw', + u'CHP': u'Chipewyan', + u'CHR': u'Cherokee', + u'CHU': u'Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic', + u'CHV': u'Chuvash', + u'CHY': u'Cheyenne', + u'MSA': u'Malay', + u'III': u'Sichuan Yi', + u'ACE': u'Achinese', + u'IBO': u'Igbo', + u'IBA': u'Iban', + u'XHO': u'Xhosa', + u'DEU': u'German', + u'CAT': u'Catalan; Valencian', + u'DEL': u'Delaware', + u'DEN': u'Slave (Athapascan)', + u'CAD': u'Caddo', + u'TAT': u'Tatar', + u'RAJ': u'Rajasthani', + u'SPA': u'Spanish; Castilian', + u'TAM': u'Tamil', + u'TAH': u'Tahitian', + u'AFH': u'Afrihili', + u'ENG': 
u'English', + u'CSB': u'Kashubian', + u'NYN': u'Nyankole', + u'NYO': u'Nyoro', + u'SID': u'Sidamo', + u'NYA': u'Chichewa; Chewa; Nyanja', + u'SIN': u'Sinhala; Sinhalese', + u'AFR': u'Afrikaans', + u'LAM': u'Lamba', + u'SND': u'Sindhi', + u'MAR': u'Marathi', + u'LAH': u'Lahnda', + u'NYM': u'Nyamwezi', + u'SNA': u'Shona', + u'LAD': u'Ladino', + u'SNK': u'Soninke', + u'MAD': u'Madurese', + u'MAG': u'Magahi', + u'MAI': u'Maithili', + u'MAH': u'Marshallese', + u'LAV': u'Latvian', + u'MAL': u'Malayalam', + u'MAN': u'Mandingo', + u'ZND': u'Zande', + u'ZEN': u'Zenaga', + u'KBD': u'Kabardian', + u'ITA': u'Italian', + u'VAI': u'Vai', + u'TSN': u'Tswana', + u'TSO': u'Tsonga', + u'TSI': u'Tsimshian', + u'BYN': u'Blin; Bilin', + u'FIJ': u'Fijian', + u'FIN': u'Finnish', + u'EUS': u'Basque', + u'CEB': u'Cebuano', + u'DAN': u'Danish', + u'NOG': u'Nogai', + u'NOB': u'Norwegian BokmÃ¥l; BokmÃ¥l, Norwegian', + u'DAK': u'Dakota', + u'CES': u'Czech', + u'DAR': u'Dargwa', + u'DAY': u'Dayak', + u'NOR': u'Norwegian', + u'KPE': u'Kpelle', + u'GUJ': u'Gujarati', + u'MDF': u'Moksha', + u'MAS': u'Masai', + u'LAO': u'Lao', + u'MDR': u'Mandar', + u'GON': u'Gondi', + u'SMS': u'Skolt Sami', + u'SMO': u'Samoan', + u'SMN': u'Inari Sami', + u'SMJ': u'Lule Sami', + u'GOT': u'Gothic', + u'SME': u'Northern Sami', + u'BLA': u'Siksika', + u'SMA': u'Southern Sami', + u'GOR': u'Gorontalo', + u'AST': u'Asturian; Bable', + u'ORM': u'Oromo', + u'QUE': u'Quechua', + u'ORI': u'Oriya', + u'CRH': u'Crimean Tatar; Crimean Turkish', + u'ASM': u'Assamese', + u'PUS': u'Pushto', + u'DGR': u'Dogrib', + u'LTZ': u'Luxembourgish; Letzeburgesch', + u'NDO': u'Ndonga', + u'GEZ': u'Geez', + u'ISL': u'Icelandic', + u'LAT': u'Latin', + u'MAK': u'Makasar', + u'ZAP': u'Zapotec', + u'YID': u'Yiddish', + u'KOK': u'Konkani', + u'KOM': u'Komi', + u'KON': u'Kongo', + u'UKR': u'Ukrainian', + u'TON': u'Tonga (Tonga Islands)', + u'KOS': u'Kosraean', + u'KOR': u'Korean', + u'TOG': u'Tonga (Nyasa)', + u'HUN': u'Hungarian', + u'HUP': 
u'Hupa', + u'CYM': u'Welsh', + u'UDM': u'Udmurt', + u'BEJ': u'Beja', + u'BEN': u'Bengali', + u'BEL': u'Belarusian', + u'BEM': u'Bemba', + u'AAR': u'Afar', + u'NZI': u'Nzima', + u'SAH': u'Yakut', + u'SAN': u'Sanskrit', + u'SAM': u'Samaritan Aramaic', + u'SAG': u'Sango', + u'SAD': u'Sandawe', + u'RAR': u'Rarotongan', + u'RAP': u'Rapanui', + u'SAS': u'Sasak', + u'SAT': u'Santali', + u'MIN': u'Minangkabau', + u'LIM': u'Limburgan; Limburger; Limburgish', + u'LIN': u'Lingala', + u'LIT': u'Lithuanian', + u'EFI': u'Efik', + u'BTK': u'Batak (Indonesia)', + u'KAC': u'Kachin', + u'KAB': u'Kabyle', + u'KAA': u'Kara-Kalpak', + u'KAN': u'Kannada', + u'KAM': u'Kamba', + u'KAL': u'Kalaallisut; Greenlandic', + u'KAS': u'Kashmiri', + u'KAR': u'Karen', + u'KAU': u'Kanuri', + u'KAT': u'Georgian', + u'KAZ': u'Kazakh', + u'TYV': u'Tuvinian', + u'AWA': u'Awadhi', + u'URD': u'Urdu', + u'DOI': u'Dogri', + u'TPI': u'Tok Pisin', + u'MRI': u'Maori', + u'ABK': u'Abkhazian', + u'TKL': u'Tokelau', + u'NLD': u'Dutch; Flemish', + u'OJI': u'Ojibwa', + u'OCI': u'Occitan (post 1500); Provençal', + u'WOL': u'Wolof', + u'JAV': u'Javanese', + u'HRV': u'Croatian', + u'DYU': u'Dyula', + u'SSW': u'Swati', + u'MUL': u'Multiple languages', + u'HIL': u'Hiligaynon', + u'HIM': u'Himachali', + u'HIN': u'Hindi', + u'BAS': u'Basa', + u'GBA': u'Gbaya', + u'WLN': u'Walloon', + u'BAD': u'Banda', + u'NEP': u'Nepali', + u'CRE': u'Cree', + u'BAN': u'Balinese', + u'BAL': u'Baluchi', + u'BAM': u'Bambara', + u'BAK': u'Bashkir', + u'SHN': u'Shan', + u'ARP': u'Arapaho', + u'ARW': u'Arawak', + u'ARA': u'Arabic', + u'ARC': u'Aramaic', + u'ARG': u'Aragonese', + u'SEL': u'Selkup', + u'ARN': u'Araucanian', + u'LUS': u'Lushai', + u'MUS': u'Creek', + u'LUA': u'Luba-Lulua', + u'LUB': u'Luba-Katanga', + u'LUG': u'Ganda', + u'LUI': u'Luiseno', + u'LUN': u'Lunda', + u'LUO': u'Luo (Kenya and Tanzania)', + u'IKU': u'Inuktitut', + u'TUR': u'Turkish', + u'TUK': u'Turkmen', + u'TUM': u'Tumbuka', + u'COP': u'Coptic', + u'COS': u'Corsican', + 
u'COR': u'Cornish', + u'ILO': u'Iloko', + u'GWI': u'Gwich´in', + u'TLI': u'Tlingit', + u'TLH': u'Klingon; tlhIngan-Hol', + u'POR': u'Portuguese', + u'PON': u'Pohnpeian', + u'POL': u'Polish', + u'TGK': u'Tajik', + u'TGL': u'Tagalog', + u'FRA': u'French', + u'BHO': u'Bhojpuri', + u'SWA': u'Swahili', + u'DUA': u'Duala', + u'SWE': u'Swedish', + u'YAP': u'Yapese', + u'TIV': u'Tiv', + u'YAO': u'Yao', + u'XAL': u'Kalmyk', + u'FRY': u'Frisian', + u'GAY': u'Gayo', + u'OTA': u'Turkish, Ottoman (1500-1928)', + u'HMN': u'Hmong', + u'HMO': u'Hiri Motu', + u'GAA': u'Ga', + u'FUR': u'Friulian', + u'MLG': u'Malagasy', + u'SLV': u'Slovenian', + u'FIL': u'Filipino; Pilipino', + u'MLT': u'Maltese', + u'SLK': u'Slovak', + u'FUL': u'Fulah', + u'JPN': u'Japanese', + u'VOL': u'Volapük', + u'VOT': u'Votic', + u'IND': u'Indonesian', + u'AVE': u'Avestan', + u'JPR': u'Judeo-Persian', + u'AVA': u'Avaric', + u'PAP': u'Papiamento', + u'EWO': u'Ewondo', + u'PAU': u'Palauan', + u'EWE': u'Ewe', + u'PAG': u'Pangasinan', + u'PAM': u'Pampanga', + u'PAN': u'Panjabi; Punjabi', + u'KIR': u'Kirghiz', + u'NIA': u'Nias', + u'KIK': u'Kikuyu; Gikuyu', + u'SYR': u'Syriac', + u'KIN': u'Kinyarwanda', + u'NIU': u'Niuean', + u'EPO': u'Esperanto', + u'JBO': u'Lojban', + u'MIC': u'Mi\'kmaq; Micmac', + u'THA': u'Thai', + u'HAI': u'Haida', + u'ELL': u'Greek, Modern (1453-)', + u'ADY': u'Adyghe; Adygei', + u'ELX': u'Elamite', + u'ADA': u'Adangme', + u'GRB': u'Grebo', + u'HAT': u'Haitian; Haitian Creole', + u'HAU': u'Hausa', + u'HAW': u'Hawaiian', + u'BIN': u'Bini', + u'AMH': u'Amharic', + u'BIK': u'Bikol', + u'BIH': u'Bihari', + u'MOS': u'Mossi', + u'MOH': u'Mohawk', + u'MON': u'Mongolian', + u'MOL': u'Moldavian', + u'BIS': u'Bislama', + u'TVL': u'Tuvalu', + u'IJO': u'Ijo', + u'EST': u'Estonian', + u'KMB': u'Kimbundu', + u'UMB': u'Umbundu', + u'TMH': u'Tamashek', + u'FON': u'Fon', + u'HSB': u'Upper Sorbian', + u'RUN': u'Rundi', + u'RUS': u'Russian', + u'PLI': u'Pali', + u'SRD': u'Sardinian', + u'ACH': u'Acoli', + 
u'NDE': u'Ndebele, North; North Ndebele', + u'DZO': u'Dzongkha', + u'KRU': u'Kurukh', + u'SRR': u'Serer', + u'IDO': u'Ido', + u'SRP': u'Serbian', + u'KRO': u'Kru', + u'KRC': u'Karachay-Balkar', + u'NDS': u'Low German; Low Saxon; German, Low; Saxon, Low', + u'ZUN': u'Zuni', + u'ZUL': u'Zulu', + u'TWI': u'Twi', + u'NSO': u'Northern Sotho, Pedi; Sepedi', + u'SOM': u'Somali', + u'SON': u'Songhai', + u'SOT': u'Sotho, Southern', + u'MKD': u'Macedonian', + u'HER': u'Herero', + u'LOL': u'Mongo', + u'HEB': u'Hebrew', + u'LOZ': u'Lozi', + u'GIL': u'Gilbertese', + u'WAS': u'Washo', + u'WAR': u'Waray', + u'BUL': u'Bulgarian', + u'WAL': u'Walamo', + u'BUA': u'Buriat', + u'BUG': u'Buginese', + u'AZE': u'Azerbaijani', + u'ZHA': u'Zhuang; Chuang', + u'ZHO': u'Chinese', + u'NNO': u'Norwegian Nynorsk; Nynorsk, Norwegian', + u'UIG': u'Uighur; Uyghur', + u'MYV': u'Erzya', + u'INH': u'Ingush', + u'KHM': u'Khmer', + u'MYA': u'Burmese', + u'KHA': u'Khasi', + u'INA': u'Interlingua (International Auxiliary Language Association)', + u'NAH': u'Nahuatl', + u'TIR': u'Tigrinya', + u'NAP': u'Neapolitan', + u'NAV': u'Navajo; Navaho', + u'NAU': u'Nauru', + u'GRN': u'Guarani', + u'TIG': u'Tigre', + u'YOR': u'Yoruba', + u'ILE': u'Interlingue', + u'SQI': u'Albanian', +} + +# EOF diff --git a/lib/musicbrainz2/data/releasetypenames.py b/lib/musicbrainz2/data/releasetypenames.py new file mode 100644 index 00000000..f16ed19e --- /dev/null +++ b/lib/musicbrainz2/data/releasetypenames.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +__revision__ = '$Id: releasetypenames.py 8728 2006-12-17 23:42:30Z luks $' + +releaseTypeNames = { + u'http://musicbrainz.org/ns/mmd-1.0#None': u'None', + u'http://musicbrainz.org/ns/mmd-1.0#Album': u'Album', + u'http://musicbrainz.org/ns/mmd-1.0#Single': u'Single', + u'http://musicbrainz.org/ns/mmd-1.0#EP': u'EP', + u'http://musicbrainz.org/ns/mmd-1.0#Compilation': u'Compilation', + u'http://musicbrainz.org/ns/mmd-1.0#Soundtrack': u'Soundtrack', + 
u'http://musicbrainz.org/ns/mmd-1.0#Spokenword': u'Spokenword', + u'http://musicbrainz.org/ns/mmd-1.0#Interview': u'Interview', + u'http://musicbrainz.org/ns/mmd-1.0#Audiobook': u'Audiobook', + u'http://musicbrainz.org/ns/mmd-1.0#Live': u'Live', + u'http://musicbrainz.org/ns/mmd-1.0#Remix': u'Remix', + u'http://musicbrainz.org/ns/mmd-1.0#Other': u'Other', + u'http://musicbrainz.org/ns/mmd-1.0#Official': u'Official', + u'http://musicbrainz.org/ns/mmd-1.0#Promotion': u'Promotion', + u'http://musicbrainz.org/ns/mmd-1.0#Bootleg': u'Bootleg', + u'http://musicbrainz.org/ns/mmd-1.0#Pseudo-Release': u'Pseudo-Release', +} + +# EOF diff --git a/lib/musicbrainz2/data/scriptnames.py b/lib/musicbrainz2/data/scriptnames.py new file mode 100644 index 00000000..30a55bd7 --- /dev/null +++ b/lib/musicbrainz2/data/scriptnames.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- + +__revision__ = '$Id: scriptnames.py 7386 2006-04-30 11:12:55Z matt $' + +scriptNames = { + u'Yiii': u'Yi', + u'Telu': u'Telugu', + u'Taml': u'Tamil', + u'Guru': u'Gurmukhi', + u'Hebr': u'Hebrew', + u'Geor': u'Georgian (Mkhedruli)', + u'Ugar': u'Ugaritic', + u'Cyrl': u'Cyrillic', + u'Hrkt': u'Kanji & Kana', + u'Armn': u'Armenian', + u'Runr': u'Runic', + u'Khmr': u'Khmer', + u'Latn': u'Latin', + u'Hani': u'Han (Hanzi, Kanji, Hanja)', + u'Ital': u'Old Italic (Etruscan, Oscan, etc.)', + u'Hano': u'Hanunoo (Hanunóo)', + u'Ethi': u'Ethiopic (Ge\'ez)', + u'Gujr': u'Gujarati', + u'Hang': u'Hangul', + u'Arab': u'Arabic', + u'Thaa': u'Thaana', + u'Buhd': u'Buhid', + u'Sinh': u'Sinhala', + u'Orya': u'Oriya', + u'Hans': u'Han (Simplified variant)', + u'Thai': u'Thai', + u'Cprt': u'Cypriot', + u'Linb': u'Linear B', + u'Hant': u'Han (Traditional variant)', + u'Osma': u'Osmanya', + u'Mong': u'Mongolian', + u'Deva': u'Devanagari (Nagari)', + u'Laoo': u'Lao', + u'Tagb': u'Tagbanwa', + u'Hira': u'Hiragana', + u'Bopo': u'Bopomofo', + u'Goth': u'Gothic', + u'Tale': u'Tai Le', + u'Mymr': u'Myanmar (Burmese)', + u'Tglg': u'Tagalog', + 
def _loadDiscIdLibrary(loader, name):
	"""Loads libdiscid via C{loader(name)} and registers its prototypes.

	@param loader: a callable taking a library name and returning the
		opened library (e.g. C{ctypes.cdll.LoadLibrary})
	@param name: a string containing the library name to pass to C{loader}

	@return: the opened library, with prototypes set by L{_setPrototypes}

	@raise NotImplementedError: if the loader fails with an C{OSError}
	"""
	try:
		libDiscId = loader(name)
		_setPrototypes(libDiscId)
		return libDiscId
	except OSError:
		# sys.exc_info() is used instead of "except OSError, e" (python 2
		# only) or "except OSError as e" (python >= 2.6 only), so this
		# parses on every interpreter version.
		raise NotImplementedError('Error opening library: '
			+ str(sys.exc_info()[1]))


def _openLibrary():
	"""Tries to open libdiscid.

	Three strategies are tried in order: the C{find} method of the
	ctypes loader (only present in some old ctypes releases),
	C{ctypes.util.find_library}, and finally a hard coded,
	platform dependent file name.

	@return: a C{ctypes.CDLL} object, representing the opened library

	@raise NotImplementedError: if the library can't be opened
	"""
	# This only works for ctypes >= 0.9.9.3. Any libdiscid is found,
	# no matter how it's called on this platform.
	#
	# The loader's class is probed rather than the loader instance:
	# attribute access on ctypes.cdll itself is an attempt to load a
	# library of that name, so hasattr(ctypes.cdll, 'find') would try to
	# open a library called "find" (and, where hasattr() doesn't swallow
	# the resulting OSError, abort before the other strategies run).
	if hasattr(type(ctypes.cdll), 'find'):
		return _loadDiscIdLibrary(ctypes.cdll.find, 'discid')

	# Try to find the library using ctypes.util
	libName = ctypes.util.find_library('discid')
	if libName is not None:
		return _loadDiscIdLibrary(ctypes.cdll.LoadLibrary, libName)

	# For compatibility with ctypes < 0.9.9.3 try to figure out the library
	# name without the help of ctypes. We use cdll.LoadLibrary() below,
	# which isn't available for ctypes == 0.9.9.3.
	#
	if sys.platform == 'darwin':
		libName = 'libdiscid.0.dylib'
	elif sys.platform == 'win32':
		libName = 'discid.dll'
	else:
		# 'linux2' and, as a best guess, every other Un*x-style
		# operating system.
		libName = 'libdiscid.so.0'

	return _loadDiscIdLibrary(ctypes.cdll.LoadLibrary, libName)
def readDisc(deviceName=None):
	"""Reads an Audio CD in the disc drive.

	This reads a CD's table of contents (TOC) and calculates the MusicBrainz
	DiscID, which is a 28 character ASCII string. This DiscID can be used
	to retrieve a list of matching releases from the web service (see
	L{musicbrainz2.webservice.Query}).

	Note that an Audio CD has to be in drive for this to work. The
	C{deviceName} argument may be used to set the device. The default
	depends on the operating system (on linux, it's C{'/dev/cdrom'}).
	No network connection is needed for this function.

	If the device doesn't exist or there's no valid Audio CD in the drive,
	a L{DiscError} exception is raised.

	@param deviceName: a string containing the CD drive's device name

	@return: a L{musicbrainz2.model.Disc} object

	@raise DiscError: if there was a problem reading the disc
	@raise NotImplementedError: if DiscID generation isn't supported
	"""
	libDiscId = _openLibrary()

	handle = libDiscId.discid_new()
	# discid_new() is declared with restype c_void_p, and ctypes hands a
	# NULL pointer back as None, so comparing against 0 never detects it.
	if not handle:
		raise DiscError('libdiscid: discid_new() returned NULL')

	# From here on the handle is released on every exit path, including
	# a failed read.
	try:
		# Access the CD drive. This also works if deviceName is None
		# because ctypes passes a NULL pointer in this case.
		#
		res = libDiscId.discid_read(handle, deviceName)
		if res == 0:
			raise DiscError(libDiscId.discid_get_error_msg(handle))

		# Now extract the data from the result.
		#
		disc = Disc()

		disc.setId( libDiscId.discid_get_id(handle) )

		firstTrackNum = libDiscId.discid_get_first_track_num(handle)
		lastTrackNum = libDiscId.discid_get_last_track_num(handle)

		disc.setSectors(libDiscId.discid_get_sectors(handle))

		# One (offset, length) tuple per track, in track order.
		for i in range(firstTrackNum, lastTrackNum+1):
			trackOffset = libDiscId.discid_get_track_offset(handle, i)
			trackSectors = libDiscId.discid_get_track_length(handle, i)

			disc.addTrack( (trackOffset, trackSectors) )

		disc.setFirstTrackNum(firstTrackNum)
		disc.setLastTrackNum(lastTrackNum)

		return disc
	finally:
		libDiscId.discid_free(handle)
class Entity(object):
	"""A first-level MusicBrainz class.

	All entities in MusicBrainz have unique IDs (which are absolute URIs)
	as well as any number of L{relations <Relation>} to other entities
	and free text tags. This class is abstract and should not be
	instantiated.

	Relations are differentiated by their I{target type}, that means,
	where they link to. MusicBrainz currently supports four target types
	(artists, releases, tracks, and URLs) each identified using a URI.
	To get all relations with a specific target type, you can use
	L{getRelations} and pass one of the following constants as the
	parameter:

	 - L{Relation.TO_ARTIST}
	 - L{Relation.TO_RELEASE}
	 - L{Relation.TO_TRACK}
	 - L{Relation.TO_URL}

	@see: L{Relation}
	"""

	def __init__(self, id_=None):
		"""Constructor.

		This should only be used by derived classes.

		@param id_: a string containing an absolute URI
		"""
		self._id = id_
		# Maps a target type URI to the list of relations of that type.
		self._relations = { }
		# Maps a tag's value (its name) to the Tag object.
		self._tags = { }
		self._rating = Rating()

	def getId(self):
		"""Returns a MusicBrainz ID.

		@return: a string containing a URI, or None
		"""
		return self._id

	def setId(self, value):
		"""Sets a MusicBrainz ID.

		@param value: a string containing an absolute URI
		"""
		self._id = value

	id = property(getId, setId, doc='The MusicBrainz ID.')

	def getRelations(self, targetType=None, relationType=None,
			requiredAttributes=(), direction=None):
		"""Returns a list of relations.

		If C{targetType} is given, only relations of that target
		type are returned. For MusicBrainz, the following target
		types are defined:
		 - L{Relation.TO_ARTIST}
		 - L{Relation.TO_RELEASE}
		 - L{Relation.TO_TRACK}
		 - L{Relation.TO_URL}

		If C{targetType} is L{Relation.TO_ARTIST}, for example,
		this method returns all relations between this Entity and
		artists.

		You may use the C{relationType} parameter to further restrict
		the selection. If it is set, only relations with the given
		relation type are returned. The C{requiredAttributes} sequence
		lists attributes that have to be part of all returned relations.

		If C{direction} is set, only relations with the given reading
		direction are returned. You can use the L{Relation.DIR_FORWARD},
		L{Relation.DIR_BACKWARD}, and L{Relation.DIR_NONE} constants
		for this.

		All filters are independent of each other and the returned
		list is always a new list; changing it doesn't affect this
		entity.

		@param targetType: a string containing an absolute URI, or None
		@param relationType: a string containing an absolute URI, or None
		@param requiredAttributes: a sequence containing absolute URIs
		@param direction: one of L{Relation}'s direction constants
		@return: a list of L{Relation} objects

		@see: L{Entity}
		"""
		if targetType is not None:
			# get() instead of setdefault(): a pure query must not
			# register an (empty) target type, and the copy keeps the
			# internal list out of the caller's hands.
			allRels = list(self._relations.get(targetType, ()))
		else:
			allRels = [ ]
			for relList in self._relations.values():
				allRels.extend(relList)

		# Filter for direction.
		#
		if direction is not None:
			allRels = [r for r in allRels
				if r.getDirection() == direction]

		# Filter for relation type.
		#
		if relationType is not None:
			allRels = [r for r in allRels
				if r.getType() == relationType]

		# Now filter for attribute type. This is applied even if no
		# relation type was requested.
		#
		required = set(requiredAttributes)
		if required:
			allRels = [r for r in allRels
				if required.issubset(set(r.getAttributes()))]

		return allRels


	def getRelationTargets(self, targetType=None, relationType=None,
			requiredAttributes=(), direction=None):
		"""Returns a list of relation targets.

		The arguments work exactly like in L{getRelations}, but
		instead of L{Relation} objects, the matching relation
		targets are returned. This can be L{Artist}, L{Release},
		or L{Track} objects, depending on the relations.

		As a special case, URL strings are returned if the target
		is an URL.

		@param targetType: a string containing an absolute URI, or None
		@param relationType: a string containing an absolute URI, or None
		@param requiredAttributes: a sequence containing absolute URIs
		@param direction: one of L{Relation}'s direction constants
		@return: a list of objects, depending on the relation

		@see: L{getRelations}
		"""
		ret = [ ]
		rels = self.getRelations(targetType, relationType,
			requiredAttributes, direction)

		for r in rels:
			# For URL relations the target ID is returned in place of
			# a target object.
			if r.getTargetType() == Relation.TO_URL:
				ret.append(r.getTargetId())
			else:
				ret.append(r.getTarget())

		return ret


	def addRelation(self, relation):
		"""Adds a relation.

		This method adds C{relation} to the list of relations. The
		given relation has to be initialized, at least the target
		type has to be set.

		@param relation: the L{Relation} object to add

		@see: L{Entity}
		"""
		# The accessors have to be called: comparing the bound methods
		# themselves against None is always true and checks nothing.
		targetType = relation.getTargetType()
		assert relation.getType() is not None
		assert targetType is not None
		assert relation.getTargetId() is not None
		self._relations.setdefault(targetType, [ ]).append(relation)


	def getRelationTargetTypes(self):
		"""Returns a list of target types available for this entity.

		Use this to find out to which types of targets this entity
		has relations. If the entity only has relations to tracks and
		artists, for example, then a list containing the strings
		L{Relation.TO_TRACK} and L{Relation.TO_ARTIST} is returned.

		@return: a list of strings containing URIs

		@see: L{getRelations}
		"""
		return list(self._relations.keys())

	def getTag(self, value):
		"""Return the tag with the given value (aka the tag's name).

		@return: the L{Tag} with the given name or raises a KeyError
		"""
		return self._tags[value]

	def getTags(self):
		"""Return all tags attached to this Entity.

		@return: a list of L{Tag} objects
		"""
		return list(self._tags.values())

	tags = property(getTags, doc='The tags for this entity.')

	def addTag(self, tag):
		"""Add a new tag.

		If a tag with the same value is already attached, the count
		of C{tag} is added to the existing tag instead.

		@param tag: the L{Tag} object to add

		@see: L{getTags}
		"""
		if tag.value in self._tags:
			existing = self._tags[tag.value]
			existing.count += tag.count
		else:
			self._tags[tag.value] = tag

	def getRating(self):
		"""Return the rating of this Entity.

		@return: the rating stored for this entity; a L{Rating}
			object unless it was replaced using L{setRating}
		"""
		return self._rating

	def setRating(self, value):
		"""Set the rating of this Entity.

		@param value: the rating to store; it is stored as given
		"""
		self._rating = value

	# Created after setRating() is defined so the property is writable.
	rating = property(getRating, setRating,
		doc='The rating for this entity.')
+ + @param type_: a string containing an absolute URI + """ + self._type = type_ + + type = property(getType, setType, doc="The artist's type.") + + def getName(self): + """Returns the artist's name. + + @return: a string containing the artist's name, or None + """ + return self._name + + def setName(self, name): + """Sets the artist's name. + + @param name: a string containing the artist's name + """ + self._name = name + + name = property(getName, setName, doc="The artist's name.") + + def getSortName(self): + """Returns the artist's sort name. + + The sort name is the artist's name in a special format which + is better suited for lexicographic sorting. The MusicBrainz + style guide specifies this format. + + @see: U{The MusicBrainz Style Guidelines + } + """ + return self._sortName + + def setSortName(self, sortName): + """Sets the artist's sort name. + + @param sortName: a string containing the artist's sort name + + @see: L{getSortName} + """ + self._sortName = sortName + + sortName = property(getSortName, setSortName, + doc="The artist's sort name.") + + def getDisambiguation(self): + """Returns the disambiguation attribute. + + This attribute may be used if there is more than one artist + with the same name. In this case, disambiguation attributes + are added to the artists' names to keep them apart. + + For example, there are at least three bands named 'Vixen'. + Each band has a different disambiguation in the MusicBrainz + database, like 'Hip-hop' or 'all-female rock/glam band'. + + @return: a disambiguation string, or None + + @see: L{getUniqueName} + """ + return self._disambiguation + + def setDisambiguation(self, disambiguation): + """Sets the disambiguation attribute. 
+ + @param disambiguation: a disambiguation string + + @see: L{getDisambiguation}, L{getUniqueName} + """ + self._disambiguation = disambiguation + + disambiguation = property(getDisambiguation, setDisambiguation, + doc="The disambiguation comment.") + + def getUniqueName(self): + """Returns a unique artist name (using disambiguation). + + This method returns the artist name together with the + disambiguation attribute in parenthesis if it exists. + Example: 'Vixen (Hip-hop)'. + + @return: a string containing the unique name + + @see: L{getDisambiguation} + """ + d = self.getDisambiguation() + if d is not None and d.strip() != '': + return '%s (%s)' % (self.getName(), d) + else: + return self.getName() + + def getBeginDate(self): + """Returns the birth/foundation date. + + The definition of the I{begin date} depends on the artist's + type. For persons, this is the day of birth, for groups it + is the day the group was founded. + + The returned date has the format 'YYYY', 'YYYY-MM', or + 'YYYY-MM-DD', depending on how much detail is known. + + @return: a string containing the date, or None + + @see: L{getType} + """ + return self._beginDate + + def setBeginDate(self, dateStr): + """Sets the begin/foundation date. + + @param dateStr: a date string + + @see: L{getBeginDate} + """ + self._beginDate = dateStr + + beginDate = property(getBeginDate, setBeginDate, + doc="The begin/foundation date.") + + def getEndDate(self): + """Returns the death/dissolving date. + + The definition of the I{end date} depends on the artist's + type. For persons, this is the day of death, for groups it + is the day the group was dissolved. + + @return: a string containing a date, or None + + @see: L{getBeginDate} + """ + return self._endDate + + def setEndDate(self, dateStr): + """Sets the death/dissolving date. 
+ + @param dateStr: a string containing a date + + @see: L{setEndDate}, L{getBeginDate} + """ + self._endDate = dateStr + + endDate = property(getEndDate, setEndDate, + doc="The death/dissolving date.") + + def getAliases(self): + """Returns the list of aliases for this artist. + + @return: a list of L{ArtistAlias} objects + """ + return self._aliases + + aliases = property(getAliases, doc='The list of aliases.') + + def addAlias(self, alias): + """Adds an alias for this artist. + + @param alias: an L{ArtistAlias} object + """ + self._aliases.append(alias) + + def getReleases(self): + """Returns a list of releases from this artist. + + This may also include releases where this artist isn't the + I{main} artist but has just contributed one or more tracks + (aka VA-Releases). + + @return: a list of L{Release} objects + """ + return self._releases + + releases = property(getReleases, doc='The list of releases') + + def addRelease(self, release): + """Adds a release to this artist's list of releases. + + @param release: a L{Release} object + """ + self._releases.append(release) + + def getReleasesOffset(self): + """Returns the offset of the release list. + + This is used if the release list is incomplete (ie. the web + service only returned part of the release for this artist). + Note that the offset value is zero-based, which means release + C{0} is the first release. + + @return: an integer containing the offset, or None + + @see: L{getReleases}, L{getReleasesCount} + """ + return self._releasesOffset + + def setReleasesOffset(self, offset): + """Sets the offset of the release list. + + @param offset: an integer containing the offset, or None + + @see: L{getReleasesOffset} + """ + self._releasesOffset = offset + + releasesOffset = property(getReleasesOffset, setReleasesOffset, + doc='The offset of the release list.') + + def getReleasesCount(self): + """Returns the number of existing releases. 
+ + This may or may not match with the number of elements that + L{getReleases} returns. If the count is higher than + the list, it indicates that the list is incomplete. + + @return: an integer containing the count, or None + + @see: L{setReleasesCount}, L{getReleasesOffset} + """ + return self._releasesCount + + def setReleasesCount(self, value): + """Sets the number of existing releases. + + @param value: an integer containing the count, or None + + @see: L{getReleasesCount}, L{setReleasesOffset} + """ + self._releasesCount = value + + releasesCount = property(getReleasesCount, setReleasesCount, + doc='The total number of releases') + + def getReleaseGroups(self): + """Returns a list of release groups from this artist. + + @return: a list of L{ReleaseGroup} objects + """ + return self._releaseGroups + + releaseGroups = property(getReleaseGroups, doc='The list of release groups') + + def addReleaseGroup(self, releaseGroup): + """Adds a release group to this artist's list of release groups. + + @param releaseGroup: a L{ReleaseGroup} object + """ + self._releaseGroups.append(releaseGroup) + + def getReleaseGroupsOffset(self): + """Returns the offset of the release group list. + + This is used if the release group list is incomplete (ie. the + web service only returned part of the result for this artist). + Note that the offset value is zero-based, which means release + group C{0} is the first release group. + + @return: an integer containing the offset, or None + + @see: L{getReleaseGroups}, L{getReleaseGroupsCount} + """ + return self._releaseGroupsOffset + + def setReleaseGroupsOffset(self, offset): + """Sets the offset of the release group list. 
class Rating(object):
	"""The representation of a MusicBrainz rating.

	The rating can have the following values:

	  0 = Unrated
	  [1..5] = Rating
	"""
	def __init__(self, value=None, count=None):
		"""Constructor.

		Both arguments are stored as given; only L{setValue}
		validates the rating's value.

		@param value: the rating's value, or None
		@param count: the rating's frequency count, or None
		"""
		self._value = value
		self._count = count

	def getValue(self):
		"""Returns the rating's value.

		@return: the rating's value (a float once set using
			L{setValue}), or None
		"""
		return self._value

	def setValue(self, value):
		""" Set the value of this rating.

		0 or None = Clear your rating
		1 - 5 = Rating

		@param value: the rating to apply; anything C{float()}
			accepts, or None

		@raise ValueError: if value can't be converted to a float
			or is not in the range 0 - 5.
		"""
		if value is None:
			value = 0
		try:
			value = float(value)
		except (TypeError, ValueError):
			# float() raises TypeError for non-numeric objects such as
			# lists; callers are only promised ValueError.
			raise ValueError("Value for rating needs to be a float.")
		# Written as a positive range test so that NaN, for which every
		# comparison is false, is rejected as well.
		if not (0.0 <= value <= 5.0):
			raise ValueError("Value needs to be in the range [0..5]")
		self._value = value

	value = property(getValue, setValue, doc='The value of the rating.')

	def getCount(self):
		"""Returns an integer containing the rating's frequency count.

		@return: an integer containing the rating's frequency count,
			or None
		"""
		return self._count

	def setCount(self, count):
		"""Sets the frequency count of this rating.

		@param count: an integer containing the rating's frequency count
		"""
		self._count = count

	count = property(getCount, setCount,
		doc="This rating's frequency count.")

	def __str__(self):
		return str(self._value)

	def __unicode__(self):
		return unicode(self._value)
+ + @param count: an integer containing the tag's frequency count + """ + self._count = count + + count = property(getCount, setCount, doc="This tag's frequency count.") + + def __str__(self): + return str(self._value) + + def __unicode__(self): + return unicode(self._value) + + +class Label(Entity): + """Represents a record label. + + A label within MusicBrainz is an L{Entity}. It contains information + about the label like when it was established, its name, label code and + other relationships. All release events may be assigned a label. + """ + TYPE_UNKNOWN = NS_MMD_1 + 'Unknown' + + TYPE_DISTRIBUTOR = NS_MMD_1 + 'Distributor' + TYPE_HOLDING = NS_MMD_1 + 'Holding' + TYPE_PRODUCTION = NS_MMD_1 + 'Production' + + TYPE_ORIGINAL = NS_MMD_1 + 'OriginalProduction' + TYPE_BOOTLEG = NS_MMD_1 + 'BootlegProduction' + TYPE_REISSUE = NS_MMD_1 + 'ReissueProduction' + + def __init__(self, id_=None): + """Constructor. + + @param id_: a string containing an absolute URI + """ + Entity.__init__(self, id_) + self._type = None + self._name = None + self._sortName = None + self._disambiguation = None + self._countryId = None + self._code = None + self._beginDate = None + self._endDate = None + self._aliases = [ ] + + def getType(self): + """Returns the type of this label. + + @return: a string containing an absolute URI + """ + return self._type + + def setType(self, type_): + """Sets the type of this label. + + @param type_: A string containing the absolute URI of the type of label. + """ + self._type = type_ + + type = property(getType, setType, doc='The type of label') + + def getName(self): + """Returns a string with the name of the label. + + @return: a string containing the label's name, or None + """ + return self._name + + def setName(self, name): + """Sets the name of this label. 
+ + @param name: A string containing the name of the label + """ + self._name = name + + name = property(getName, setName, doc='The name of the label.') + + def getSortName(self): + """Returns the label's sort name. + + The sort name is the label's name in a special format which + is better suited for lexicographic sorting. The MusicBrainz + style guide specifies this format. + + @see: U{The MusicBrainz Style Guidelines + } + """ + return self._sortName + + def setSortName(self, sortName): + """Sets the label's sort name. + + @param sortName: a string containing the label's sort name + + @see: L{getSortName} + """ + self._sortName = sortName + + sortName = property(getSortName, setSortName, + doc="The label's sort name.") + + def getDisambiguation(self): + """Returns the disambiguation attribute. + + This attribute may be used if there is more than one label + with the same name. In this case, disambiguation attributes + are added to the labels' names to keep them apart. + + @return: a disambiguation string, or None + + @see: L{getUniqueName} + """ + return self._disambiguation + + def setDisambiguation(self, disambiguation): + """Sets the disambiguation attribute. + + @param disambiguation: a disambiguation string + + @see: L{getDisambiguation}, L{getUniqueName} + """ + self._disambiguation = disambiguation + + disambiguation = property(getDisambiguation, setDisambiguation, + doc="The disambiguation comment.") + + def getUniqueName(self): + """Returns a unique label name (using disambiguation). + + This method returns the label's name together with the + disambiguation attribute in parenthesis if it exists. + + @return: a string containing the unique name + + @see: L{getDisambiguation} + """ + d = self.getDisambiguation() + if d is not None and d.strip() != '': + return '%s (%s)' % (self.getName(), d) + else: + return self.getName() + + def getBeginDate(self): + """Returns the date this label was established. 
+ + @return: A string contained the start date, or None + """ + return self._beginDate + + def setBeginDate(self, date): + """Set the date this label was established. + + @param date: A string in the format of YYYY-MM-DD + """ + self._beginDate = date + + beginDate = property(getBeginDate, setBeginDate, + doc='The date this label was established.') + + def getEndDate(self): + """Returns the date this label closed. + + The returned date has the format 'YYYY', 'YYYY-MM', or + 'YYYY-MM-DD', depending on how much detail is known. + + @return: A string containing the date, or None + """ + return self._endDate + + def setEndDate(self, date): + """Set the date this label closed. + + The date may have the format 'YYYY', 'YYYY-MM', or + 'YYYY-MM-DD', depending on how much detail is known. + + @param date: A string containing the date, or None + """ + self._endDate = date + + endDate = property(getEndDate, setEndDate, + doc='The date this label closed.') + + def getCountry(self): + """Returns the country the label is located. + + @return: a string containing an ISO-3166 country code, or None + + @see: L{musicbrainz2.utils.getCountryName} + """ + return self._countryId + + def setCountry(self, country): + """Sets the country the label is located. + + @param country: a string containing an ISO-3166 country code + """ + self._countryId = country + + country = property(getCountry, setCountry, + doc='The country the label is located.') + + def getCode(self): + """Returns the label code. + + Label codes have been introduced by the IFPI (International + Federation of Phonogram and Videogram Industries) to uniquely + identify record labels. The label code consists of 'LC-' and 4 + figures (currently being extended to 5 figures). + + @return: a string containing the label code, or None + """ + return self._code + + def setCode(self, code): + """Sets the label code. 
class Release(Entity):
    """Represents a Release.

    A release within MusicBrainz is an L{Entity} which contains L{Track}
    objects. Releases may be of more than one type: There can be albums,
    singles, compilations, live recordings, official releases, bootlegs
    etc.

    @note: The current MusicBrainz server implementation supports only a
    limited set of types.
    """
    TYPE_NONE = NS_MMD_1 + 'None'
    TYPE_NON_ALBUM_TRACKS = NS_MMD_1 + "NonAlbum Track"

    TYPE_ALBUM = NS_MMD_1 + 'Album'
    TYPE_SINGLE = NS_MMD_1 + 'Single'
    TYPE_EP = NS_MMD_1 + 'EP'
    TYPE_COMPILATION = NS_MMD_1 + 'Compilation'
    TYPE_SOUNDTRACK = NS_MMD_1 + 'Soundtrack'
    TYPE_SPOKENWORD = NS_MMD_1 + 'Spokenword'
    TYPE_INTERVIEW = NS_MMD_1 + 'Interview'
    TYPE_AUDIOBOOK = NS_MMD_1 + 'Audiobook'
    TYPE_LIVE = NS_MMD_1 + 'Live'
    TYPE_REMIX = NS_MMD_1 + 'Remix'
    TYPE_OTHER = NS_MMD_1 + 'Other'

    TYPE_OFFICIAL = NS_MMD_1 + 'Official'
    TYPE_PROMOTION = NS_MMD_1 + 'Promotion'
    TYPE_BOOTLEG = NS_MMD_1 + 'Bootleg'
    TYPE_PSEUDO_RELEASE = NS_MMD_1 + 'Pseudo-Release'

    def __init__(self, id_=None, title=None):
        """Constructor.

        @param id_: a string containing an absolute URI
        @param title: a string containing the title
        """
        Entity.__init__(self, id_)
        self._types = []
        self._title = title
        self._textLanguage = None
        self._textScript = None
        self._asin = None
        self._artist = None
        self._releaseEvents = []
        self._releaseGroup = None
        self._discs = []
        self._tracks = []
        self._tracksOffset = None
        self._tracksCount = None

    def getTypes(self):
        """Returns the types of this release.

        To test for release types, you can use the constants
        L{TYPE_ALBUM}, L{TYPE_SINGLE}, etc.

        @return: a list of strings containing absolute URIs

        @see: L{musicbrainz2.utils.getReleaseTypeName}
        """
        return self._types

    types = property(getTypes, doc='The list of types for this release.')

    def addType(self, type_):
        """Add a type to the list of types.

        @param type_: a string containing an absolute URI

        @see: L{getTypes}
        """
        self._types.append(type_)

    def getTitle(self):
        """Returns the release's title.

        @return: a string containing the release's title
        """
        return self._title

    def setTitle(self, title):
        """Sets the release's title.

        @param title: a string containing the release's title, or None
        """
        self._title = title

    title = property(getTitle, setTitle, doc='The title of this release.')

    def getTextLanguage(self):
        """Returns the language used in release and track titles.

        To represent the language, the ISO-639-2/T standard is used,
        which provides three-letter terminological language codes like
        'ENG', 'DEU', 'JPN', 'KOR', 'ZHO' or 'YID'.

        Note that this refers to release and track I{titles}, not
        lyrics.

        @return: a string containing the language code, or None

        @see: L{musicbrainz2.utils.getLanguageName}
        """
        return self._textLanguage

    def setTextLanguage(self, language):
        """Sets the language used in release and track titles.

        @param language: a string containing a language code

        @see: L{getTextLanguage}
        """
        self._textLanguage = language

    textLanguage = property(getTextLanguage, setTextLanguage,
        doc='The language used in release and track titles.')

    def getTextScript(self):
        """Returns the script used in release and track titles.

        To represent the script, ISO-15924 script codes are used.
        Valid codes are, among others: 'Latn', 'Cyrl', 'Hans', 'Hebr'

        Note that this refers to release and track I{titles}, not
        lyrics.

        @return: a string containing the script code, or None

        @see: L{musicbrainz2.utils.getScriptName}
        """
        return self._textScript

    def setTextScript(self, script):
        """Sets the script used in release and track titles.

        @param script: a string containing a script code

        @see: L{getTextScript}
        """
        self._textScript = script

    textScript = property(getTextScript, setTextScript,
        doc='The script used in release and track titles.')

    def getAsin(self):
        """Returns the amazon shop identifier (ASIN).

        The ASIN is a 10-letter code (except for books) assigned
        by Amazon, which looks like 'B000002IT2' or 'B00006I4YD'.

        @return: a string containing the ASIN, or None
        """
        return self._asin

    def setAsin(self, asin):
        """Sets the amazon shop identifier (ASIN).

        @param asin: a string containing the ASIN

        @see: L{getAsin}
        """
        self._asin = asin

    asin = property(getAsin, setAsin, doc='The amazon shop identifier.')

    def getArtist(self):
        """Returns the main artist of this release.

        @return: an L{Artist} object, or None
        """
        return self._artist

    def setArtist(self, artist):
        """Sets this release's main artist.

        @param artist: an L{Artist} object
        """
        self._artist = artist

    artist = property(getArtist, setArtist,
        doc='The main artist of this release.')

    def getReleaseGroup(self):
        """Returns the release group to which this release belongs.

        @return: a L{ReleaseGroup} object, or None.
        """
        return self._releaseGroup

    def setReleaseGroup(self, releaseGroup):
        """Sets the release's release group.

        @param releaseGroup: a L{ReleaseGroup} object, or None.
        """
        self._releaseGroup = releaseGroup

    releaseGroup = property(getReleaseGroup, setReleaseGroup,
        doc='The release group this release belongs to.')

    def isSingleArtistRelease(self):
        """Checks if this is a single artist's release.

        Returns C{True} if the release's main artist (L{getArtist}) is
        also the main artist for all of the tracks. This is checked by
        comparing the artist IDs.

        Note that the release's artist has to be set (see L{setArtist})
        for this. The track artists may be unset; such tracks are
        ignored by the comparison.

        @return: True, if this is a single artist's release

        @raise AssertionError: if the release's artist is not set
        """
        releaseArtist = self.getArtist()
        assert releaseArtist is not None, 'Release Artist may not be None!'
        for track in self.getTracks():
            trackArtist = track.getArtist()
            if trackArtist is None:
                continue
            if trackArtist.getId() != releaseArtist.getId():
                return False

        return True

    def getTracks(self):
        """Returns the tracks this release contains.

        @return: a list containing L{Track} objects

        @see: L{getTracksOffset}, L{getTracksCount}
        """
        return self._tracks

    tracks = property(getTracks, doc='The list of tracks.')

    def addTrack(self, track):
        """Adds a track to this release.

        This appends a track at the end of this release's track list.

        @param track: a L{Track} object
        """
        self._tracks.append(track)

    def getTracksOffset(self):
        """Returns the offset of the track list.

        This is used if the track list is incomplete (ie. the web
        service only returned part of the tracks on this release).
        Note that the offset value is zero-based, which means track
        C{0} is the first track.

        @return: an integer containing the offset, or None

        @see: L{getTracks}, L{getTracksCount}
        """
        return self._tracksOffset

    def setTracksOffset(self, offset):
        """Sets the offset of the track list.

        @param offset: an integer containing the offset, or None

        @see: L{getTracksOffset}, L{setTracksCount}
        """
        self._tracksOffset = offset

    tracksOffset = property(getTracksOffset, setTracksOffset,
        doc='The offset of the track list.')

    def getTracksCount(self):
        """Returns the number of tracks on this release.

        This may or may not match with the number of elements that
        L{getTracks} returns. If the count is higher than
        the list, it indicates that the list is incomplete.

        @return: an integer containing the count, or None

        @see: L{setTracksCount}, L{getTracks}, L{getTracksOffset}
        """
        return self._tracksCount

    def setTracksCount(self, value):
        """Sets the number of tracks on this release.

        @param value: an integer containing the count, or None

        @see: L{getTracksCount}, L{setTracksOffset}
        """
        self._tracksCount = value

    tracksCount = property(getTracksCount, setTracksCount,
        doc='The total number of tracks')

    def getReleaseEvents(self):
        """Returns the list of release events.

        A L{Release} may contain a list of so-called release events,
        each represented using a L{ReleaseEvent} object. Release
        events specify where and when this release was, well, released.

        @return: a list of L{ReleaseEvent} objects

        @see: L{getReleaseEventsAsDict}
        """
        return self._releaseEvents

    releaseEvents = property(getReleaseEvents,
        doc='The list of release events.')

    def addReleaseEvent(self, event):
        """Adds a release event to this release.

        @param event: a L{ReleaseEvent} object

        @see: L{getReleaseEvents}
        """
        self._releaseEvents.append(event)

    def getReleaseEventsAsDict(self):
        """Returns the release events represented as a dict.

        Keys are ISO-3166 country codes like 'DE', 'UK', 'FR' etc.
        Values are dates in 'YYYY', 'YYYY-MM' or 'YYYY-MM-DD' format.
        If several events share a country, the last one in the list
        wins.

        @return: a dict containing (countryCode, date) entries

        @see: L{getReleaseEvents}, L{musicbrainz2.utils.getCountryName}
        """
        d = {}
        for event in self.getReleaseEvents():
            d[event.getCountry()] = event.getDate()
        return d

    def getEarliestReleaseDate(self):
        """Returns the earliest release date.

        This favours complete dates. For example, '2006-09' is
        returned if there is '2006', too. If there is no dated
        release event associated with this release, None is returned.

        @return: a string containing the date, or None

        @see: L{getReleaseEvents}, L{getReleaseEventsAsDict}
        """
        event = self.getEarliestReleaseEvent()
        if event is None:
            return None
        return event.getDate()

    def getEarliestReleaseEvent(self):
        """Returns the earliest release event.

        This works like L{getEarliestReleaseDate}, but instead of
        just the date, this returns a L{ReleaseEvent} object.

        Partial dates are padded with '-99' for comparison so that a
        more complete date within the same year or month sorts first.
        Events without a date cannot be ordered and are ignored.

        @return: a L{ReleaseEvent} object, or None

        @see: L{getReleaseEvents}, L{getEarliestReleaseDate}
        """
        candidates = []
        for event in self.getReleaseEvents():
            date = event.getDate()
            if not date:
                # No date (None or ''): nothing to compare against.
                continue
            if len(date) == 10:     # 'YYYY-MM-DD'
                sortKey = date
            elif len(date) == 7:    # 'YYYY-MM'
                sortKey = date + '-99'
            else:                   # 'YYYY'
                sortKey = date + '-99-99'
            candidates.append((sortKey, event))

        if not candidates:
            return None

        # Sort on the padded date only; the sort is stable, so events
        # with equal keys keep their original order.
        candidates.sort(key=lambda pair: pair[0])
        return candidates[0][1]

    def getDiscs(self):
        """Returns the discs associated with this release.

        Discs are currently containers for MusicBrainz DiscIDs.
        Note that under rare circumstances (identical TOCs), a
        DiscID could be associated with more than one release.

        @return: a list of L{Disc} objects
        """
        return self._discs

    discs = property(getDiscs, doc='The list of associated discs.')

    def addDisc(self, disc):
        """Adds a disc to this release.

        @param disc: a L{Disc} object
        """
        self._discs.append(disc)
+ + @return: a list of L{Release} objects + @see: L{Release} + """ + return self._releases + + releases = property(getReleases, + doc = 'The list of releases in this release group.') + + def addRelease(self, release): + """Adds a L{Release} to this release group. + + @param release: a L{Release} object + """ + self._releases.append(release) + + def getReleasesOffset(self): + """Returns the offset of the release list. + + This is used if the release list is incomplete (i.e., the web + service only returned a portion of the releases in this release + group). + + @return: an integer containing the offset, or None. + @see: L{getReleases}, L{getReleasesCount} + """ + return self._releasesOffset + + def setReleasesOffset(self, offset): + """Sets the offset of the release list. + + @param offset: an integer containing the offset, or None. + @see: L{getReleases}, L{getReleasesOffset} + """ + self._releasesOffset = offset + + releasesOffset = property(getReleasesOffset, setReleasesOffset, + doc='The offset of the release list.') + + def getReleasesCount(self): + """Returns the number of releases in this release group. + + This may or may not match the number of elements returned by + L{getReleases}. If the count is higher than the length of that + list, then the list is incomplete. + + @return: an integer containing the count, or None + @see: L{getReleases}, L{setReleasesCount}, L{getReleasesOffset} + """ + return self._releasesCount + + def setReleasesCount(self, value): + """Sets the number of releases in this release group. + + @param value: an integer containing the count, or None. + @see: L{getReleases}, L{getReleasesCount}, L{getReleasesOffset} + """ + self._releasesCount = value + + releasesCount = property(getReleasesCount, setReleasesCount, + doc = 'The total number of releases') + + def getTitle(self): + """Returns this release group's title. 
+ + @return: a string containing the release group's title + """ + return self._title + + def setTitle(self, title): + """Sets the release group's title. + + @param title: a string containing the release group's title. + """ + self._title = title + + title = property(getTitle, setTitle, + doc = 'The title of this release group.') + + def getArtist(self): + """Returns the main artist of this release group. + + @return: an L{Artist} object, or None + """ + return self._artist + + def setArtist(self, artist): + """Sets the release group's main artist. + + @param artist: an L{Artist} object + """ + self._artist = artist + + artist = property(getArtist, setArtist, + doc = 'The main artist of this release group') + + +class Track(Entity): + """Represents a track. + + This class represents a track which may appear on one or more releases. + A track may be associated with exactly one artist (the I{main} artist). + + Using L{getReleases}, you can find out on which releases this track + appears. To get the track number, too, use the + L{Release.getTracksOffset} method. + + @note: Currently, the MusicBrainz server doesn't support tracks to + be on more than one release. + + @see: L{Release}, L{Artist} + """ + def __init__(self, id_=None, title=None): + """Constructor. + + @param id_: a string containing an absolute URI + @param title: a string containing the title + """ + Entity.__init__(self, id_) + self._title = title + self._artist = None + self._duration = None + self._puids = [ ] + self._releases = [ ] + self._isrcs = [ ] + + def getTitle(self): + """Returns the track's title. + + The style and format of this attribute is specified by the + style guide. + + @return: a string containing the title, or None + + @see: U{The MusicBrainz Style Guidelines + } + """ + return self._title + + def setTitle(self, title): + """Sets the track's title. 
+ + @param title: a string containing the title + + @see: L{getTitle} + """ + self._title = title + + title = property(getTitle, setTitle, doc="The track's title.") + + def getArtist(self): + """Returns the main artist of this track. + + @return: an L{Artist} object, or None + """ + return self._artist + + def setArtist(self, artist): + """Sets this track's main artist. + + @param artist: an L{Artist} object + """ + self._artist = artist + + artist = property(getArtist, setArtist, doc="The track's main artist.") + + def getDuration(self): + """Returns the duration of this track in milliseconds. + + @return: an int containing the duration in milliseconds, or None + """ + return self._duration + + def setDuration(self, duration): + """Sets the duration of this track in milliseconds. + + @param duration: an int containing the duration in milliseconds + """ + self._duration = duration + + duration = property(getDuration, setDuration, + doc='The duration in milliseconds.') + + def getDurationSplit(self): + """Returns the duration as a (minutes, seconds) tuple. + + If no duration is set, (0, 0) is returned. Seconds are + rounded towards the ceiling if at least 500 milliseconds + are left. + + @return: a (minutes, seconds) tuple, both entries being ints + """ + duration = self.getDuration() + if duration is None: + return (0, 0) + else: + seconds = int( round(duration / 1000.0) ) + return (seconds / 60, seconds % 60) + + def getPuids(self): + """Returns the PUIDs associated with this track. + + Please note that a PUID may be associated with more than one + track. + + @return: a list of strings, each containing one PUID + """ + return self._puids + + puids = property(getPuids, doc='The list of associated PUIDs.') + + def addPuid(self, puid): + """Add a PUID to this track. + + @param puid: a string containing a PUID + """ + self._puids.append(puid) + + def getISRCs(self): + """Returns the ISRCs associated with this track. 
+ + @return: a list of strings, each containing one ISRC + """ + return self._isrcs + + isrcs = property(getISRCs, doc='The list of associated ISRCs') + + def addISRC(self, isrc): + """Add a ISRC to this track. + + @param isrc: a string containing an ISRC + """ + self._isrcs.append(isrc) + + def getReleases(self): + """Returns the list of releases this track appears on. + + @return: a list of L{Release} objects + """ + return self._releases + + releases = property(getReleases, + doc='The releases on which this track appears.') + + def addRelease(self, release): + """Add a release on which this track appears. + + @param release: a L{Release} object + """ + self._releases.append(release) + + +class Relation(object): + """Represents a relation between two Entities. + + There may be an arbitrary number of relations between all first + class objects in MusicBrainz. The Relation itself has multiple + attributes, which may or may not be used for a given relation + type. + + Note that a L{Relation} object only contains the target but not + the source end of the relation. + + @todo: Add some examples. + + @cvar TO_ARTIST: Identifies relations linking to an artist. + @cvar TO_RELEASE: Identifies relations linking to a release. + @cvar TO_TRACK: Identifies relations linking to a track. + @cvar TO_URL: Identifies relations linking to an URL. + + @cvar DIR_NONE: Relation reading direction doesn't matter. + @cvar DIR_FORWARD: Relation reading direction is from source to target. + @cvar DIR_BACKWARD: Relation reading direction is from target to source. + @cvar DIR_BOTH: Relation reading direction doesn't matter (no longer used!). 
+ """ + # Relation target types + # + TO_ARTIST = NS_REL_1 + 'Artist' + TO_RELEASE = NS_REL_1 + 'Release' + TO_TRACK = NS_REL_1 + 'Track' + TO_URL = NS_REL_1 + 'Url' + + # Relation reading directions + # + DIR_BOTH = 'both' + DIR_FORWARD = 'forward' + DIR_BACKWARD = 'backward' + DIR_NONE = 'none' + + def __init__(self, relationType=None, targetType=None, targetId=None, + direction=DIR_NONE, attributes=None, + beginDate=None, endDate=None, target=None): + """Constructor. + + @param relationType: a string containing an absolute URI + @param targetType: a string containing an absolute URI + @param targetId: a string containing an absolute URI + @param direction: one of C{Relation.DIR_FORWARD}, + C{Relation.DIR_BACKWARD}, or C{Relation.DIR_NONE} + @param attributes: a list of strings containing absolute URIs + @param beginDate: a string containing a date + @param endDate: a string containing a date + @param target: an instance of a subclass of L{Entity} + """ + self._relationType = relationType + self._targetType = targetType + self._targetId = targetId + self._direction = direction + self._beginDate = beginDate + self._endDate = endDate + self._target = target + self._attributes = attributes + if self._attributes is None: + self._attributes = [ ] + + def getType(self): + """Returns this relation's type. + + @return: a string containing an absolute URI, or None + """ + return self._relationType + + def setType(self, type_): + """Sets this relation's type. + + @param type_: a string containing an absolute URI + """ + self._relationType = type_ + + type = property(getType, setType, doc="The relation's type.") + + def getTargetId(self): + """Returns the target's ID. + + This is the ID the relation points to. It is an absolute + URI, and in case of an URL relation, it is a URL. + + @return: a string containing an absolute URI + """ + return self._targetId + + def setTargetId(self, targetId): + """Sets the target's ID. 
+ + @param targetId: a string containing an absolute URI + + @see: L{getTargetId} + """ + self._targetId = targetId + + targetId = property(getTargetId, setTargetId, doc="The target's ID.") + + def getTargetType(self): + """Returns the target's type. + + For MusicBrainz data, the following target types are defined: + - artists: L{Relation.TO_ARTIST} + - releases: L{Relation.TO_RELEASE} + - tracks: L{Relation.TO_TRACK} + - urls: L{Relation.TO_URL} + + @return: a string containing an absolute URI + """ + return self._targetType + + def setTargetType(self, targetType): + """Sets the target's type. + + @param targetType: a string containing an absolute URI + + @see: L{getTargetType} + """ + self._targetType = targetType + + targetId = property(getTargetId, setTargetId, + doc="The type of target this relation points to.") + + def getAttributes(self): + """Returns a list of attributes describing this relation. + + The attributes permitted depend on the relation type. + + @return: a list of strings containing absolute URIs + """ + return self._attributes + + attributes = property(getAttributes, + doc='The list of attributes describing this relation.') + + def addAttribute(self, attribute): + """Adds an attribute to the list. + + @param attribute: a string containing an absolute URI + """ + self._attributes.append(attribute) + + def getBeginDate(self): + """Returns the begin date. + + The definition depends on the relation's type. It may for + example be the day of a marriage or the year an artist + joined a band. For other relation types this may be + undefined. + + @return: a string containing a date + """ + return self._beginDate + + def setBeginDate(self, dateStr): + """Sets the begin date. + + @param dateStr: a string containing a date + + @see: L{getBeginDate} + """ + self._beginDate = dateStr + + beginDate = property(getBeginDate, setBeginDate, doc="The begin date.") + + def getEndDate(self): + """Returns the end date. 
+ + As with the begin date, the definition depends on the + relation's type. Depending on the relation type, this may + or may not be defined. + + @return: a string containing a date + + @see: L{getBeginDate} + """ + return self._endDate + + def setEndDate(self, dateStr): + """Sets the end date. + + @param dateStr: a string containing a date + + @see: L{getBeginDate} + """ + self._endDate = dateStr + + endDate = property(getEndDate, setEndDate, doc="The end date.") + + def getDirection(self): + """Returns the reading direction. + + The direction may be one of L{Relation.DIR_FORWARD}, + L{Relation.DIR_BACKWARD}, or L{Relation.DIR_NONE}, + depending on how the relation should be read. For example, + if direction is L{Relation.DIR_FORWARD} for a cover relation, + it is read as "X is a cover of Y". For some relations there is + no reading direction (like marriages) and the web service doesn't + send a direction. In these cases, the direction is set to + L{Relation.DIR_NONE}. + + @return: L{Relation.DIR_FORWARD}, L{Relation.DIR_BACKWARD}, + or L{Relation.DIR_NONE} + """ + return self._direction + + def setDirection(self, direction): + """Sets the reading direction. + + @param direction: L{Relation.DIR_FORWARD}, + L{Relation.DIR_BACKWARD}, or L{Relation.DIR_NONE} + + @see: L{getDirection} + """ + self._direction = direction + + direction = property(getDirection, setDirection, + doc="The reading direction.") + + def getTarget(self): + """Returns this relation's target object. + + Note that URL relations never have a target object. Use the + L{getTargetId} method to get the URL. + + @return: a subclass of L{Entity}, or None + """ + return self._target + + def setTarget(self, target): + """Sets this relation's target object. + + Note that URL relations never have a target object, they + are set using L{setTargetId}. 
+ + @param target: a subclass of L{Entity} + """ + self._target = target + + target = property(getTarget, setTarget, + doc="The relation's target object.") + + +class ReleaseEvent(object): + """A release event, indicating where and when a release took place. + + All country codes used must be valid ISO-3166 country codes (e.g. 'DE', + 'GB' or 'FR'). The dates are strings and must have the format 'YYYY', + 'YYYY-MM' or 'YYYY-MM-DD'. + + The format of the release medium is a URI that can be compared to the + constants on this class (L{FORMAT_CD}, L{FORMAT_DVD} and others). + """ + FORMAT_CD = NS_MMD_1 + 'CD' + FORMAT_DVD = NS_MMD_1 + 'DVD' + FORMAT_SACD = NS_MMD_1 + 'SACD' + FORMAT_DUALDISC = NS_MMD_1 + 'DualDisc' + FORMAT_LASERDISC = NS_MMD_1 + 'LaserDisc' + FORMAT_MINIDISC = NS_MMD_1 + 'MiniDisc' + FORMAT_VINYL = NS_MMD_1 + 'Vinyl' + FORMAT_CASSETTE = NS_MMD_1 + 'Cassette' + FORMAT_CARTRIDGE = NS_MMD_1 + 'Cartridge' + FORMAT_REEL_TO_REEL = NS_MMD_1 + 'ReelToReel' + FORMAT_DAT = NS_MMD_1 + 'DAT' + FORMAT_DIGITAL = NS_MMD_1 + 'Digital' + FORMAT_WAX_CYLINDER = NS_MMD_1 + 'WaxCylinder' + FORMAT_PIANO_ROLL = NS_MMD_1 + 'PianoRoll' + FORMAT_OTHER = NS_MMD_1 + 'Other' + + def __init__(self, country=None, dateStr=None): + """Constructor. + + @param country: a string containing an ISO-3166 country code + @param dateStr: a string containing a date string + """ + self._countryId = country + self._dateStr = dateStr + self._catalogNumber = None + self._barcode = None + self._label = None + self._format = None + + def getCountry(self): + """Returns the country a release took place. + + @note: Due to a server limitation, the web service does not + return country IDs for release collection queries. This only + affects the L{musicbrainz2.webservice.Query.getReleases} query. 
+ + @return: a string containing an ISO-3166 country code, or None + + @see: L{musicbrainz2.utils.getCountryName} + """ + return self._countryId + + def setCountry(self, country): + """Sets the country a release took place. + + @param country: a string containing an ISO-3166 country code + """ + self._countryId = country + + country = property(getCountry, setCountry, + doc='The country a release took place.') + + def getCatalogNumber(self): + """Returns the catalog number of this release event. + + @return: A string containing the catalog number, or None + """ + return self._catalogNumber + + def setCatalogNumber(self, catalogNumber): + """Sets the catalog number of this release event. + + @param catalogNumber: A string containing the catalog number + """ + self._catalogNumber = catalogNumber + + catalogNumber = property(getCatalogNumber, setCatalogNumber, + doc='The catalog number of the release event') + + def getBarcode(self): + """Returns the barcode of this release event. + + @return: A string containing the barcode, or None + """ + return self._barcode + + def setBarcode(self, barcode): + """Sets the barcode of this release event. + + @param barcode: A string containing the barcode + """ + self._barcode = barcode + + barcode = property(getBarcode, setBarcode, + doc='The barcode of the release event') + + def getLabel(self): + """Returns a L{Label} object for the label associated with this release. + + @return: a L{Label} object, or None + """ + return self._label + + def setLabel(self, label): + """Sets the label of this release event. + + @param label: A L{Label} object + """ + self._label = label + + label = property(getLabel, setLabel, doc='The label of the release') + + def getDate(self): + """Returns the date a release took place. + + @return: a string containing a date + """ + return self._dateStr + + def setDate(self, dateStr): + """Sets the date a release took place. 
+ + @param dateStr: a string containing a date + """ + self._dateStr = dateStr + + date = property(getDate, setDate, doc='The date a release took place.') + + def getFormat(self): + """Returns the format of the release medium. + + @return: a string containing a URI, or None + """ + return self._format + + def setFormat(self, format): + """Sets the format of the release medium. + + @param format: a string containing a URI + """ + self._format = format + + format = property(getFormat, setFormat, + doc='The format of the release medium.') + + +class CDStub(object): + """Represents a CD Stub""" + + def __init__(self, disc): + """Constructor. + + @param disc: a L{Disc} object to create this CD Stub from + """ + assert isinstance(disc, Disc), 'musicbrainz2.model.Disc expected' + self._disc = disc + self._tracks = [ ] + self._title = "" + self._artist = "" + self._barcode = "" + self._comment = "" + + def setTitle(self, title): + """Sets the title of this release. + + @param title: a string containing the title + """ + self._title = title + + def getTitle(self): + """Returns the title of this release. + + @return: a string containing the title + """ + return self._title + + title = property(getTitle, setTitle, + doc='The title of the release') + + def setArtist(self, artist): + """Sets the artist of this release. + + @param artist: a string containing the artist + """ + self._artist = artist + + def getArtist(self): + """Returns the artist of this release. + + @return: a string containing the artist + """ + return self._artist + + artist = property(getArtist, setArtist, + doc='The artist of the release') + + def setComment(self, comment): + """Sets the comment for this release. + + @param comment: a string containing the comment + """ + self._comment = comment + + def getComment(self): + """Returns the comment for this release. 
+ + @return: a string containing the comment + """ + return self._comment + + comment = property(getComment, setComment, + doc='Comment for the release (optional)') + + def setBarcode(self, barcode): + """Sets the barcode of this release. + + @param barcode: a string containing the barcode + """ + self._barcode = barcode + + def getBarcode(self): + """Returns the barcode of this release. + + @return: a string containing the barcode + """ + return self._barcode + + barcode = property(getBarcode, setBarcode, + doc='Barcode for the release (optional)') + + def addTrack(self, title, artist=''): + """Add a track to this release + + @param title: a string containing the title of the track + @param artist: a string containing the artist of the track, + if different to the album artist + """ + self._tracks.append((title, artist)) + + def getTracks(self): + """Return all the tracks on the release. + + @return: a list of tuples containing (title, artist) pairs + for each track + """ + return self._tracks + + tracks = property(getTracks, doc='The tracks of the release.') + +class Disc(object): + """Represents an Audio CD. + + This class represents an Audio CD. A disc can have an ID (the + MusicBrainz DiscID), which is calculated from the CD's table of + contents (TOC). There may also be data from the TOC like the length + of the disc in sectors, as well as position and length of the tracks. + + Note that different TOCs, maybe due to different pressings, lead to + different DiscIDs. Conversely, if two different discs have the same + TOC, they also have the same DiscID (which is unlikely but not + impossible). DiscIDs are always 28 characters long and look like this: + C{'J68I_CDcUFdCRCIbHSEbTBCbooA-'}. Sometimes they are also referred + to as CDIndex IDs. + + The L{MusicBrainz web service } only returns + the DiscID and the number of sectors. 
The DiscID calculation function + L{musicbrainz2.disc.readDisc}, however, can retrieve the other + attributes of L{Disc} from an Audio CD in the disc drive. + """ + def __init__(self, id_=None): + """Constructor. + + @param id_: a string containing a 28-character DiscID + """ + self._id = id_ + self._sectors = None + self._firstTrackNum = None + self._lastTrackNum = None + self._tracks = [ ] + + def getId(self): + """Returns the MusicBrainz DiscID. + + @return: a string containing a 28-character DiscID + """ + return self._id + + def setId(self, id_): + """Sets the MusicBrainz DiscId. + + @param id_: a string containing a 28-character DiscID + """ + self._id = id_ + + id = property(getId, setId, doc="The MusicBrainz DiscID.") + + def getSectors(self): + """Returns the length of the disc in sectors. + + @return: the length in sectors as an integer, or None + """ + return self._sectors + + def setSectors(self, sectors): + """Sets the length of the disc in sectors. + + @param sectors: the length in sectors as an integer + """ + self._sectors = sectors + + sectors = property(getSectors, setSectors, + doc="The length of the disc in sectors.") + + def getFirstTrackNum(self): + """Returns the number of the first track on this disc. + + @return: an int containing the track number, or None + """ + return self._firstTrackNum + + def setFirstTrackNum(self, trackNum): + """Sets the number of the first track on this disc. + + @param trackNum: an int containing the track number, or None + """ + self._firstTrackNum = trackNum + + firstTrackNum = property(getFirstTrackNum, setFirstTrackNum, + doc="The number of the first track on this disc.") + + def getLastTrackNum(self): + """Returns the number of the last track on this disc. + + @return: an int containing the track number, or None + """ + return self._lastTrackNum + + def setLastTrackNum(self, trackNum): + """Sets the number of the last track on this disc. 
+ + @param trackNum: an int containing the track number, or None + """ + self._lastTrackNum = trackNum + + lastTrackNum = property(getLastTrackNum, setLastTrackNum, + doc="The number of the last track on this disc.") + + def getTracks(self): + """Returns the sector offset and length of this disc. + + This method returns a list of tuples containing the track + offset and length in sectors for all tracks on this disc. + The track offset is measured from the beginning of the disc, + the length is relative to the track's offset. Note that the + leadout track is I{not} included. + + @return: a list of (offset, length) tuples (values are ints) + """ + return self._tracks + + tracks = property(getTracks, + doc='Sector offset and length of all tracks.') + + def addTrack(self, track): + """Adds a track to the list. + + This method adds an (offset, length) tuple to the list of + tracks. The leadout track must I{not} be added. The total + length of the disc can be set using L{setSectors}. + + @param track: an (offset, length) tuple (values are ints) + + @see: L{getTracks} + """ + self._tracks.append(track) + + +class AbstractAlias(object): + """An abstract super class for all alias classes.""" + def __init__(self, value=None, type_=None, script=None): + """Constructor. + + @param value: a string containing the alias + @param type_: a string containing an absolute URI + @param script: a string containing an ISO-15924 script code + """ + self._value = value + self._type = type_ + self._script = script + + def getValue(self): + """Returns the alias. + + @return: a string containing the alias + """ + return self._value + + def setValue(self, value): + """Sets the alias. + + @param value: a string containing the alias + """ + self._value = value + + value = property(getValue, setValue, doc='The alias value.') + + def getType(self): + """Returns the alias type. 
+ + @return: a string containing an absolute URI, or None + """ + return self._type + + def setType(self, type_): + """Sets the alias type. + + @param type_: a string containing an absolute URI, or None + """ + self._type = type_ + + type = property(getType, setType, doc='The alias type.') + + def getScript(self): + """Returns the alias script. + + @return: a string containing an ISO-15924 script code + """ + return self._script + + def setScript(self, script): + """Sets the alias script. + + @param script: a string containing an ISO-15924 script code + """ + self._script = script + + script = property(getScript, setScript, doc='The alias script.') + + +class ArtistAlias(AbstractAlias): + """Represents an artist alias. + + An alias (the I{alias value}) is a different representation of an + artist's name. This may be a common misspelling or a transliteration + (the I{alias type}). + + The I{alias script} is interesting mostly for transliterations and + indicates which script is used for the alias value. To represent the + script, ISO-15924 script codes like 'Latn', 'Cyrl', or 'Hebr' are used. + """ + pass + + +class LabelAlias(AbstractAlias): + """Represents a label alias. + + An alias (the I{alias value}) is a different representation of a + label's name. This may be a common misspelling or a transliteration + (the I{alias type}). + + The I{alias script} is interesting mostly for transliterations and + indicates which script is used for the alias value. To represent the + script, ISO-15924 script codes like 'Latn', 'Cyrl', or 'Hebr' are used. + """ + pass + + +class User(object): + """Represents a MusicBrainz user.""" + + def __init__(self): + """Constructor.""" + self._name = None + self._types = [ ] + self._showNag = None + + def getName(self): + """Returns the user name. + + @return: a string containing the user name + """ + return self._name + + def setName(self, name): + """Sets the user name. 
+ + @param name: a string containing the user name + """ + self._name = name + + name = property(getName, setName, doc='The MusicBrainz user name.') + + def getTypes(self): + """Returns the types of this user. + + Most users' type list is empty. Currently, the following types + are defined: + + - 'http://musicbrainz.org/ns/ext-1.0#AutoEditor' + - 'http://musicbrainz.org/ns/ext-1.0#RelationshipEditor' + - 'http://musicbrainz.org/ns/ext-1.0#Bot' + - 'http://musicbrainz.org/ns/ext-1.0#NotNaggable' + + @return: a list of strings containing absolute URIs + """ + return self._types + + types = property(getTypes, doc="The user's types.") + + def addType(self, type_): + """Add a type to the list of types. + + @param type_: a string containing an absolute URI + + @see: L{getTypes} + """ + self._types.append(type_) + + def getShowNag(self): + """Returns true if a nag screen should be displayed to the user. + + @return: C{True}, C{False}, or None + """ + return self._showNag + + def setShowNag(self, value): + """Sets the value of the nag screen flag. + + If set to C{True}, a nag screen should be displayed to the user. + + @param value: C{True} or C{False} + + @see: L{getShowNag} + """ + self._showNag = value + + showNag = property(getShowNag, setShowNag, + doc='The value of the nag screen flag.') + +# EOF diff --git a/lib/musicbrainz2/utils.py b/lib/musicbrainz2/utils.py new file mode 100644 index 00000000..0eff7be8 --- /dev/null +++ b/lib/musicbrainz2/utils.py @@ -0,0 +1,204 @@ +"""Various utilities to simplify common tasks. + +This module contains helper functions to make common tasks easier. + +@author: Matthias Friedrich +""" +__revision__ = '$Id: utils.py 11853 2009-07-21 09:26:50Z luks $' + +import re +import urlparse +import os.path + +__all__ = [ + 'extractUuid', 'extractFragment', 'extractEntityType', + 'getReleaseTypeName', 'getCountryName', 'getLanguageName', + 'getScriptName', +] + + +# A pattern to split the path part of an absolute MB URI. 
+PATH_PATTERN = '^/(artist|release|track|label|release-group)/([^/]*)$' + + +def extractUuid(uriStr, resType=None): + """Extract the UUID part from a MusicBrainz identifier. + + This function takes a MusicBrainz ID (an absolute URI) as the input + and returns the UUID part of the URI, thus turning it into a relative + URI. If C{uriStr} is None or a relative URI, then it is returned + unchanged. + + The C{resType} parameter can be used for error checking. Set it to + 'artist', 'release', 'track', 'label', or 'release-group' to make sure C{uriStr} is a + syntactically valid MusicBrainz identifier of the given resource + type. If it isn't, a C{ValueError} exception is raised. + This error checking only works if C{uriStr} is an absolute URI, of + course. + + Example: + + >>> from musicbrainz2.utils import extractUuid + >>> extractUuid('http://musicbrainz.org/artist/c0b2500e-0cef-4130-869d-732b23ed9df5', 'artist') + 'c0b2500e-0cef-4130-869d-732b23ed9df5' + >>> + + @param uriStr: a string containing a MusicBrainz ID (an URI), or None + @param resType: a string containing a resource type + + @return: a string containing a relative URI, or None + + @raise ValueError: the given URI is no valid MusicBrainz ID + """ + if uriStr is None: + return None + + (scheme, netloc, path) = urlparse.urlparse(uriStr)[:3] + + if scheme == '': + return uriStr # no URI, probably already the UUID + + if scheme != 'http' or netloc != 'musicbrainz.org': + raise ValueError('%s is no MB ID.' % uriStr) + + m = re.match(PATH_PATTERN, path) + + if m: + if resType is None: + return m.group(2) + else: + if m.group(1) == resType: + return m.group(2) + else: + raise ValueError('expected "%s" Id' % resType) + else: + raise ValueError('%s is no valid MB ID.' % uriStr) + + +def extractFragment(uriStr, uriPrefix=None): + """Extract the fragment part from a URI. + + If C{uriStr} is None or no absolute URI, then it is returned unchanged. + + The C{uriPrefix} parameter can be used for error checking. 
If C{uriStr} + is an absolute URI, then the function checks if it starts with + C{uriPrefix}. If it doesn't, a C{ValueError} exception is raised. + + @param uriStr: a string containing an absolute URI + @param uriPrefix: a string containing an URI prefix + + @return: a string containing the fragment, or None + + @raise ValueError: the given URI doesn't start with C{uriPrefix} + """ + if uriStr is None: + return None + + (scheme, netloc, path, params, query, frag) = urlparse.urlparse(uriStr) + if scheme == '': + return uriStr # this is no URI + + if uriPrefix is None or uriStr.startswith(uriPrefix): + return frag + else: + raise ValueError("prefix doesn't match URI %s" % uriStr) + + +def extractEntityType(uriStr): + """Returns the entity type an entity URI is referring to. + + @param uriStr: a string containing an absolute entity URI + + @return: a string containing 'artist', 'release', 'track', or 'label' + + @raise ValueError: if the given URI is no valid MusicBrainz ID + """ + if uriStr is None: + raise ValueError('None is no valid entity URI') + + (scheme, netloc, path) = urlparse.urlparse(uriStr)[:3] + + if scheme == '': + raise ValueError('%s is no absolute MB ID.' % uriStr) + + if scheme != 'http' or netloc != 'musicbrainz.org': + raise ValueError('%s is no MB ID.' % uriStr) + + m = re.match(PATH_PATTERN, path) + + if m: + return m.group(1) + else: + raise ValueError('%s is no valid MB ID.' % uriStr) + + +def getReleaseTypeName(releaseType): + """Returns the name of a release type URI. + + @param releaseType: a string containing a release type URI + + @return: a string containing a printable name for the release type + + @see: L{musicbrainz2.model.Release} + """ + from musicbrainz2.data.releasetypenames import releaseTypeNames + return releaseTypeNames.get(releaseType) + + +def getCountryName(id_): + """Returns a country's name based on an ISO-3166 country code. 
+ + The country table this function is based on has been modified for + MusicBrainz purposes by using the extension mechanism defined in + ISO-3166. All IDs are still valid ISO-3166 country codes, but some + IDs have been added to include historic countries and some of the + country names have been modified to make them better suited for + display purposes. + + If the country ID is not found, None is returned. This may happen + for example, when new countries are added to the MusicBrainz web + service which aren't known to this library yet. + + @param id_: a two-letter upper case string containing an ISO-3166 code + + @return: a string containing the country's name, or None + + @see: L{musicbrainz2.model} + """ + from musicbrainz2.data.countrynames import countryNames + return countryNames.get(id_) + + +def getLanguageName(id_): + """Returns a language name based on an ISO-639-2/T code. + + This function uses a subset of the ISO-639-2/T code table to map + language IDs (terminologic, not bibliographic ones!) to names. + + @param id_: a three-letter upper case string containing an ISO-639-2/T code + + @return: a string containing the language's name, or None + + @see: L{musicbrainz2.model} + """ + from musicbrainz2.data.languagenames import languageNames + return languageNames.get(id_) + + +def getScriptName(id_): + """Returns a script name based on an ISO-15924 code. + + This function uses a subset of the ISO-15924 code table to map + script IDs to names. 
+ + @param id_: a four-letter string containing an ISO-15924 script code + + @return: a string containing the script's name, or None + + @see: L{musicbrainz2.model} + """ + from musicbrainz2.data.scriptnames import scriptNames + return scriptNames.get(id_) + + +# EOF diff --git a/lib/musicbrainz2/webservice.py b/lib/musicbrainz2/webservice.py new file mode 100644 index 00000000..c0b76868 --- /dev/null +++ b/lib/musicbrainz2/webservice.py @@ -0,0 +1,1519 @@ +"""Classes for interacting with the MusicBrainz XML web service. + +The L{WebService} class talks to a server implementing the MusicBrainz XML +web service. It mainly handles URL generation and network I/O. Use this +if maximum control is needed. + +The L{Query} class provides a convenient interface to the most commonly +used features of the web service. By default it uses L{WebService} to +retrieve data and the L{XML parser } to parse the +responses. The results are object trees using the L{MusicBrainz domain +model }. + +@author: Matthias Friedrich +""" +__revision__ = '$Id: webservice.py 12973 2011-04-29 11:49:31Z luks $' + +import re +import urllib +import urllib2 +import urlparse +import logging +import os.path +from StringIO import StringIO +import lib.musicbrainz2 as musicbrainz2 +from lib.musicbrainz2.model import Artist, Release, Track +from lib.musicbrainz2.wsxml import MbXmlParser, ParseError +import lib.musicbrainz2.utils as mbutils + +__all__ = [ + 'WebServiceError', 'AuthenticationError', 'ConnectionError', + 'RequestError', 'ResourceNotFoundError', 'ResponseError', + 'IIncludes', 'ArtistIncludes', 'ReleaseIncludes', 'TrackIncludes', + 'LabelIncludes', 'ReleaseGroupIncludes', + 'IFilter', 'ArtistFilter', 'ReleaseFilter', 'TrackFilter', + 'UserFilter', 'LabelFilter', 'ReleaseGroupFilter', + 'IWebService', 'WebService', 'Query', +] + + +class IWebService(object): + """An interface all concrete web service classes have to implement. 
+ + All web service classes have to implement this and follow the + method specifications. + """ + + def get(self, entity, id_, include, filter, version): + """Query the web service. + + Using this method, you can either get a resource by id (using + the C{id_} parameter), or perform a query on all resources of + a type. + + The C{filter} and the C{id_} parameter exclude each other. If + you are using a filter, you may not set C{id_} and vice versa. + + Returns a file-like object containing the result or raises a + L{WebServiceError} or one of its subclasses in case of an + error. Which one is used depends on the implementing class. + + @param entity: a string containing the entity's name + @param id_: a string containing a UUID, or the empty string + @param include: a tuple containing values for the 'inc' parameter + @param filter: parameters, depending on the entity + @param version: a string containing the web service version to use + + @return: a file-like object + + @raise WebServiceError: in case of errors + """ + raise NotImplementedError() + + + def post(self, entity, id_, data, version): + """Submit data to the web service. + + @param entity: a string containing the entity's name + @param id_: a string containing a UUID, or the empty string + @param data: A string containing the data to post + @param version: a string containing the web service version to use + + @return: a file-like object + + @raise WebServiceError: in case of errors + """ + raise NotImplementedError() + + +class WebServiceError(Exception): + """A web service error has occurred. + + This is the base class for several other web service related + exceptions. + """ + + def __init__(self, msg='Webservice Error', reason=None): + """Constructor. + + Set C{msg} to an error message which explains why this + exception was raised. The C{reason} parameter should be the + original exception which caused this L{WebService} exception + to be raised. 
If given, it has to be an instance of + C{Exception} or one of its child classes. + + @param msg: a string containing an error message + @param reason: another exception instance, or None + """ + Exception.__init__(self) + self.msg = msg + self.reason = reason + + def __str__(self): + """Makes this class printable. + + @return: a string containing an error message + """ + return self.msg + + +class ConnectionError(WebServiceError): + """Getting a server connection failed. + + This exception is mostly used if the client couldn't connect to + the server because of an invalid host name or port. It doesn't + make sense if the web service in question doesn't use the network. + """ + pass + + +class RequestError(WebServiceError): + """An invalid request was made. + + This exception is raised if the client made an invalid request. + That could be syntactically invalid identifiers or unknown or + invalid parameter values. + """ + pass + + +class ResourceNotFoundError(WebServiceError): + """No resource with the given ID exists. + + This is usually a wrapper around IOError (which is superclass of + HTTPError). + """ + pass + + +class AuthenticationError(WebServiceError): + """Authentication failed. + + This is thrown if user name, password or realm were invalid while + trying to access a protected resource. + """ + pass + + +class ResponseError(WebServiceError): + """The returned resource was invalid. + + This may be due to a malformed XML document or if the requested + data wasn't part of the response. It can only occur in case of + bugs in the web service itself. + """ + pass + +class DigestAuthHandler(urllib2.HTTPDigestAuthHandler): + """Patched DigestAuthHandler to correctly handle Digest Auth according to RFC 2617. + + This will allow multiple qop values in the WWW-Authenticate header (e.g. "auth,auth-int"). + The only supported qop value is still auth, though. 
+ See http://bugs.python.org/issue9714 + + @author Kuno Woudt + """ + def get_authorization(self, req, chal): + qop = chal.get('qop') + if qop and ',' in qop and 'auth' in qop.split(','): + chal['qop'] = 'auth' + + return urllib2.HTTPDigestAuthHandler.get_authorization(self, req, chal) + +class WebService(IWebService): + """An interface to the MusicBrainz XML web service via HTTP. + + By default, this class uses the MusicBrainz server but may be + configured for accessing other servers as well using the + L{constructor <__init__>}. This implements L{IWebService}, so + additional documentation on method parameters can be found there. + """ + + def __init__(self, host='musicbrainz.org', port=80, pathPrefix='/ws', + username=None, password=None, realm='musicbrainz.org', + opener=None): + """Constructor. + + This can be used without parameters. In this case, the + MusicBrainz server will be used. + + @param host: a string containing a host name + @param port: an integer containing a port number + @param pathPrefix: a string prepended to all URLs + @param username: a string containing a MusicBrainz user name + @param password: a string containing the user's password + @param realm: a string containing the realm used for authentication + @param opener: an C{urllib2.OpenerDirector} object used for queries + """ + self._host = host + self._port = port + self._username = username + self._password = password + self._realm = realm + self._pathPrefix = pathPrefix + self._log = logging.getLogger(str(self.__class__)) + + if opener is None: + self._opener = urllib2.build_opener() + else: + self._opener = opener + + passwordMgr = self._RedirectPasswordMgr() + authHandler = DigestAuthHandler(passwordMgr) + authHandler.add_password(self._realm, (), # no host set + self._username, self._password) + self._opener.add_handler(authHandler) + + + def _makeUrl(self, entity, id_, include=( ), filter={ }, + version='1', type_='xml'): + params = dict(filter) + if type_ is not None: + 
params['type'] = type_ + if len(include) > 0: + params['inc'] = ' '.join(include) + + netloc = self._host + if self._port != 80: + netloc += ':' + str(self._port) + path = '/'.join((self._pathPrefix, version, entity, id_)) + + query = urllib.urlencode(params) + + url = urlparse.urlunparse(('http', netloc, path, '', query,'')) + + return url + + + def _openUrl(self, url, data=None): + userAgent = 'python-musicbrainz/' + musicbrainz2.__version__ + req = urllib2.Request(url) + req.add_header('User-Agent', userAgent) + return self._opener.open(req, data) + + + def get(self, entity, id_, include=( ), filter={ }, version='1'): + """Query the web service via HTTP-GET. + + Returns a file-like object containing the result or raises a + L{WebServiceError}. Conditions leading to errors may be + invalid entities, IDs, C{include} or C{filter} parameters + and unsupported version numbers. + + @raise ConnectionError: couldn't connect to server + @raise RequestError: invalid IDs or parameters + @raise AuthenticationError: invalid user name and/or password + @raise ResourceNotFoundError: resource doesn't exist + + @see: L{IWebService.get} + """ + url = self._makeUrl(entity, id_, include, filter, version) + + self._log.debug('GET ' + url) + + try: + return self._openUrl(url) + except urllib2.HTTPError, e: + self._log.debug("GET failed: " + str(e)) + if e.code == 400: # in python 2.4: httplib.BAD_REQUEST + raise RequestError(str(e), e) + elif e.code == 401: # httplib.UNAUTHORIZED + raise AuthenticationError(str(e), e) + elif e.code == 404: # httplib.NOT_FOUND + raise ResourceNotFoundError(str(e), e) + else: + raise WebServiceError(str(e), e) + except urllib2.URLError, e: + self._log.debug("GET failed: " + str(e)) + raise ConnectionError(str(e), e) + + + def post(self, entity, id_, data, version='1'): + """Send data to the web service via HTTP-POST. + + Note that this may require authentication. You can set + user name, password and realm in the L{constructor <__init__>}. 
+ + @raise ConnectionError: couldn't connect to server + @raise RequestError: invalid IDs or parameters + @raise AuthenticationError: invalid user name and/or password + @raise ResourceNotFoundError: resource doesn't exist + + @see: L{IWebService.post} + """ + url = self._makeUrl(entity, id_, version=version, type_=None) + + self._log.debug('POST ' + url) + self._log.debug('POST-BODY: ' + data) + + try: + return self._openUrl(url, data) + except urllib2.HTTPError, e: + self._log.debug("POST failed: " + str(e)) + if e.code == 400: # in python 2.4: httplib.BAD_REQUEST + raise RequestError(str(e), e) + elif e.code == 401: # httplib.UNAUTHORIZED + raise AuthenticationError(str(e), e) + elif e.code == 404: # httplib.NOT_FOUND + raise ResourceNotFoundError(str(e), e) + else: + raise WebServiceError(str(e), e) + except urllib2.URLError, e: + self._log.debug("POST failed: " + str(e)) + raise ConnectionError(str(e), e) + + + # Special password manager which also works with redirects by simply + # ignoring the URI. As a consequence, only *ONE* (username, password) + # tuple per realm can be used for all URIs. + # + class _RedirectPasswordMgr(urllib2.HTTPPasswordMgr): + def __init__(self): + self._realms = { } + + def find_user_password(self, realm, uri): + # ignoring the uri parameter intentionally + try: + return self._realms[realm] + except KeyError: + return (None, None) + + def add_password(self, realm, uri, username, password): + # ignoring the uri parameter intentionally + self._realms[realm] = (username, password) + + +class IFilter(object): + """A filter for collections. + + This is the interface all filters have to implement. Filter classes + are initialized with a set of criteria and are then applied to + collections of items. The criteria are usually strings or integer + values, depending on the filter. + + Note that all strings passed to filters should be unicode strings + (python type C{unicode}). 
class ArtistFilter(IFilter):
    """A filter for the artist collection."""

    def __init__(self, name=None, limit=None, offset=None, query=None):
        """Constructor.

        C{query} may hold a query in Lucene syntax. C{name} and
        C{query} may not be used together.

        @param name: a unicode string containing the artist's name
        @param limit: the maximum number of artists to return
        @param offset: start results at this zero-based offset
        @param query: a string containing a query in Lucene syntax

        @raise ValueError: the parameters were rejected by the validity check
        """
        keys = ('name', 'limit', 'offset', 'query')
        values = (name, limit, offset, query)
        # A list of (parameter, value) pairs, in the order given above.
        self._params = list(zip(keys, values))

        if not _paramsValid(self._params):
            raise ValueError('invalid combination of parameters')

    def createParameters(self):
        """Return the query parameters built from the stored pairs."""
        return _createParameters(self._params)
class ReleaseGroupFilter(IFilter):
    """A filter for the release group collection."""

    def __init__(self, title=None, releaseTypes=None, artistName=None,
                 artistId=None, limit=None, offset=None, query=None):
        """Constructor.

        When C{artistId} is set, only release groups matching that ID
        are returned. C{releaseTypes} restricts the types of the
        release groups returned; passing
        C{(Release.TYPE_ALBUM, Release.TYPE_OFFICIAL)}, for example,
        yields officially released albums only. The given types are
        connected using the I{AND} operator. MusicBrainz' support is
        currently very limited, so C{Release.TYPE_LIVE} and
        C{Release.TYPE_COMPILATION} exclude each other (see the
        MusicBrainz documentation on release attributes for more
        information and all valid values).

        If both C{artistName} and C{artistId} are given, the server
        will ignore C{artistName}.

        C{query} may hold a query in Lucene syntax. It may not be used
        together with the other parameters except for C{limit} and
        C{offset}.

        @param title: a unicode string containing the release group's title
        @param releaseTypes: a sequence of release type URIs
        @param artistName: a unicode string containing the artist's name
        @param artistId: a unicode string containing the artist's ID
        @param limit: the maximum number of release groups to return
        @param offset: start results at this zero-based offset
        @param query: a string containing a query in Lucene syntax

        @raise ValueError: the parameters were rejected by the validity check

        @see: the constants in L{musicbrainz2.model.Release}
        """
        # The type URIs are sent as a space separated list of their
        # fragments; a missing or empty sequence means "no restriction".
        releaseTypesStr = None
        if releaseTypes is not None and len(releaseTypes) != 0:
            fragments = [ mbutils.extractFragment(uri) for uri in releaseTypes ]
            releaseTypesStr = ' '.join(fragments)

        artistUuid = mbutils.extractUuid(artistId)

        self._params = [
            ('title', title),
            ('releasetypes', releaseTypesStr),
            ('artist', artistName),
            ('artistid', artistUuid),
            ('limit', limit),
            ('offset', offset),
            ('query', query),
        ]

        if not _paramsValid(self._params):
            raise ValueError('invalid combination of parameters')

    def createParameters(self):
        """Return the query parameters built from the stored pairs."""
        return _createParameters(self._params)
class TrackFilter(IFilter):
    """A filter for the track collection."""

    def __init__(self, title=None, artistName=None, artistId=None,
                 releaseTitle=None, releaseId=None,
                 duration=None, puid=None, limit=None, offset=None,
                 query=None):
        """Constructor.

        When C{artistId}, C{releaseId} or C{puid} are set, only tracks
        matching those IDs are returned.

        The server will ignore C{artistName} and C{releaseTitle} if
        C{artistId} or C{releaseId} are set respectively.

        C{query} may hold a query in Lucene syntax. It may not be used
        together with the other parameters except for C{limit} and
        C{offset}.

        @param title: a unicode string containing the track's title
        @param artistName: a unicode string containing the artist's name
        @param artistId: a string containing the artist's ID
        @param releaseTitle: a unicode string containing the release's title
        @param releaseId: a string containing the release's ID
        @param duration: the track's length in milliseconds
        @param puid: a string containing a PUID
        @param limit: the maximum number of tracks to return
        @param offset: start results at this zero-based offset
        @param query: a string containing a query in Lucene syntax

        @raise ValueError: the parameters were rejected by the validity check
        """
        # Both IDs are passed through extractUuid before being stored;
        # the artist ID is handled first.
        artistUuid = mbutils.extractUuid(artistId)
        releaseUuid = mbutils.extractUuid(releaseId)

        self._params = [
            ('title', title),
            ('artist', artistName),
            ('artistid', artistUuid),
            ('release', releaseTitle),
            ('releaseid', releaseUuid),
            ('duration', duration),
            ('puid', puid),
            ('limit', limit),
            ('offset', offset),
            ('query', query),
        ]

        if not _paramsValid(self._params):
            raise ValueError('invalid combination of parameters')

    def createParameters(self):
        """Return the query parameters built from the stored pairs."""
        return _createParameters(self._params)
class ReleaseIncludes(IIncludes):
    """A specification on how much data to return with a release."""
    def __init__(self, artist=False, counts=False, releaseEvents=False,
                 discs=False, tracks=False,
                 artistRelations=False, releaseRelations=False,
                 trackRelations=False, urlRelations=False,
                 labels=False, tags=False, ratings=False, isrcs=False,
                 releaseGroup=False):
        """Constructor.

        Each argument switches one include tag on or off. Two tags
        are forced on as a side effect: C{labels} pulls in the release
        events and C{isrcs} pulls in the tracks.
        """
        includes = {
            'artist': artist,
            'counts': counts,
            'labels': labels,
            'release-groups': releaseGroup,
            'release-events': releaseEvents,
            'discs': discs,
            'tracks': tracks,
            'artist-rels': artistRelations,
            'release-rels': releaseRelations,
            'track-rels': trackRelations,
            'url-rels': urlRelations,
            'tags': tags,
            'ratings': ratings,
            'isrcs': isrcs,
        }

        # Asking for labels without release events makes no sense, so
        # the release events are requested as well.
        if labels and not releaseEvents:
            includes['release-events'] = True
        # The same goes for ISRCs without tracks.
        if isrcs and not tracks:
            includes['tracks'] = True

        self._includes = includes

    def createIncludeTags(self):
        """Return the include tags built from the stored flags."""
        return _createIncludes(self._includes)
ratings, + } + + def createIncludeTags(self): + return _createIncludes(self._includes) + + +class Query(object): + """A simple interface to the MusicBrainz web service. + + This is a facade which provides a simple interface to the MusicBrainz + web service. It hides all the details like fetching data from a server, + parsing the XML and creating an object tree. Using this class, you can + request data by ID or search the I{collection} of all resources + (artists, releases, or tracks) to retrieve those matching given + criteria. This document contains examples to get you started. + + + Working with Identifiers + ======================== + + MusicBrainz uses absolute URIs as identifiers. For example, the artist + 'Tori Amos' is identified using the following URI:: + http://musicbrainz.org/artist/c0b2500e-0cef-4130-869d-732b23ed9df5 + + In some situations it is obvious from the context what type of + resource an ID refers to. In these cases, abbreviated identifiers may + be used, which are just the I{UUID} part of the URI. Thus the ID above + may also be written like this:: + c0b2500e-0cef-4130-869d-732b23ed9df5 + + All methods in this class which require IDs accept both the absolute + URI and the abbreviated form (aka the relative URI). + + + Creating a Query Object + ======================= + + In most cases, creating a L{Query} object is as simple as this: + + >>> import musicbrainz2.webservice as ws + >>> q = ws.Query() + >>> + + The instantiated object uses the standard L{WebService} class to + access the MusicBrainz web service. If you want to use a different + server or you have to pass user name and password because one of + your queries requires authentication, you have to create the + L{WebService} object yourself and configure it appropriately. + This example uses the MusicBrainz test server and also sets + authentication data: + + >>> import musicbrainz2.webservice as ws + >>> service = ws.WebService(host='test.musicbrainz.org', + ... 
username='whatever', password='secret') + >>> q = ws.Query(service) + >>> + + + Querying for Individual Resources + ================================= + + If the MusicBrainz ID of a resource is known, then the L{getArtistById}, + L{getReleaseById}, or L{getTrackById} method can be used to retrieve + it. Example: + + >>> import musicbrainz2.webservice as ws + >>> q = ws.Query() + >>> artist = q.getArtistById('c0b2500e-0cef-4130-869d-732b23ed9df5') + >>> artist.name + u'Tori Amos' + >>> artist.sortName + u'Amos, Tori' + >>> print artist.type + http://musicbrainz.org/ns/mmd-1.0#Person + >>> + + This returned just the basic artist data, however. To get more detail + about a resource, the C{include} parameters may be used which expect + an L{ArtistIncludes}, L{ReleaseIncludes}, or L{TrackIncludes} object, + depending on the resource type. + + To get data about a release which also includes the main artist + and all tracks, for example, the following query can be used: + + >>> import musicbrainz2.webservice as ws + >>> q = ws.Query() + >>> releaseId = '33dbcf02-25b9-4a35-bdb7-729455f33ad7' + >>> include = ws.ReleaseIncludes(artist=True, tracks=True) + >>> release = q.getReleaseById(releaseId, include) + >>> release.title + u'Tales of a Librarian' + >>> release.artist.name + u'Tori Amos' + >>> release.tracks[0].title + u'Precious Things' + >>> + + Note that the query gets more expensive for the server the more + data you request, so please be nice. + + + Searching in Collections + ======================== + + For each resource type (artist, release, and track), there is one + collection which contains all resources of a type. You can search + these collections using the L{getArtists}, L{getReleases}, and + L{getTracks} methods. The collections are huge, so you have to + use filters (L{ArtistFilter}, L{ReleaseFilter}, or L{TrackFilter}) + to retrieve only resources matching given criteria. 
+ + For example, If you want to search the release collection for + releases with a specified DiscID, you would use L{getReleases} + and a L{ReleaseFilter} object: + + >>> import musicbrainz2.webservice as ws + >>> q = ws.Query() + >>> filter = ws.ReleaseFilter(discId='8jJklE258v6GofIqDIrE.c5ejBE-') + >>> results = q.getReleases(filter=filter) + >>> results[0].score + 100 + >>> results[0].release.title + u'Under the Pink' + >>> + + The query returns a list of results (L{wsxml.ReleaseResult} objects + in this case), which are ordered by score, with a higher score + indicating a better match. Note that those results don't contain + all the data about a resource. If you need more detail, you can then + use the L{getArtistById}, L{getReleaseById}, or L{getTrackById} + methods to request the resource. + + All filters support the C{limit} argument to limit the number of + results returned. This defaults to 25, but the server won't send + more than 100 results to save bandwidth and processing power. Using + C{limit} and the C{offset} parameter, you can page through the + results. + + + Error Handling + ============== + + All methods in this class raise a L{WebServiceError} exception in case + of errors. Depending on the method, a subclass of L{WebServiceError} may + be raised which allows an application to handle errors more precisely. + The following example handles connection errors (invalid host name + etc.) separately and all other web service errors in a combined + catch clause: + + >>> try: + ... artist = q.getArtistById('c0b2500e-0cef-4130-869d-732b23ed9df5') + ... except ws.ConnectionError, e: + ... pass # implement your error handling here + ... except ws.WebServiceError, e: + ... pass # catches all other web service errors + ... + >>> + """ + + def __init__(self, ws=None, wsFactory=WebService, clientId=None): + """Constructor. + + The C{ws} parameter has to be a subclass of L{IWebService}. 
def getArtistById(self, id_, include=None):
    """Returns an artist.

    An exception is raised if no artist with that ID can be found,
    if C{include} contains invalid tags, or if there's a server
    problem.

    @param id_: a string containing the artist's ID
    @param include: an L{ArtistIncludes} object, or None

    @return: an Artist object

    @raise ConnectionError: couldn't connect to server
    @raise RequestError: invalid ID or include tags
    @raise ResourceNotFoundError: artist doesn't exist
    @raise ResponseError: server returned invalid data
    """
    artistUuid = mbutils.extractUuid(id_, 'artist')
    metadata = self._getFromWebService('artist', artistUuid, include)

    artist = metadata.getArtist()
    # A response that parsed fine but carries no artist is an error.
    if artist is None:
        raise ResponseError("server didn't return artist")
    return artist
def getLabels(self, filter):
    """Returns labels matching given criteria.

    @param filter: a L{LabelFilter} object

    @return: the label results found in the server's response
    """
    # Collection searches use an empty ID; only the filter is passed on.
    return self._getFromWebService('label', '', filter=filter).getLabelResults()
def getReleases(self, filter):
    """Returns releases matching given criteria.

    @param filter: a L{ReleaseFilter} object

    @return: a list of L{musicbrainz2.wsxml.ReleaseResult} objects

    @raise ConnectionError: couldn't connect to server
    @raise RequestError: invalid ID or include tags
    @raise ResponseError: server returned invalid data
    """
    # Collection searches use an empty ID; only the filter is passed on.
    return self._getFromWebService('release', '', filter=filter).getReleaseResults()
def getReleaseGroups(self, filter):
    """Returns release groups matching the given criteria.

    @param filter: a L{ReleaseGroupFilter} object

    @return: a list of L{musicbrainz2.wsxml.ReleaseGroupResult} objects

    @raise ConnectionError: couldn't connect to server
    @raise RequestError: invalid ID or include tags
    @raise ResponseError: server returned invalid data
    """
    # Collection searches use an empty ID; only the filter is passed on.
    metadata = self._getFromWebService('release-group', '', filter=filter)
    groupResults = metadata.getReleaseGroupResults()
    return groupResults
def getTracks(self, filter):
    """Returns tracks matching given criteria.

    @param filter: a L{TrackFilter} object

    @return: a list of L{musicbrainz2.wsxml.TrackResult} objects

    @raise ConnectionError: couldn't connect to server
    @raise RequestError: invalid ID or include tags
    @raise ResponseError: server returned invalid data
    """
    # Collection searches use an empty ID; only the filter is passed on.
    return self._getFromWebService('track', '', filter=filter).getTrackResults()
+ + @param name: a unicode string containing the user's name + + @return: a L{User } object + + @raise ConnectionError: couldn't connect to server + @raise RequestError: invalid ID or include tags + @raise AuthenticationError: invalid user name and/or password + @raise ResourceNotFoundError: track doesn't exist + @raise ResponseError: server returned invalid data + """ + filter = UserFilter(name=name) + result = self._getFromWebService('user', '', None, filter) + + if len(result.getUserList()) > 0: + return result.getUserList()[0] + else: + raise ResponseError("response didn't contain user data") + + + def _getFromWebService(self, entity, id_, include=None, filter=None): + if filter is None: + filterParams = [ ] + else: + filterParams = filter.createParameters() + + if include is None: + includeParams = [ ] + else: + includeParams = include.createIncludeTags() + + stream = self._ws.get(entity, id_, includeParams, filterParams) + try: + parser = MbXmlParser() + return parser.parse(stream) + except ParseError, e: + raise ResponseError(str(e), e) + + + def submitPuids(self, tracks2puids): + """Submit track to PUID mappings. + + The C{tracks2puids} parameter has to be a dictionary, with the + keys being MusicBrainz track IDs (either as absolute URIs or + in their 36 character ASCII representation) and the values + being PUIDs (ASCII, 36 characters). + + Note that this method only works if a valid user name and + password have been set. See the example in L{Query} on how + to supply authentication data. 
def submitISRCs(self, tracks2isrcs):
    """Submit track to ISRC mappings.

    C{tracks2isrcs} has to be a dictionary whose keys are MusicBrainz
    track IDs (either as absolute URIs or in their 36 character ASCII
    representation) and whose values are ISRCs (ASCII, 12 characters).

    This method only works if a valid user name and password have
    been set. See the example in L{Query} on how to supply
    authentication data.

    @param tracks2isrcs: a dictionary mapping track IDs to ISRCs

    @raise ConnectionError: couldn't connect to server
    @raise RequestError: invalid track or ISRCs
    @raise AuthenticationError: invalid user name and/or password
    """
    # One 'isrc' parameter per mapping, each holding "<track uuid> <isrc>".
    params = [
        ('isrc', mbutils.extractUuid(trackId, 'track') + ' ' + isrc)
        for (trackId, isrc) in tracks2isrcs.iteritems()
    ]

    self._ws.post('track', '', urllib.urlencode(params, True))
+ + @param releases: a list of releases to add to the user collection + + @raise ConnectionError: couldn't connect to server + @raise AuthenticationError: invalid user name and/or password + """ + rels = [] + for release in releases: + if isinstance(release, Release): + rels.append(mbutils.extractUuid(release.id)) + else: + rels.append(mbutils.extractUuid(release)) + encodedStr = urllib.urlencode({'add': ",".join(rels)}, True) + self._ws.post('collection', '', encodedStr) + + def removeFromUserCollection(self, releases): + """Remove releases from a user's collection. + + The releases parameter must be a list. It can contain either L{Release} + objects or a string representing a MusicBrainz release ID (either as + absolute URIs or in their 36 character ASCII representation). + + Removing a release that is not in the collection has no effect. + + @param releases: a list of releases to remove from the user collection + + @raise ConnectionError: couldn't connect to server + @raise AuthenticationError: invalid user name and/or password + """ + rels = [] + for release in releases: + if isinstance(release, Release): + rels.append(mbutils.extractUuid(release.id)) + else: + rels.append(mbutils.extractUuid(release)) + encodedStr = urllib.urlencode({'remove': ",".join(rels)}, True) + self._ws.post('collection', '', encodedStr) + + def getUserCollection(self, offset=0, maxitems=100): + """Get the releases that are in a user's collection + + A maximum of 100 items will be returned for any one call + to this method. To fetch more than 100 items, use the offset + parameter. 
+ + @param offset: the offset to start fetching results from + @param maxitems: the upper limit on items to return + + @return: a list of L{musicbrainz2.wsxml.ReleaseResult} objects + + @raise ConnectionError: couldn't connect to server + @raise AuthenticationError: invalid user name and/or password + """ + params = { 'offset': offset, 'maxitems': maxitems } + + stream = self._ws.get('collection', '', filter=params) + print stream + try: + parser = MbXmlParser() + result = parser.parse(stream) + except ParseError, e: + raise ResponseError(str(e), e) + + return result.getReleaseResults() + + def submitUserTags(self, entityUri, tags): + """Submit folksonomy tags for an entity. + + Note that all previously existing tags from the authenticated + user are replaced with the ones given to this method. Other + users' tags are not affected. + + @param entityUri: a string containing an absolute MB ID + @param tags: A list of either L{Tag } objects + or strings + + @raise ValueError: invalid entityUri + @raise ConnectionError: couldn't connect to server + @raise RequestError: invalid ID, entity or tags + @raise AuthenticationError: invalid user name and/or password + """ + entity = mbutils.extractEntityType(entityUri) + uuid = mbutils.extractUuid(entityUri, entity) + params = ( + ('type', 'xml'), + ('entity', entity), + ('id', uuid), + ('tags', ','.join([unicode(tag).encode('utf-8') for tag in tags])) + ) + + encodedStr = urllib.urlencode(params) + + self._ws.post('tag', '', encodedStr) + + + def getUserTags(self, entityUri): + """Returns a list of folksonomy tags a user has applied to an entity. + + The given parameter has to be a fully qualified MusicBrainz ID, as + returned by other library functions. + + Note that this method only works if a valid user name and + password have been set. Only the tags the authenticated user + applied to the entity will be returned. If username and/or + password are incorrect, an AuthenticationError is raised. 
+ + This method will return a list of L{Tag } + objects. + + @param entityUri: a string containing an absolute MB ID + + @raise ValueError: invalid entityUri + @raise ConnectionError: couldn't connect to server + @raise RequestError: invalid ID or entity + @raise AuthenticationError: invalid user name and/or password + """ + entity = mbutils.extractEntityType(entityUri) + uuid = mbutils.extractUuid(entityUri, entity) + params = { 'entity': entity, 'id': uuid } + + stream = self._ws.get('tag', '', filter=params) + try: + parser = MbXmlParser() + result = parser.parse(stream) + except ParseError, e: + raise ResponseError(str(e), e) + + return result.getTagList() + + def submitUserRating(self, entityUri, rating): + """Submit rating for an entity. + + Note that all previously existing rating from the authenticated + user are replaced with the one given to this method. Other + users' ratings are not affected. + + @param entityUri: a string containing an absolute MB ID + @param rating: A L{Rating } object + or integer + + @raise ValueError: invalid entityUri + @raise ConnectionError: couldn't connect to server + @raise RequestError: invalid ID, entity or tags + @raise AuthenticationError: invalid user name and/or password + """ + entity = mbutils.extractEntityType(entityUri) + uuid = mbutils.extractUuid(entityUri, entity) + params = ( + ('type', 'xml'), + ('entity', entity), + ('id', uuid), + ('rating', unicode(rating).encode('utf-8')) + ) + + encodedStr = urllib.urlencode(params) + + self._ws.post('rating', '', encodedStr) + + + def getUserRating(self, entityUri): + """Return the rating a user has applied to an entity. + + The given parameter has to be a fully qualified MusicBrainz + ID, as returned by other library functions. + + Note that this method only works if a valid user name and + password have been set. Only the rating the authenticated user + applied to the entity will be returned. If username and/or + password are incorrect, an AuthenticationError is raised. 
+ + This method will return a L{Rating } + object. + + @param entityUri: a string containing an absolute MB ID + + @raise ValueError: invalid entityUri + @raise ConnectionError: couldn't connect to server + @raise RequestError: invalid ID or entity + @raise AuthenticationError: invalid user name and/or password + """ + entity = mbutils.extractEntityType(entityUri) + uuid = mbutils.extractUuid(entityUri, entity) + params = { 'entity': entity, 'id': uuid } + + stream = self._ws.get('rating', '', filter=params) + try: + parser = MbXmlParser() + result = parser.parse(stream) + except ParseError, e: + raise ResponseError(str(e), e) + + return result.getRating() + + def submitCDStub(self, cdstub): + """Submit a CD Stub to the database. + + The number of tracks added to the CD Stub must match the TOC and DiscID + otherwise the submission wil fail. The submission will also fail if + the Disc ID is already in the MusicBrainz database. + + This method will only work if no user name and password are set. 
+ + @param cdstub: a L{CDStub} object to submit + + @raise RequestError: Missmatching TOC/Track information or the + the CD Stub already exists or the Disc ID already exists + """ + assert self._clientId is not None, 'Please supply a client ID' + disc = cdstub._disc + params = [ ] + params.append( ('client', self._clientId.encode('utf-8')) ) + params.append( ('discid', disc.id) ) + params.append( ('title', cdstub.title) ) + params.append( ('artist', cdstub.artist) ) + if cdstub.barcode != "": + params.append( ('barcode', cdstub.barcode) ) + if cdstub.comment != "": + params.append( ('comment', cdstub.comment) ) + + trackind = 0 + for track,artist in cdstub.tracks: + params.append( ('track%d' % trackind, track) ) + if artist != "": + params.append( ('artist%d' % trackind, artist) ) + + trackind += 1 + + toc = "%d %d %d " % (disc.firstTrackNum, disc.lastTrackNum, disc.sectors) + toc = toc + ' '.join( map(lambda x: str(x[0]), disc.getTracks()) ) + + params.append( ('toc', toc) ) + + encodedStr = urllib.urlencode(params) + self._ws.post('release', '', encodedStr) + +def _createIncludes(tagMap): + selected = filter(lambda x: x[1] == True, tagMap.items()) + return map(lambda x: x[0], selected) + +def _createParameters(params): + """Remove (x, None) tuples and encode (x, str/unicode) to utf-8.""" + ret = [ ] + for p in params: + if isinstance(p[1], (str, unicode)): + ret.append( (p[0], p[1].encode('utf-8')) ) + elif p[1] is not None: + ret.append(p) + + return ret + +def _paramsValid(params): + """Check if the query parameter collides with other parameters.""" + tmp = [ ] + for name, value in params: + if value is not None and name not in ('offset', 'limit'): + tmp.append(name) + + if 'query' in tmp and len(tmp) > 1: + return False + else: + return True + +if __name__ == '__main__': + import doctest + doctest.testmod() + +# EOF diff --git a/lib/musicbrainz2/wsxml.py b/lib/musicbrainz2/wsxml.py new file mode 100644 index 00000000..7d031ca5 --- /dev/null +++ 
b/lib/musicbrainz2/wsxml.py @@ -0,0 +1,1675 @@ +"""A parser for the Music Metadata XML Format (MMD). + +This module contains L{MbXmlParser}, which parses the U{Music Metadata XML +Format (MMD) } returned by the +MusicBrainz webservice. + +There are also DOM helper functions in this module used by the parser which +probably aren't useful to users. +""" +__revision__ = '$Id: wsxml.py 12028 2009-09-01 13:15:50Z matt $' + +import re +import logging +import urlparse +import xml.dom.minidom +import xml.sax.saxutils as saxutils +from xml.parsers.expat import ExpatError +from xml.dom import DOMException + +import lib.musicbrainz2.utils as mbutils +import lib.musicbrainz2.model as model +from lib.musicbrainz2.model import NS_MMD_1, NS_REL_1, NS_EXT_1 + +__all__ = [ + 'DefaultFactory', 'Metadata', 'ParseError', + 'MbXmlParser', 'MbXmlWriter', + 'AbstractResult', + 'ArtistResult', 'ReleaseResult', 'TrackResult', 'LabelResult', + 'ReleaseGroupResult' +] + + +class DefaultFactory(object): + """A factory to instantiate classes from the domain model. + + This factory may be used to create objects from L{musicbrainz2.model}. + """ + def newArtist(self): return model.Artist() + def newRelease(self): return model.Release() + def newReleaseGroup(self): return model.ReleaseGroup() + def newTrack(self): return model.Track() + def newRelation(self): return model.Relation() + def newReleaseEvent(self): return model.ReleaseEvent() + def newDisc(self): return model.Disc() + def newArtistAlias(self): return model.ArtistAlias() + def newUser(self): return model.User() + def newLabel(self): return model.Label() + def newLabelAlias(self): return model.LabelAlias() + def newTag(self): return model.Tag() + def newRating(self): return model.Rating() + + +class ParseError(Exception): + """Exception to be thrown if a parse error occurs. + + The C{'msg'} attribute contains a printable error message, C{'reason'} + is the lower level exception that was raised. 
+ """ + + def __init__(self, msg='Parse Error', reason=None): + Exception.__init__(self) + self.msg = msg + self.reason = reason + + def __str__(self): + return self.msg + + +class Metadata(object): + """Represents a parsed Music Metadata XML document. + + The Music Metadata XML format is very flexible and may contain a + diverse set of data (e.g. an artist, a release and a list of tracks), + but usually only a small subset is used (either an artist, a release + or a track, or a lists of objects from one class). + + @see: L{MbXmlParser} for reading, and L{MbXmlWriter} for writing + Metadata objects + """ + def __init__(self): + self._artist = None + self._release = None + self._track = None + self._label = None + self._releaseGroup = None + self._artistResults = [ ] + self._artistResultsOffset = None + self._artistResultsCount = None + self._releaseResults = [ ] + self._releaseResultsOffset = None + self._releaseResultsCount = None + self._releaseGroupResults = [ ] + self._releaseGroupResultsOffset = None + self._releaseGroupResultsCount = None + self._trackResults = [ ] + self._trackResultsOffset = None + self._trackResultsCount = None + self._labelResults = [ ] + self._labelResultsOffset = None + self._labelResultsCount = None + self._tagList = [ ] + self._rating = None + self._userList = [ ] + + def getArtist(self): + return self._artist + + def setArtist(self, artist): + self._artist = artist + + artist = property(getArtist, setArtist, doc='An Artist object.') + + def getLabel(self): + return self._label + + def setLabel(self, label): + self._label = label + + label = property(getLabel, setLabel, doc='A Label object.') + + def getRelease(self): + return self._release + + def setRelease(self, release): + self._release = release + + release = property(getRelease, setRelease, doc='A Release object.') + + def getReleaseGroup(self): + return self._releaseGroup + + def setReleaseGroup(self, releaseGroup): + self._releaseGroup = releaseGroup + + releaseGroup = 
property(getReleaseGroup, setReleaseGroup) + + def getTrack(self): + return self._track + + def setTrack(self, track): + self._track = track + + track = property(getTrack, setTrack, doc='A Track object.') + + def getArtistResults(self): + """Returns an artist result list. + + @return: a list of L{ArtistResult} objects. + """ + return self._artistResults + + artistResults = property(getArtistResults, + doc='A list of ArtistResult objects.') + + def getArtistResultsOffset(self): + """Returns the offset of the artist result list. + + The offset is used for paging through the result list. It + is zero-based. + + @return: an integer containing the offset, or None + + @see: L{getArtistResults}, L{getArtistResultsCount} + """ + return self._artistResultsOffset + + def setArtistResultsOffset(self, value): + """Sets the offset of the artist result list. + + @param value: an integer containing the offset, or None + + @see: L{getArtistResultsOffset} + """ + self._artistResultsOffset = value + + artistResultsOffset = property( + getArtistResultsOffset, setArtistResultsOffset, + doc='The offset of the artist results.') + + def getArtistResultsCount(self): + """Returns the total number of results available. + + This may or may not match with the number of elements that + L{getArtistResults} returns. If the count is higher than + the list, it indicates that the list is incomplete. + + @return: an integer containing the count, or None + + @see: L{setArtistResultsCount}, L{getArtistResultsOffset} + """ + return self._artistResultsCount + + def setArtistResultsCount(self, value): + """Sets the total number of available results. + + @param value: an integer containing the count, or None + + @see: L{getArtistResults}, L{setArtistResultsOffset} + """ + self._artistResultsCount = value + + artistResultsCount = property( + getArtistResultsCount, setArtistResultsCount, + doc='The total number of artists results.') + + def getLabelResults(self): + """Returns a label result list. 
+ + @return: a list of L{LabelResult} objects. + """ + return self._labelResults + + labelResults = property(getLabelResults, + doc='A list of LabelResult objects') + + def getLabelResultsOffset(self): + """Returns the offset of the label result list. + + The offset is used for paging through the result list. It + is zero-based. + + @return: an integer containing the offset, or None + + @see: L{getLabelResults}, L{getLabelResultsCount} + """ + return self._labelResultsOffset + + def setLabelResultsOffset(self, value): + """Sets the offset of the label result list. + + @param value: an integer containing the offset, or None + + @see: L{getLabelResultsOffset} + """ + self._labelResultsOffset = value + + labelResultsOffset = property( + getLabelResultsOffset, setLabelResultsOffset, + doc='The offset of the label results.') + + def getLabelResultsCount(self): + """Returns the total number of results available. + + This may or may not match with the number of elements that + L{getLabelResults} returns. If the count is higher than + the list, it indicates that the list is incomplete. + + @return: an integer containing the count, or None + + @see: L{setLabelResultsCount}, L{getLabelResultsOffset} + """ + return self._labelResultsCount + + def setLabelResultsCount(self, value): + """Sets the total number of available results. + + @param value: an integer containing the count, or None + + @see: L{getLabelResults}, L{setLabelResultsOffset} + """ + self._labelResultsCount = value + + labelResultsCount = property( + getLabelResultsCount, setLabelResultsCount, + doc='The total number of label results.') + + def getReleaseResults(self): + """Returns a release result list. + + @return: a list of L{ReleaseResult} objects. + """ + return self._releaseResults + + releaseResults = property(getReleaseResults, + doc='A list of ReleaseResult objects.') + + def getReleaseResultsOffset(self): + """Returns the offset of the release result list. 
+ + The offset is used for paging through the result list. It + is zero-based. + + @return: an integer containing the offset, or None + + @see: L{getReleaseResults}, L{getReleaseResultsCount} + """ + return self._releaseResultsOffset + + def setReleaseResultsOffset(self, value): + """Sets the offset of the release result list. + + @param value: an integer containing the offset, or None + + @see: L{getReleaseResultsOffset} + """ + self._releaseResultsOffset = value + + releaseResultsOffset = property( + getReleaseResultsOffset, setReleaseResultsOffset, + doc='The offset of the release results.') + + def getReleaseResultsCount(self): + """Returns the total number of results available. + + This may or may not match with the number of elements that + L{getReleaseResults} returns. If the count is higher than + the list, it indicates that the list is incomplete. + + @return: an integer containing the count, or None + + @see: L{setReleaseResultsCount}, L{getReleaseResultsOffset} + """ + return self._releaseResultsCount + + def setReleaseResultsCount(self, value): + """Sets the total number of available results. + + @param value: an integer containing the count, or None + + @see: L{getReleaseResults}, L{setReleaseResultsOffset} + """ + self._releaseResultsCount = value + + releaseResultsCount = property( + getReleaseResultsCount, setReleaseResultsCount, + doc='The total number of release results.') + + def getReleaseGroupResults(self): + """Returns a release group result list. + + @return: a list of L{ReleaseGroupResult} objects. + """ + return self._releaseGroupResults + + releaseGroupResults = property(getReleaseGroupResults, + doc = 'A list of ReleaseGroupResult objects.') + + def getReleaseGroupResultsOffset(self): + """Returns the offset of the release group result list. + + The offset is used for paging through the result list. It + is zero-based. + + @return: an integer containing the offset, or None. 
+ + @see: L{getReleaseGroupResults}, L{getReleaseGroupResultsCount} + """ + return self._releaseGroupResultsOffset + + def setReleaseGroupResultsOffset(self, value): + """Sets the offset of the release group result list. + + @param value: an integer containing the offset, or None + + @see: L{getReleaseGroupResultsOffset} + """ + self._releaseGroupResultsOffset = value + + releaseGroupResultsOffset = property( + getReleaseGroupResultsOffset, setReleaseGroupResultsOffset, + doc='The offset of the release group results.') + + def getReleaseGroupResultsCount(self): + """Returns the total number of results available. + + This may or may not match with the number of elements that + L{getReleaseGroupResults} returns. If the count is higher than + the list, it indicates that the list is incomplete. + + @return: an integer containing the count, or None + + @see: L{setReleaseGroupResultsCount}, L{getReleaseGroupResultsOffset} + """ + return self._releaseGroupResultsCount + + def setReleaseGroupResultsCount(self, value): + """Sets the total number of available results. + + @param value: an integer containing the count, or None + + @see: L{getReleaseGroupResults}, L{setReleaseGroupResultsOffset} + """ + self._releaseGroupResultsCount = value + + releaseGroupResultsCount = property( + getReleaseGroupResultsCount, setReleaseGroupResultsCount, + doc='The total number of release group results.') + + def getTrackResults(self): + """Returns a track result list. + + @return: a list of L{TrackResult} objects. + """ + return self._trackResults + + trackResults = property(getTrackResults, + doc='A list of TrackResult objects.') + + def getTrackResultsOffset(self): + """Returns the offset of the track result list. + + The offset is used for paging through the result list. It + is zero-based. 
+ + @return: an integer containing the offset, or None + + @see: L{getTrackResults}, L{getTrackResultsCount} + """ + return self._trackResultsOffset + + def setTrackResultsOffset(self, value): + """Sets the offset of the track result list. + + @param value: an integer containing the offset, or None + + @see: L{getTrackResultsOffset} + """ + self._trackResultsOffset = value + + trackResultsOffset = property( + getTrackResultsOffset, setTrackResultsOffset, + doc='The offset of the track results.') + + def getTrackResultsCount(self): + """Returns the total number of results available. + + This may or may not match with the number of elements that + L{getTrackResults} returns. If the count is higher than + the list, it indicates that the list is incomplete. + + @return: an integer containing the count, or None + + @see: L{setTrackResultsCount}, L{getTrackResultsOffset} + """ + return self._trackResultsCount + + def setTrackResultsCount(self, value): + """Sets the total number of available results. + + @param value: an integer containing the count, or None + + @see: L{getTrackResults}, L{setTrackResultsOffset} + """ + self._trackResultsCount = value + + trackResultsCount = property( + getTrackResultsCount, setTrackResultsCount, + doc='The total number of track results.') + + + def getTagList(self): + """Returns a list of tags. + + @return: a list of L{model.Tag} objects + """ + return self._tagList + + tagResults = property(getTagList, + doc='A list of Tag objects.') + + def getRating(self): + """Returns the rating. + + @return: rating object + """ + return self._rating + + def setRating(self, value): + """Sets the rating. + + @param value: a L{model.Rating} object + """ + self._rating = value + + rating = property(getRating, setRating, doc='A Rating object.') + + + # MusicBrainz extension to the schema + def getUserList(self): + """Returns a list of users. + + @return: a list of L{model.User} objects + + @note: This is a MusicBrainz extension. 
+ """ + return self._userList + + userResults = property(getUserList, + doc='A list of User objects.') + + +class AbstractResult(object): + """The abstract representation of a result. + + A result is an instance of some kind (Artist, Release, ...) + associated with a score. + """ + + def __init__(self, score): + self._score = score + + def getScore(self): + """Returns the result score. + + The score indicates how good this result matches the search + parameters. The higher the value, the better the match. + + @return: an int between 0 and 100 (both inclusive), or None + """ + return self._score + + def setScore(self, score): + self._score = score + + score = property(getScore, setScore, doc='The relevance score.') + + +class ArtistResult(AbstractResult): + """Represents an artist result. + + An ArtistResult consists of a I{score} and an artist. The score is a + number between 0 and 100, where a higher number indicates a better + match. + """ + def __init__(self, artist, score): + super(ArtistResult, self).__init__(score) + self._artist = artist + + def getArtist(self): + """Returns an Artist object. + + @return: a L{musicbrainz2.model.Artist} object + """ + return self._artist + + def setArtist(self, artist): + self._artist = artist + + artist = property(getArtist, setArtist, doc='An Artist object.') + + +class ReleaseResult(AbstractResult): + """Represents a release result. + + A ReleaseResult consists of a I{score} and a release. The score is a + number between 0 and 100, where a higher number indicates a better + match. + """ + def __init__(self, release, score): + super(ReleaseResult, self).__init__(score) + self._release = release + + def getRelease(self): + """Returns a Release object. 
+ + @return: a L{musicbrainz2.model.Release} object + """ + return self._release + + def setRelease(self, release): + self._release = release + + release = property(getRelease, setRelease, doc='A Release object.') + +class ReleaseGroupResult(AbstractResult): + """Represents a release group result. + + A ReleaseGroupResult consists of a I{score} and a release group. The + score is a number between 0 and 100, where a higher number indicates + a better match. + """ + def __init__(self, releaseGroup, score): + super(ReleaseGroupResult, self).__init__(score) + self._releaseGroup = releaseGroup + + def getReleaseGroup(self): + """Returns a ReleaseGroup object. + + @return: a L{musicbrainz2.model.ReleaseGroup} object + """ + return self._releaseGroup + + def setReleaseGroup(self, value): + self._releaseGroup = value + + releaseGroup = property(getReleaseGroup, setReleaseGroup, doc='A ReleaseGroup object.') + +class TrackResult(AbstractResult): + """Represents a track result. + + A TrackResult consists of a I{score} and a track. The score is a + number between 0 and 100, where a higher number indicates a better + match. + """ + def __init__(self, track, score): + super(TrackResult, self).__init__(score) + self._track = track + + def getTrack(self): + """Returns a Track object. + + @return: a L{musicbrainz2.model.Track} object + """ + return self._track + + def setTrack(self, track): + self._track = track + + track = property(getTrack, setTrack, doc='A Track object.') + + +class LabelResult(AbstractResult): + """Represents a label result. + + An LabelResult consists of a I{score} and a label. The score is a + number between 0 and 100, where a higher number indicates a better + match. + """ + def __init__(self, label, score): + super(LabelResult, self).__init__(score) + self._label = label + + def getLabel(self): + """Returns a Label object. 
+ + @return: a L{musicbrainz2.model.Label} object + """ + return self._label + + def setLabel(self, label): + self._label = label + + label = property(getLabel, setLabel, doc='A Label object.') + + +class MbXmlParser(object): + """A parser for the Music Metadata XML format. + + This parser supports all basic features and extensions defined by + MusicBrainz, including unlimited document nesting. By default it + reads an XML document from a file-like object (stream) and returns + an object tree representing the document using classes from + L{musicbrainz2.model}. + + The implementation tries to be as permissive as possible. Invalid + contents are skipped, but documents have to be well-formed and using + the correct namespace. In case of unrecoverable errors, a L{ParseError} + exception is raised. + + @see: U{The Music Metadata XML Format + } + """ + + def __init__(self, factory=DefaultFactory()): + """Constructor. + + The C{factory} parameter has be an instance of L{DefaultFactory} + or a subclass of it. It is used by L{parse} to obtain objects + from L{musicbrainz2.model} to build resulting object tree. + If you supply your own factory, you have to make sure all + returned objects have the same interface as their counterparts + from L{musicbrainz2.model}. + + @param factory: an object factory + """ + self._log = logging.getLogger(str(self.__class__)) + self._factory = factory + + def parse(self, inStream): + """Parses the MusicBrainz web service XML. + + Returns a L{Metadata} object representing the parsed XML or + raises a L{ParseError} exception if the data was malformed. + The parser tries to be liberal and skips invalid content if + possible. + + Note that an L{IOError} may be raised if there is a problem + reading C{inStream}. 
+ + @param inStream: a file-like object + @return: a L{Metadata} object (never None) + @raise ParseError: if the document is not valid + @raise IOError: if reading from the stream failed + """ + + try: + doc = xml.dom.minidom.parse(inStream) + + # Try to find the root element. If this isn't an mmd + # XML file or the namespace is wrong, this will fail. + elems = doc.getElementsByTagNameNS(NS_MMD_1, 'metadata') + + if len(elems) != 0: + md = self._createMetadata(elems[0]) + else: + msg = 'cannot find root element mmd:metadata' + self._log.debug('ParseError: ' + msg) + raise ParseError(msg) + + doc.unlink() + + return md + except ExpatError, e: + self._log.debug('ExpatError: ' + str(e)) + raise ParseError(msg=str(e), reason=e) + except DOMException, e: + self._log.debug('DOMException: ' + str(e)) + raise ParseError(msg=str(e), reason=e) + + + def _createMetadata(self, metadata): + md = Metadata() + + for node in _getChildElements(metadata): + if _matches(node, 'artist'): + md.artist = self._createArtist(node) + elif _matches(node, 'release'): + md.release = self._createRelease(node) + elif _matches(node, 'release-group'): + md.releaseGroup = self._createReleaseGroup(node) + elif _matches(node, 'track'): + md.track = self._createTrack(node) + elif _matches(node, 'label'): + md.label = self._createLabel(node) + elif _matches(node, 'artist-list'): + (offset, count) = self._getListAttrs(node) + md.artistResultsOffset = offset + md.artistResultsCount = count + self._addArtistResults(node, md.getArtistResults()) + elif _matches(node, 'release-list'): + (offset, count) = self._getListAttrs(node) + md.releaseResultsOffset = offset + md.releaseResultsCount = count + self._addReleaseResults(node, md.getReleaseResults()) + elif _matches(node, 'release-group-list'): + (offset, count) = self._getListAttrs(node) + md.releaseGroupResultsOffset = offset + md.releaseGroupResultsCount = count + self._addReleaseGroupResults(node, md.getReleaseGroupResults()) + elif _matches(node, 
'track-list'): + (offset, count) = self._getListAttrs(node) + md.trackResultsOffset = offset + md.trackResultsCount = count + self._addTrackResults(node, md.getTrackResults()) + elif _matches(node, 'label-list'): + (offset, count) = self._getListAttrs(node) + md.labelResultsOffset = offset + md.labelResultsCount = count + self._addLabelResults(node, md.getLabelResults()) + elif _matches(node, 'tag-list'): + self._addTagsToList(node, md.getTagList()) + elif _matches(node, 'user-list', NS_EXT_1): + self._addUsersToList(node, md.getUserList()) + + return md + + + def _addArtistResults(self, listNode, resultList): + for c in _getChildElements(listNode): + artist = self._createArtist(c) + score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1) + if artist is not None: + resultList.append(ArtistResult(artist, score)) + + def _addReleaseResults(self, listNode, resultList): + for c in _getChildElements(listNode): + release = self._createRelease(c) + score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1) + if release is not None: + resultList.append(ReleaseResult(release, score)) + + def _addReleaseGroupResults(self, listNode, resultList): + for c in _getChildElements(listNode): + releaseGroup = self._createReleaseGroup(c) + score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1) + if releaseGroup is not None: + resultList.append(ReleaseGroupResult(releaseGroup, score)) + + def _addTrackResults(self, listNode, resultList): + for c in _getChildElements(listNode): + track = self._createTrack(c) + score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1) + if track is not None: + resultList.append(TrackResult(track, score)) + + def _addLabelResults(self, listNode, resultList): + for c in _getChildElements(listNode): + label = self._createLabel(c) + score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1) + if label is not None: + resultList.append(LabelResult(label, score)) + + def _addReleasesToList(self, listNode, resultList): + self._addToList(listNode, resultList, self._createRelease) + + 
def _getListAttrs(self, listNode):
    """Return the (offset, count) pair read from a list element.

    Both values come from _getIntAttr, so either one is None when the
    attribute is missing or is not a valid integer.
    """
    return (_getIntAttr(listNode, 'offset'), _getIntAttr(listNode, 'count'))
def _createLabel(self, labelNode):
    """Build a label object from a label element.

    The label is obtained from self._factory.newLabel(). Its ID and type
    are read from the element's 'id' and 'type' attributes; the remaining
    properties are filled in from the child elements handled below.

    @param labelNode: the label element to parse
    @return: the populated label object
    """
    label = self._factory.newLabel()
    label.setId(_getIdAttr(labelNode, 'id', 'label'))
    label.setType(_getUriAttr(labelNode, 'type'))

    # A single if/elif chain: a child element can match at most one name,
    # so once it has been handled the remaining tests are skipped (this
    # mirrors the dispatch used for artists).
    for node in _getChildElements(labelNode):
        if _matches(node, 'name'):
            label.setName(_getText(node))
        elif _matches(node, 'sort-name'):
            label.setSortName(_getText(node))
        elif _matches(node, 'disambiguation'):
            label.setDisambiguation(_getText(node))
        elif _matches(node, 'label-code'):
            label.setCode(_getText(node))
        elif _matches(node, 'country'):
            # Only two upper-case letters are accepted; anything else
            # makes _getText return its default (None).
            country = _getText(node, '^[A-Z]{2}$')
            label.setCountry(country)
        elif _matches(node, 'life-span'):
            label.setBeginDate(_getDateAttr(node, 'begin'))
            label.setEndDate(_getDateAttr(node, 'end'))
        elif _matches(node, 'alias-list'):
            self._addLabelAliases(node, label)
        elif _matches(node, 'tag-list'):
            self._addTagsToEntity(node, label)
        elif _matches(node, 'rating'):
            self._addRatingToEntity(node, label)

    return label
def _createReleaseGroup(self, node):
    """Build a release group object from a release-group element.

    ID and type come from the element's attributes; title, artist and
    the contained release list come from its child elements.

    @param node: the release-group element to parse
    @return: the populated release group object
    """
    group = self._factory.newReleaseGroup()
    group.setId(_getIdAttr(node, 'id', 'release-group'))
    group.setType(_getUriAttr(node, 'type'))

    for element in _getChildElements(node):
        if _matches(element, 'title'):
            group.setTitle(_getText(element))
            continue

        if _matches(element, 'artist'):
            group.setArtist(self._createArtist(element))
            continue

        if _matches(element, 'release-list'):
            listAttrs = self._getListAttrs(element)
            group.setReleasesOffset(listAttrs[0])
            group.setReleasesCount(listAttrs[1])
            self._addReleasesToList(element, group.getReleases())

    return group
def _addDiscs(self, discIdListNode, release):
    """Add a disc to release for every 'disc' child carrying an 'id'.

    The disc ID is taken verbatim from the 'id' attribute and the sector
    count from the 'sectors' attribute (via _getIntAttr, minimum 0).
    Children that are not 'disc' elements or have no 'id' are ignored.
    """
    for discNode in _getChildElements(discIdListNode):
        if not _matches(discNode, 'disc') or not discNode.hasAttribute('id'):
            continue

        disc = self._factory.newDisc()
        disc.setId(discNode.getAttribute('id'))
        disc.setSectors(_getIntAttr(discNode, 'sectors', 0))
        release.addDisc(disc)
def _addRelationsToEntity(self, relationListNode, entity):
    """Attach the relations found in a relation-list element to entity.

    The list's 'target-type' attribute is resolved against NS_REL_1; if
    it is absent nothing is added. Every 'relation' child is converted
    with self._createRelation and added unless that returned None.
    """
    kind = _getUriAttr(relationListNode, 'target-type', NS_REL_1)

    if kind is not None:
        for child in _getChildElements(relationListNode):
            if not _matches(child, 'relation'):
                continue
            relation = self._createRelation(child, kind)
            if relation is not None:
                entity.addRelation(relation)
class _XmlWriter(object):
    """Small indenting XML writer on top of a file-like object.

    Elements opened with start() are remembered on a stack so that end()
    can close them in reverse order; the stack depth also determines the
    indentation of every line written.
    """

    def __init__(self, outStream, indentAmount=' ', newline="\n"):
        """Constructor.

        @param outStream: object with a write() method receiving the XML
        @param indentAmount: whitespace written once per nesting level
        @param newline: line terminator appended after every tag line
        """
        self._out = outStream
        self._indentAmount = indentAmount
        self._stack = [ ]
        self._newline = newline

    def prolog(self, encoding='UTF-8', version='1.0'):
        """Write the XML declaration followed by a newline."""
        pi = '<?xml version="%s" encoding="%s"?>' % (version, encoding)
        self._out.write(pi + self._newline)

    def start(self, name, attrs=None):
        """Write an opening tag and push name on the element stack.

        @param name: the element name
        @param attrs: optional dict of attributes; None values are skipped
        """
        # The indentation is computed before pushing, so the opening tag
        # sits at the parent's depth.
        indent = self._getIndention()
        self._stack.append(name)
        self._out.write(indent + self._makeTag(name, attrs) + self._newline)

    def end(self):
        """Write the closing tag for the most recently started element.

        Raises IndexError if no element is open.
        """
        name = self._stack.pop()
        indent = self._getIndention()
        self._out.write('%s</%s>%s' % (indent, name, self._newline))

    def elem(self, name, value, attrs=None):
        """Write a complete element on one line.

        Attributes whose value is None or '' are dropped. If value is
        None or '' the element is written self-closed, or not at all when
        no attributes remain. Otherwise value is XML-escaped and written
        between an opening and a closing tag.

        @param name: the element name
        @param value: the text content, or None
        @param attrs: optional dict of attributes (never modified)
        """
        # Filter into a fresh dict: the caller's dict is left untouched
        # and nothing is deleted from a dict while it is being iterated.
        present = dict((k, v) for (k, v) in (attrs or { }).items()
                       if v is not None and v != '')

        if value is None or value == '':
            if len(present) == 0:
                return
            self._out.write(self._getIndention())
            self._out.write(self._makeTag(name, present, True)
                            + self._newline)
        else:
            escValue = saxutils.escape(value)
            self._out.write(self._getIndention())
            self._out.write(self._makeTag(name, present))
            self._out.write(escValue)
            self._out.write('</%s>%s' % (name, self._newline))

    def _getIndention(self):
        """Return the whitespace prefix for the current nesting depth."""
        return self._indentAmount * len(self._stack)

    def _makeTag(self, name, attrs=None, close=False):
        """Return the text of an opening (or self-closing) tag.

        @param name: the element name
        @param attrs: optional dict of attributes; None values are skipped,
            all others are converted with str() and quoted for XML
        @param close: if True, produce a self-closing tag ('<name .../>')
        """
        parts = ['<' + name]

        for (k, v) in (attrs or { }).items():
            if v is not None:
                parts.append(' %s=%s' % (k, saxutils.quoteattr(str(v))))

        if close:
            parts.append('/>')
        else:
            parts.append('>')

        return ''.join(parts)
def _writeArtist(self, xml, artist, score=None):
    """Write an 'artist' element for artist; do nothing if it is None.

    Emits the basic properties, then the alias, release and release-group
    lists (each only when non-empty), then the relation lists.

    @param xml: the XML writer to emit to
    @param artist: the artist object, or None
    @param score: optional value for the 'ext:score' attribute
    """
    if artist is None:
        return

    artistAttrs = {
        'id': mbutils.extractUuid(artist.getId()),
        'type': mbutils.extractFragment(artist.getType()),
        'ext:score': score,
    }
    xml.start('artist', artistAttrs)

    xml.elem('name', artist.getName())
    xml.elem('sort-name', artist.getSortName())
    xml.elem('disambiguation', artist.getDisambiguation())

    lifeSpan = {
        'begin': artist.getBeginDate(),
        'end': artist.getEndDate(),
    }
    xml.elem('life-span', None, lifeSpan)

    if len(artist.getAliases()) > 0:
        xml.start('alias-list')
        for entry in artist.getAliases():
            aliasAttrs = {
                'type': entry.getType(),
                'script': entry.getScript(),
            }
            xml.elem('alias', entry.getValue(), aliasAttrs)
        xml.end()

    if len(artist.getReleases()) > 0:
        xml.start('release-list')
        for item in artist.getReleases():
            self._writeRelease(xml, item)
        xml.end()

    if len(artist.getReleaseGroups()) > 0:
        xml.start('release-group-list')
        for group in artist.getReleaseGroups():
            self._writeReleaseGroup(xml, group)
        xml.end()

    self._writeRelationList(xml, artist)
    # TODO: extensions

    xml.end()
def _writeReleaseEvent(self, xml, event):
    """Write an 'event' element, including its label, for event.

    @param xml: the XML writer to emit to
    @param event: the release event object to serialize
    """
    eventAttrs = {
        'country': event.getCountry(),
        'date': event.getDate(),
        'catalog-number': event.getCatalogNumber(),
        'barcode': event.getBarcode(),
        'format': event.getFormat()
    }
    xml.start('event', eventAttrs)
    self._writeLabel(xml, event.getLabel())
    xml.end()
def _writeRelation(self, xml, rel, targetType):
    """Write a 'relation' element for rel.

    Without a target object the relation is written as a single element;
    otherwise the target is nested inside it as an artist, release or
    track, depending on targetType.

    @param xml: the XML writer to emit to
    @param rel: the relation object to serialize
    @param targetType: the absolute target type URI of the enclosing list
    """
    relAttrs = ' '.join([mbutils.extractFragment(a)
                         for a in rel.getAttributes()])

    # An empty attribute list must not produce an 'attributes' attribute.
    if relAttrs == '':
        relAttrs = None

    attrs = {
        'type': mbutils.extractFragment(rel.getType()),
        'target': rel.getTargetId(),
        'direction': rel.getDirection(),
        'begin': rel.getBeginDate(),
        # The end date, not the begin date a second time.
        'end': rel.getEndDate(),
        'attributes': relAttrs,
    }

    if rel.getTarget() is None:
        xml.elem('relation', None, attrs)
    else:
        xml.start('relation', attrs)
        if targetType == NS_REL_1 + 'Artist':
            self._writeArtist(xml, rel.getTarget())
        elif targetType == NS_REL_1 + 'Release':
            self._writeRelease(xml, rel.getTarget())
        elif targetType == NS_REL_1 + 'Track':
            self._writeTrack(xml, rel.getTarget())
        xml.end()
def _getUriListAttr(element, attrName, prefix=NS_MMD_1):
    """Gets a list of URIs from an attribute.

    The attribute value is split on whitespace and every non-empty token
    is passed through _makeAbsoluteUri together with prefix.

    @param element: the element carrying the attribute
    @param attrName: name of the (non-namespaced) attribute
    @param prefix: prefix handed to _makeAbsoluteUri for each token
    @return: a list of URI strings; empty if the attribute is missing
    """
    if not element.hasAttribute(attrName):
        return [ ]

    # Build a real list: callers take len() of the result and index it,
    # which a lazy filter/map object would not support.
    tokens = re.split(r'\s+', element.getAttribute(attrName))
    return [_makeAbsoluteUri(prefix, t) for t in tokens if t != '']
def _getDirectionAttr(element, attrName):
    """Gets the Relation reading direction from an attribute.

    The attribute value must be one of model.Relation.DIR_FORWARD or
    model.Relation.DIR_BACKWARD, optionally surrounded by whitespace.

    @param element: the element carrying the attribute
    @param attrName: name of the attribute holding the direction
    @return: the attribute value as found, or model.Relation.DIR_NONE if
        the attribute is missing or holds anything else
    """
    regex = r'^\s*(' + '|'.join((
                model.Relation.DIR_FORWARD,
                model.Relation.DIR_BACKWARD)) + r')\s*$'
    # Honour the attribute name passed by the caller instead of a
    # hard-coded 'direction'.
    return _getAttr(element, attrName, regex, model.Relation.DIR_NONE)
class Library:
    """List of Song objects built from a parsed library dictionary.

    The resulting songs are available as the 'songs' attribute.
    """

    def __init__(self,dictionary):
        """Convert dictionary (track key -> attribute dict) into songs."""
        self.songs = self.parseDictionary(dictionary)

    def parseDictionary(self,dictionary):
        """Return a list with one Song per entry of dictionary.

        Each value of dictionary is a mapping from track attribute names
        ('Name', 'Artist', 'Album Artist', ...) to their values. Numeric
        and date attributes are converted only when present and non-empty;
        otherwise the Song keeps whatever default it was created with.

        Raises ValueError if a numeric or date attribute cannot be
        converted.
        """
        songs = []
        timeFormat = "%Y-%m-%dT%H:%M:%SZ"
        for attributes in dictionary.values():
            s = Song()
            s.name = attributes.get('Name')
            s.artist = attributes.get('Artist')
            s.album_artist = attributes.get('Album Artist')
            s.composer = attributes.get('Composer')
            s.album = attributes.get('Album')
            s.genre = attributes.get('Genre')
            s.kind = attributes.get('Kind')
            if attributes.get('Size'):
                s.size = int(attributes.get('Size'))
            s.total_time = attributes.get('Total Time')
            s.track_number = attributes.get('Track Number')
            if attributes.get('Year'):
                s.year = int(attributes.get('Year'))
            if attributes.get('Date Modified'):
                s.date_modified = time.strptime(attributes.get('Date Modified'),timeFormat)
            if attributes.get('Date Added'):
                s.date_added = time.strptime(attributes.get('Date Added'),timeFormat)
            if attributes.get('Bit Rate'):
                s.bit_rate = int(attributes.get('Bit Rate'))
            if attributes.get('Sample Rate'):
                s.sample_rate = int(attributes.get('Sample Rate'))
            s.comments = attributes.get('Comments')
            if attributes.get('Rating'):
                s.rating = int(attributes.get('Rating'))
            if attributes.get('Play Count'):
                s.play_count = int(attributes.get('Play Count'))
            if attributes.get('Location'):
                s.location = attributes.get('Location')
            songs.append(s)
        return songs
class XMLLibraryParser:
    """Line-based parser for the track section of a library XML file.

    After construction, 'dictionary' maps each track key to a dict of
    that track's attribute names and (string) values.
    """

    def __init__(self,xmlLibrary):
        """Read the file at path xmlLibrary and parse it line by line."""
        f = open(xmlLibrary)
        try:
            s = f.read()
        finally:
            # Always release the file handle, even if reading fails.
            f.close()
        lines = s.split("\n")
        self.dictionary = self.parser(lines)

    def getValue(self,restOfLine):
        """Return the text of restOfLine with all tags removed.

        The result is pure ASCII: any other character is replaced by an
        XML character reference (e.g. '&#228;').
        """
        value = re.sub("<.*?>","",restOfLine)
        if isinstance(value, bytes):
            value = value.decode("utf-8")
        cleanValue = value.encode("ascii","xmlcharrefreplace")
        if not isinstance(cleanValue, str):
            # On interpreters where encode() yields a separate bytes type,
            # hand back a native string like on Python 2.
            cleanValue = cleanValue.decode("ascii")
        return cleanValue

    def keyAndRestOfLine(self,line):
        """Split line into the text of its <key> element and the rest.

        Raises AttributeError if line contains no <key> element.
        """
        key = re.search('<key>(.*?)</key>',line).group(1)
        restOfLine = re.sub("<key>.*?</key>","",line).strip()
        return key,restOfLine

    def parser(self,lines):
        """Return a dict mapping track keys to attribute dicts.

        Nesting is tracked by counting <dict> and </dict> tags: a <key>
        seen at depth 2 starts a new track, <key> lines at depth 3 are
        that track's attributes. Parsing stops as soon as at least one
        track has been collected and the depth drops below 2.
        """
        dicts = 0
        songs = {}
        inSong = False
        songkey = None
        temp = {}
        for line in lines:
            if re.search('<dict>',line):
                dicts += 1
            if re.search('</dict>',line):
                dicts -= 1
                # Only a </dict> that closes a track stores anything; the
                # enclosing dicts close without a pending track.
                if inSong:
                    songs[songkey] = temp
                inSong = False
            keyMatch = re.search('<key>(.*?)</key>',line)
            if dicts == 2 and keyMatch:
                songkey = keyMatch.group(1)
                inSong = True
                temp = {}
            if dicts == 3 and keyMatch:
                key,restOfLine = self.keyAndRestOfLine(line)
                temp[key] = self.getValue(restOfLine)
            if len(songs) > 0 and dicts < 2:
                return songs
        return songs
+""" +__all__ = ['github','ghsearch','githubsync'] diff --git a/lib/pygithub/ghsearch.py b/lib/pygithub/ghsearch.py new file mode 100644 index 00000000..586e502b --- /dev/null +++ b/lib/pygithub/ghsearch.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# +# Copyright (c) 2005-2008 Dustin Sallings +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# +""" +Search script. 
def mk_url(repo):
    """Return the github.com URL built from repo.username and repo.name."""
    owner, project = repo.username, repo.name
    return "http://github.com/%s/%s" % (owner, project)
"""
Basic usage:

g = GitHub()

for r in g.user.search('dustin'):
    print r.name

See the GitHub docs or README.markdown for more usage.

Copyright (c) 2007 Dustin Sallings
"""

# GAE friendly URL detection (theoretically)
try:
    import urllib2
    default_fetcher = urllib2.urlopen
except ImportError:
    # urllib2 could not be imported.  Keep the name bound so that later
    # references to it do not raise NameError.
    default_fetcher = None

import functools
import urllib
import xml
import xml.dom.minidom


def _string_parser(x):
    """Extract the data from the first child of the input.

    *x* must have a first child carrying a ``data`` attribute (a text node);
    callers in this module only pass nodes whose ``firstChild`` is set.
    """
    return x.firstChild.data


# Parser callables keyed by type name.  Each takes a DOM node and returns the
# parsed value.
_types = {
    'string': _string_parser,
    'integer': lambda x: int(_string_parser(x)),
    'float': lambda x: float(_string_parser(x)),
    'datetime': _string_parser,
    'boolean': lambda x: _string_parser(x) == 'true'
}


def _parse(el):
    """Generic response parser.

    Select a parser for *el* from ``_types`` and return its result.  The type
    name is resolved in this order:

    1. the value of the element's ``type`` attribute, when present;
    2. the element's own tag name, when a parser is registered under it;
    3. for an element with more than one child node, the text of a child
       element named ``type``;
    4. otherwise ``'string'``.

    Raises ``Exception`` when step 3 finds no ``type`` child, and ``KeyError``
    when the resolved name has no entry in ``_types``.
    """
    type_name = 'string'
    if el.attributes and 'type' in el.attributes.keys():
        type_name = el.attributes['type'].value
    elif el.localName in _types:
        type_name = el.localName
    elif len(el.childNodes) > 1:
        # This is a container, find the child type
        type_name = None
        ch = el.firstChild
        while ch and not type_name:
            if ch.localName == 'type':
                type_name = ch.firstChild.data
            ch = ch.nextSibling

    if not type_name:
        raise Exception("Can't parse %s, known: %s"
                        % (el.toxml(), repr(_types.keys())))

    return _types[type_name](el)


def parses(t):
    """Parser for a specific type in the github response.

    Return a decorator that sets ``parses = t`` on the decorated object and
    hands the object back unchanged.
    """
    def f(orig):
        orig.parses = t
        return orig
    return f


def with_temporary_mappings(m):
    """Allow temporary localized altering of type mappings.

    *m* maps type names to parser callables.  For the duration of each call
    to the decorated method, every truthy value replaces the entry of that
    name in ``_types`` and every falsy value removes the entry (a name that
    is not currently mapped is ignored).  The previous table contents are
    restored when the call returns or raises.
    """
    def f(orig):
        @functools.wraps(orig)
        def every(self, *args, **kwargs):
            saved = _types.copy()
            try:
                for k, v in m.items():
                    if v:
                        _types[k] = v
                    else:
                        _types.pop(k, None)
                return orig(self, *args, **kwargs)
            finally:
                # Restore in place so every reference to the table sees the
                # original mappings again.
                _types.clear()
                _types.update(saved)
        return every
    return f


@parses('array')
def _parseArray(el):
    """Return a list with the parsed value of each child of *el*.

    Text nodes and children without any child node of their own are skipped.
    """
    rv = []
    ch = el.firstChild
    while ch:
        if ch.nodeType != xml.dom.Node.TEXT_NODE and ch.firstChild:
            rv.append(_parse(ch))
        ch = ch.nextSibling
    return rv


# Only the class header falls on this source line; the rest of the docstring
# and the class body follow it in the file.
class BaseResponse(object):
    """Base class for XML Response"""
Handling.""" + + def __init__(self, el): + ch = el.firstChild + while ch: + if ch.nodeType != xml.dom.Node.TEXT_NODE and ch.firstChild: + ln = ch.localName.replace('-', '_') + self.__dict__[ln] = _parse(ch) + ch=ch.nextSibling + + def __repr__(self): + return "<<%s>>" % str(self.__class__) + +class User(BaseResponse): + """A github user.""" + + parses = 'user' + + def __repr__(self): + return "<>" % self.name + +class Plan(BaseResponse): + """A github plan.""" + + parses = 'plan' + + def __repr__(self): + return "<>" % self.name + +class Repository(BaseResponse): + """A repository.""" + + parses = 'repository' + + @property + def owner_name(self): + if hasattr(self, 'owner'): + return self.owner + else: + return self.username + + def __repr__(self): + return "<>" % (self.owner_name, self.name) + +class PublicKey(BaseResponse): + """A public key.""" + + parses = 'public-key' + title = 'untitled' + + def __repr__(self): + return "<>" % self.title + +class Commit(BaseResponse): + """A commit.""" + + parses = 'commit' + + def __repr__(self): + return "<>" % self.id + +class Parent(Commit): + """A commit parent.""" + + parses = 'parent' + +class Author(User): + """A commit author.""" + + parses = 'author' + +class Committer(User): + """A commit committer.""" + + parses = 'committer' + +class Issue(BaseResponse): + """An issue within the issue tracker.""" + + parses = 'issue' + + def __repr__(self): + return "<>" % self.number + +class Label(BaseResponse): + """A Label within the issue tracker.""" + parses = 'label' + + def __repr__(self): + return "<