From 8f999111c518ede6098f46aba720dabb00b10276 Mon Sep 17 00:00:00 2001 From: Remy Date: Mon, 11 Jul 2011 14:23:11 -0700 Subject: [PATCH] Added a music scanner to read metadata from audio files rather than using folder names/xml --- config.py | 4 +- itunesimport.py | 42 +- lib/__init__.py | 1 + lib/beets/__init__.py | 19 + lib/beets/autotag/__init__.py | 599 ++++++++++ lib/beets/autotag/art.py | 77 ++ lib/beets/autotag/mb.py | 333 ++++++ lib/beets/importer.py | 651 +++++++++++ lib/beets/library.py | 1202 ++++++++++++++++++++ lib/beets/mediafile.py | 859 ++++++++++++++ lib/beets/plugins.py | 223 ++++ lib/beets/ui/__init__.py | 616 ++++++++++ lib/beets/ui/commands.py | 722 ++++++++++++ lib/beets/util/__init__.py | 255 +++++ lib/beets/util/enumeration.py | 178 +++ lib/beets/util/pipeline.py | 442 ++++++++ lib/beets/vfs.py | 48 + lib/munkres.py | 791 +++++++++++++ lib/mutagen/__init__.py | 217 ++++ lib/mutagen/_constants.py | 153 +++ lib/mutagen/_util.py | 314 ++++++ lib/mutagen/_vorbis.py | 223 ++++ lib/mutagen/apev2.py | 465 ++++++++ lib/mutagen/asf.py | 670 +++++++++++ lib/mutagen/easyid3.py | 465 ++++++++ lib/mutagen/easymp4.py | 249 ++++ lib/mutagen/flac.py | 692 ++++++++++++ lib/mutagen/id3.py | 2005 +++++++++++++++++++++++++++++++++ lib/mutagen/m4a.py | 499 ++++++++ lib/mutagen/monkeysaudio.py | 80 ++ lib/mutagen/mp3.py | 243 ++++ lib/mutagen/mp4.py | 682 +++++++++++ lib/mutagen/musepack.py | 118 ++ lib/mutagen/ogg.py | 498 ++++++++ lib/mutagen/oggflac.py | 127 +++ lib/mutagen/oggspeex.py | 123 ++ lib/mutagen/oggtheora.py | 111 ++ lib/mutagen/oggvorbis.py | 128 +++ lib/mutagen/optimfrog.py | 64 ++ lib/mutagen/trueaudio.py | 62 + lib/mutagen/wavpack.py | 57 + webServer.py | 41 +- 42 files changed, 15334 insertions(+), 14 deletions(-) create mode 100755 lib/__init__.py create mode 100644 lib/beets/__init__.py create mode 100644 lib/beets/autotag/__init__.py create mode 100644 lib/beets/autotag/art.py create mode 100644 lib/beets/autotag/mb.py create mode 100644 
lib/beets/importer.py create mode 100644 lib/beets/library.py create mode 100644 lib/beets/mediafile.py create mode 100755 lib/beets/plugins.py create mode 100644 lib/beets/ui/__init__.py create mode 100755 lib/beets/ui/commands.py create mode 100644 lib/beets/util/__init__.py create mode 100644 lib/beets/util/enumeration.py create mode 100644 lib/beets/util/pipeline.py create mode 100644 lib/beets/vfs.py create mode 100755 lib/munkres.py create mode 100644 lib/mutagen/__init__.py create mode 100644 lib/mutagen/_constants.py create mode 100644 lib/mutagen/_util.py create mode 100644 lib/mutagen/_vorbis.py create mode 100644 lib/mutagen/apev2.py create mode 100644 lib/mutagen/asf.py create mode 100644 lib/mutagen/easyid3.py create mode 100644 lib/mutagen/easymp4.py create mode 100644 lib/mutagen/flac.py create mode 100644 lib/mutagen/id3.py create mode 100644 lib/mutagen/m4a.py create mode 100644 lib/mutagen/monkeysaudio.py create mode 100644 lib/mutagen/mp3.py create mode 100644 lib/mutagen/mp4.py create mode 100644 lib/mutagen/musepack.py create mode 100644 lib/mutagen/ogg.py create mode 100644 lib/mutagen/oggflac.py create mode 100644 lib/mutagen/oggspeex.py create mode 100644 lib/mutagen/oggtheora.py create mode 100644 lib/mutagen/oggvorbis.py create mode 100644 lib/mutagen/optimfrog.py create mode 100644 lib/mutagen/trueaudio.py create mode 100644 lib/mutagen/wavpack.py diff --git a/config.py b/config.py index 0049c883..3b95eb78 100644 --- a/config.py +++ b/config.py @@ -258,9 +258,9 @@ form = '''
-

Path to iTunes folder:
+

Path to Music folder:

- i.e. Music/iTunes or /Users/name/Music/iTunes + i.e. /Users/name/Music/iTunes or /Volumes/share/music

diff --git a/itunesimport.py b/itunesimport.py index bf34ef32..6b508ce3 100644 --- a/itunesimport.py +++ b/itunesimport.py @@ -8,11 +8,44 @@ import time import os import sqlite3 from headphones import FULL_PATH +from lib.beets.mediafile import MediaFile + import logger database = os.path.join(FULL_PATH, 'headphones.db') +def scanMusic(dir): + + results = [] + + for r,d,f in os.walk(dir): + for files in f: + if any(files.endswith(x) for x in (".mp3", ".flac", ".aac", ".ogg", ".ape")): + results.append(os.path.join(r,files)) + + logger.log(u'%i music files found' % len(results)) + + lst = [] + + for song in results: + try: + f = MediaFile(song) + except: + logger.log("Could not read file: '" + song + "'", logger.ERROR) + else: + if not f.artist: + pass + else: + lst.append(f.artist) + + artistlist = {}.fromkeys(lst).keys() + logger.log(u"Preparing to import %i artists" % len(artistlist)) + importartist(artistlist) + + + + def itunesImport(pathtoxml): if os.path.splitext(pathtoxml)[1] == '.xml': logger.log(u"Loading xml file from"+ pathtoxml) @@ -23,11 +56,17 @@ def itunesImport(pathtoxml): lst.append(song.artist) rawlist = {}.fromkeys(lst).keys() artistlist = [f for f in rawlist if f != None] + importartist(artistlist) else: rawlist = os.listdir(pathtoxml) logger.log(u"Loading artists from directory:" +pathtoxml) exclude = ['.ds_store', 'various artists', 'untitled folder', 'va'] artistlist = [f for f in rawlist if f.lower() not in exclude] + importartist(artistlist) + + + +def importartist(artistlist): for name in artistlist: logger.log(u"Querying MusicBrainz for: "+name) time.sleep(1) @@ -43,9 +82,6 @@ def itunesImport(pathtoxml): artist = ws.Query().getArtistById(artistid, inc) conn=sqlite3.connect(database) c=conn.cursor() - c.execute('CREATE TABLE IF NOT EXISTS artists (ArtistID TEXT UNIQUE, ArtistName TEXT, ArtistSortName TEXT, DateAdded TEXT, Status TEXT)') - c.execute('CREATE TABLE IF NOT EXISTS albums (ArtistID TEXT, ArtistName TEXT, AlbumTitle TEXT, 
AlbumASIN TEXT, ReleaseDate TEXT, DateAdded TEXT, AlbumID TEXT UNIQUE, Status TEXT)') - c.execute('CREATE TABLE IF NOT EXISTS tracks (ArtistID TEXT, ArtistName TEXT, AlbumTitle TEXT, AlbumASIN TEXT, AlbumID TEXT, TrackTitle TEXT, TrackDuration TEXT, TrackID TEXT)') c.execute('SELECT ArtistID from artists') artistlist = c.fetchall() if any(artistid in x for x in artistlist): diff --git a/lib/__init__.py b/lib/__init__.py new file mode 100755 index 00000000..d3f5a12f --- /dev/null +++ b/lib/__init__.py @@ -0,0 +1 @@ + diff --git a/lib/beets/__init__.py b/lib/beets/__init__.py new file mode 100644 index 00000000..6ef1be98 --- /dev/null +++ b/lib/beets/__init__.py @@ -0,0 +1,19 @@ +# This file is part of beets. +# Copyright 2011, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +__version__ = '1.0b9' +__author__ = 'Adrian Sampson ' + +from lib.beets import library +Library = library.Library diff --git a/lib/beets/autotag/__init__.py b/lib/beets/autotag/__init__.py new file mode 100644 index 00000000..90584fe1 --- /dev/null +++ b/lib/beets/autotag/__init__.py @@ -0,0 +1,599 @@ +# This file is part of beets. +# Copyright 2011, Adrian Sampson. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Facilities for automatically determining files' correct metadata. +""" +import os +import logging +from collections import defaultdict +import re +from lib.munkres import Munkres +from unidecode import unidecode + +from lib.beets.autotag import mb +from lib.beets import library, mediafile, plugins +from lib.beets.util import levenshtein, sorted_walk + +# Try 5 releases. In the future, this should be more dynamic: let the +# probability of continuing to the next release be inversely +# proportional to how good our current best is and how long we've +# already taken. +MAX_CANDIDATES = 5 + +# Distance parameters. +# Text distance weights: proportions on the normalized intuitive edit +# distance. +ARTIST_WEIGHT = 3.0 +ALBUM_WEIGHT = 3.0 +# The weight of the entire distance calculated for a given track. +TRACK_WEIGHT = 1.0 +# These distances are components of the track distance (that is, they +# compete against each other but not ARTIST_WEIGHT and ALBUM_WEIGHT; +# the overall TRACK_WEIGHT does that). +TRACK_TITLE_WEIGHT = 3.0 +# Used instead of a global artist penalty for various-artist matches. +TRACK_ARTIST_WEIGHT = 2.0 +# Added when the indices of tracks don't match. +TRACK_INDEX_WEIGHT = 1.0 +# Track length weights: no penalty before GRACE, maximum (WEIGHT) +# penalty at GRACE+MAX discrepancy. 
+TRACK_LENGTH_GRACE = 10 +TRACK_LENGTH_MAX = 30 +TRACK_LENGTH_WEIGHT = 2.0 +# MusicBrainz track ID matches. +TRACK_ID_WEIGHT = 5.0 + +# Recommendation constants. +RECOMMEND_STRONG = 'RECOMMEND_STRONG' +RECOMMEND_MEDIUM = 'RECOMMEND_MEDIUM' +RECOMMEND_NONE = 'RECOMMEND_NONE' +# Thresholds for recommendations. +STRONG_REC_THRESH = 0.04 +MEDIUM_REC_THRESH = 0.25 +REC_GAP_THRESH = 0.25 + +# Parameters for string distance function. +# Words that can be moved to the end of a string using a comma. +SD_END_WORDS = ['the', 'a', 'an'] +# Reduced weights for certain portions of the string. +SD_PATTERNS = [ + (r'^the ', 0.1), + (r'[\[\(]?(ep|single)[\]\)]?', 0.0), + (r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1), + (r'\(.*?\)', 0.3), + (r'\[.*?\]', 0.3), + (r'(, )?(pt\.|part) .+', 0.2), +] +# Replacements to use before testing distance. +SD_REPLACE = [ + (r'&', 'and'), +] + +# Artist signals that indicate "various artists". +VA_ARTISTS = (u'', u'various artists', u'va', u'unknown') + +# Autotagging exceptions. +class AutotagError(Exception): + pass + +# Global logger. +log = logging.getLogger('beets') + +def albums_in_dir(path): + """Recursively searches the given directory and returns an iterable + of (path, items) where path is a containing directory and items is + a list of Items that is probably an album. Specifically, any folder + containing any media files is an album. + """ + for root, dirs, files in sorted_walk(path): + # Get a list of items in the directory. + items = [] + for filename in files: + try: + i = library.Item.from_path(os.path.join(root, filename)) + except mediafile.FileTypeError: + pass + except mediafile.UnreadableFileError: + log.warn('unreadable file: ' + filename) + else: + items.append(i) + + # If it's nonempty, yield it. + if items: + yield root, items + +def _string_dist_basic(str1, str2): + """Basic edit distance between two strings, ignoring + non-alphanumeric characters and case. 
Comparisons are based on a + transliteration/lowering to ASCII characters. Normalized by string + length. + """ + str1 = unidecode(str1) + str2 = unidecode(str2) + str1 = re.sub(r'[^a-z0-9]', '', str1.lower()) + str2 = re.sub(r'[^a-z0-9]', '', str2.lower()) + if not str1 and not str2: + return 0.0 + return levenshtein(str1, str2) / float(max(len(str1), len(str2))) + +def string_dist(str1, str2): + """Gives an "intuitive" edit distance between two strings. This is + an edit distance, normalized by the string length, with a number of + tweaks that reflect intuition about text. + """ + str1 = str1.lower() + str2 = str2.lower() + + # Don't penalize strings that move certain words to the end. For + # example, "the something" should be considered equal to + # "something, the". + for word in SD_END_WORDS: + if str1.endswith(', %s' % word): + str1 = '%s %s' % (word, str1[:-len(word)-2]) + if str2.endswith(', %s' % word): + str2 = '%s %s' % (word, str2[:-len(word)-2]) + + # Perform a couple of basic normalizing substitutions. + for pat, repl in SD_REPLACE: + str1 = re.sub(pat, repl, str1) + str2 = re.sub(pat, repl, str2) + + # Change the weight for certain string portions matched by a set + # of regular expressions. We gradually change the strings and build + # up penalties associated with parts of the string that were + # deleted. + base_dist = _string_dist_basic(str1, str2) + penalty = 0.0 + for pat, weight in SD_PATTERNS: + # Get strings that drop the pattern. + case_str1 = re.sub(pat, '', str1) + case_str2 = re.sub(pat, '', str2) + + if case_str1 != str1 or case_str2 != str2: + # If the pattern was present (i.e., it is deleted in the + # the current case), recalculate the distances for the + # modified strings. 
+ case_dist = _string_dist_basic(case_str1, case_str2) + case_delta = max(0.0, base_dist - case_dist) + if case_delta == 0.0: + continue + + # Shift our baseline strings down (to avoid rematching the + # same part of the string) and add a scaled distance + # amount to the penalties. + str1 = case_str1 + str2 = case_str2 + base_dist = case_dist + penalty += weight * case_delta + dist = base_dist + penalty + + return dist + +def _plurality(objs): + """Given a sequence of comparable objects, returns the object that + is most common in the set and if it is the only object is the set. + """ + # Calculate frequencies. + freqs = defaultdict(int) + for obj in objs: + freqs[obj] += 1 + + # Find object with maximum frequency. + max_freq = 0 + res = None + for obj, freq in freqs.items(): + if freq > max_freq: + max_freq = freq + res = obj + + return res, len(freqs) <= 1 + +def current_metadata(items): + """Returns the most likely artist and album for a set of Items. + Each is determined by tag reflected by the plurality of the Items. + """ + keys = 'artist', 'album' + likelies = {} + consensus = {} + for key in keys: + values = [getattr(item, key) for item in items] + likelies[key], consensus[key] = _plurality(values) + return likelies['artist'], likelies['album'], consensus['artist'] + +def order_items(items, trackinfo): + """Orders the items based on how they match some canonical track + information. This always produces a result if the numbers of tracks + match. + """ + # Make sure lengths match. + if len(items) != len(trackinfo): + return None + + # Construct the cost matrix. + costs = [] + for cur_item in items: + row = [] + for i, canon_item in enumerate(trackinfo): + row.append(track_distance(cur_item, canon_item, i+1)) + costs.append(row) + + # Find a minimum-cost bipartite matching. + matching = Munkres().compute(costs) + + # Order items based on the matching. 
+ ordered_items = [None]*len(items) + for cur_idx, canon_idx in matching: + ordered_items[canon_idx] = items[cur_idx] + return ordered_items + +def track_distance(item, track_data, track_index=None, incl_artist=False): + """Determines the significance of a track metadata change. Returns + a float in [0.0,1.0]. `track_index` is the track number of the + `track_data` metadata set. If `track_index` is provided and + item.track is set, then these indices are used as a component of + the distance calculation. `incl_artist` indicates that a distance + component should be included for the track artist (i.e., for + various-artist releases). + """ + # Distance and normalization accumulators. + dist, dist_max = 0.0, 0.0 + + # Check track length. + if 'length' not in track_data: + # If there's no length to check, assume the worst. + dist += TRACK_LENGTH_WEIGHT + else: + diff = abs(item.length - track_data['length']) + diff = max(diff - TRACK_LENGTH_GRACE, 0.0) + diff = min(diff, TRACK_LENGTH_MAX) + dist += (diff / TRACK_LENGTH_MAX) * TRACK_LENGTH_WEIGHT + dist_max += TRACK_LENGTH_WEIGHT + + # Track title. + dist += string_dist(item.title, track_data['title']) * TRACK_TITLE_WEIGHT + dist_max += TRACK_TITLE_WEIGHT + + # Track artist, if included. + # Attention: MB DB does not have artist info for all compilations, + # so only check artist distance if there is actually an artist in + # the MB track data. + if incl_artist and 'artist' in track_data: + dist += string_dist(item.artist, track_data['artist']) * \ + TRACK_ARTIST_WEIGHT + dist_max += TRACK_ARTIST_WEIGHT + + # Track index. + if track_index and item.track: + if track_index != item.track: + dist += TRACK_INDEX_WEIGHT + dist_max += TRACK_INDEX_WEIGHT + + # MusicBrainz track ID. + if item.mb_trackid: + if item.mb_trackid != track_data['id']: + dist += TRACK_ID_WEIGHT + dist_max += TRACK_ID_WEIGHT + + # Plugin distances. 
+ plugin_d, plugin_dm = plugins.track_distance(item, track_data) + dist += plugin_d + dist_max += plugin_dm + + return dist / dist_max + +def distance(items, info): + """Determines how "significant" an album metadata change would be. + Returns a float in [0.0,1.0]. The list of items must be ordered. + """ + cur_artist, cur_album, _ = current_metadata(items) + cur_artist = cur_artist or '' + cur_album = cur_album or '' + + # These accumulate the possible distance components. The final + # distance will be dist/dist_max. + dist = 0.0 + dist_max = 0.0 + + # Artist/album metadata. + if not info['va']: + dist += string_dist(cur_artist, info['artist']) * ARTIST_WEIGHT + dist_max += ARTIST_WEIGHT + dist += string_dist(cur_album, info['album']) * ALBUM_WEIGHT + dist_max += ALBUM_WEIGHT + + # Track distances. + for i, (item, track_data) in enumerate(zip(items, info['tracks'])): + dist += track_distance(item, track_data, i+1, info['va']) * \ + TRACK_WEIGHT + dist_max += TRACK_WEIGHT + + # Plugin distances. + plugin_d, plugin_dm = plugins.album_distance(items, info) + dist += plugin_d + dist_max += plugin_dm + + # Normalize distance, avoiding divide-by-zero. + if dist_max == 0.0: + return 0.0 + else: + return dist/dist_max + +def apply_item_metadata(item, track_data): + """Set an item's metadata from its matched info dictionary. + """ + item.artist = track_data['artist'] + item.title = track_data['title'] + item.mb_trackid = track_data['id'] + if 'artist_id' in track_data: + item.mb_artistid = track_data['artist_id'] + # At the moment, the other metadata is left intact (including album + # and track number). Perhaps these should be emptied? + +def apply_metadata(items, info): + """Set the items' metadata to match the data given in info. The + list of items must be ordered. + """ + for index, (item, track_data) in enumerate(zip(items, info['tracks'])): + # Album, artist, track count. 
+ if 'artist' in track_data: + item.artist = track_data['artist'] + else: + item.artist = info['artist'] + item.albumartist = info['artist'] + item.album = info['album'] + item.tracktotal = len(items) + + # Release date. + if 'year' in info: + item.year = info['year'] + if 'month' in info: + item.month = info['month'] + if 'day' in info: + item.day = info['day'] + + # Title and track index. + item.title = track_data['title'] + item.track = index + 1 + + # MusicBrainz IDs. + item.mb_trackid = track_data['id'] + item.mb_albumid = info['album_id'] + if 'artist_id' in track_data: + item.mb_artistid = track_data['artist_id'] + else: + item.mb_artistid = info['artist_id'] + item.mb_albumartistid = info['artist_id'] + item.albumtype = info['albumtype'] + + # Compilation flag. + item.comp = info['va'] + +def match_by_id(items): + """If the items are tagged with a MusicBrainz album ID, returns an + info dict for the corresponding album. Otherwise, returns None. + """ + # Is there a consensus on the MB album ID? + albumids = [item.mb_albumid for item in items if item.mb_albumid] + if not albumids: + log.debug('No album IDs found.') + return None + + # If all album IDs are equal, look up the album. + if bool(reduce(lambda x,y: x if x==y else (), albumids)): + albumid = albumids[0] + log.debug('Searching for discovered album ID: ' + albumid) + return mb.album_for_id(albumid) + else: + log.debug('No album ID consensus.') + return None + + #fixme In the future, at the expense of performance, we could use + # other IDs (i.e., track and artist) in case the album tag isn't + # present, but that event seems very unlikely. + +def recommendation(results): + """Given a sorted list of result tuples, returns a recommendation + flag (RECOMMEND_STRONG, RECOMMEND_MEDIUM, RECOMMEND_NONE) based + on the results' distances. + """ + if not results: + # No candidates: no recommendation. 
+ rec = RECOMMEND_NONE + else: + min_dist = results[0][0] + if min_dist < STRONG_REC_THRESH: + # Strong recommendation level. + rec = RECOMMEND_STRONG + elif len(results) == 1: + # Only a single candidate. Medium recommendation. + rec = RECOMMEND_MEDIUM + elif min_dist <= MEDIUM_REC_THRESH: + # Medium recommendation level. + rec = RECOMMEND_MEDIUM + elif results[1][0] - min_dist >= REC_GAP_THRESH: + # Gap between first two candidates is large. + rec = RECOMMEND_MEDIUM + else: + # No conclusion. + rec = RECOMMEND_NONE + return rec + +def validate_candidate(items, tuple_dict, info): + """Given a candidate info dict, attempt to add the candidate to + the output dictionary of result tuples. This involves checking + the track count, ordering the items, checking for duplicates, and + calculating the distance. + """ + log.debug('Candidate: %s - %s' % (info['artist'], info['album'])) + + # Don't duplicate. + if info['album_id'] in tuple_dict: + log.debug('Duplicate.') + return + + # Make sure the album has the correct number of tracks. + if len(items) != len(info['tracks']): + log.debug('Track count mismatch.') + return + + # Put items in order. + ordered = order_items(items, info['tracks']) + if not ordered: + log.debug('Not orderable.') + return + + # Get the change distance. + dist = distance(ordered, info) + log.debug('Success. Distance: %f' % dist) + + tuple_dict[info['album_id']] = dist, ordered, info + +def tag_album(items, timid=False, search_artist=None, search_album=None, + search_id=None): + """Bundles together the functionality used to infer tags for a + set of items comprised by an album. Returns everything relevant: + - The current artist. + - The current album. + - A list of (distance, items, info) tuples where info is a + dictionary containing the inferred tags and items is a + reordered version of the input items list. The candidates are + sorted by distance (i.e., best match first). 
+ - A recommendation, one of RECOMMEND_STRONG, RECOMMEND_MEDIUM, + or RECOMMEND_NONE; indicating that the first candidate is + very likely, it is somewhat likely, or no conclusion could + be reached. + If search_artist and search_album or search_id are provided, then + they are used as search terms in place of the current metadata. + May raise an AutotagError if existing metadata is insufficient. + """ + # Get current metadata. + cur_artist, cur_album, artist_consensus = current_metadata(items) + log.debug('Tagging %s - %s' % (cur_artist, cur_album)) + + # The output result tuples (keyed by MB album ID). + out_tuples = {} + + # Try to find album indicated by MusicBrainz IDs. + if search_id: + log.debug('Searching for album ID: ' + search_id) + id_info = mb.album_for_id(search_id) + else: + id_info = match_by_id(items) + if id_info: + validate_candidate(items, out_tuples, id_info) + rec = recommendation(out_tuples.values()) + log.debug('Album ID match recommendation is ' + str(rec)) + if out_tuples and not timid: + # If we have a very good MBID match, return immediately. + # Otherwise, this match will compete against metadata-based + # matches. + if rec == RECOMMEND_STRONG: + log.debug('ID match.') + return cur_artist, cur_album, out_tuples.values(), rec + + # If searching by ID, don't continue to metadata search. + if search_id is not None: + if out_tuples: + return cur_artist, cur_album, out_tuples.values(), rec + else: + return cur_artist, cur_album, [], RECOMMEND_NONE + + # Search terms. + if not (search_artist and search_album): + # No explicit search terms -- use current metadata. + search_artist, search_album = cur_artist, cur_album + log.debug(u'Search terms: %s - %s' % (search_artist, search_album)) + + # Get candidate metadata from search. 
+ if search_artist and search_album: + candidates = mb.match_album(search_artist, search_album, + len(items), MAX_CANDIDATES) + candidates = list(candidates) + else: + candidates = [] + + # Possibly add "various artists" search. + if search_album and ((not artist_consensus) or \ + (search_artist.lower() in VA_ARTISTS) or \ + any(item.comp for item in items)): + log.debug(u'Possibly Various Artists; adding matches.') + candidates.extend(mb.match_album(None, search_album, len(items), + MAX_CANDIDATES)) + + # Get candidates from plugins. + candidates.extend(plugins.candidates(items)) + + # Get the distance to each candidate. + log.debug(u'Evaluating %i candidates.' % len(candidates)) + for info in candidates: + validate_candidate(items, out_tuples, info) + + # Sort by distance. + out_tuples = out_tuples.values() + out_tuples.sort() + + rec = recommendation(out_tuples) + return cur_artist, cur_album, out_tuples, rec + +def tag_item(item, timid=False, search_artist=None, search_title=None, + search_id=None): + """Attempts to find metadata for a single track. Returns a + `(candidates, recommendation)` pair where `candidates` is a list + of `(distance, track_info)` pairs. `search_artist` and + `search_title` may be used to override the current metadata for + the purposes of the MusicBrainz title; likewise `search_id`. + """ + candidates = [] + + # First, try matching by MusicBrainz ID. + trackid = search_id or item.mb_trackid + if trackid: + log.debug('Searching for track ID: ' + trackid) + track_info = mb.track_for_id(trackid) + if track_info: + dist = track_distance(item, track_info, incl_artist=True) + candidates.append((dist, track_info)) + # If this is a good match, then don't keep searching. + rec = recommendation(candidates) + if rec == RECOMMEND_STRONG and not timid: + log.debug('Track ID match.') + return candidates, rec + + # If we're searching by ID, don't proceed. 
+ if search_id is not None: + if candidates: + return candidates, rec + else: + return [], RECOMMEND_NONE + + # Search terms. + if not (search_artist and search_title): + search_artist, search_title = item.artist, item.title + log.debug(u'Item search terms: %s - %s' % (search_artist, search_title)) + + # Candidate metadata from search. + for track_info in mb.match_track(search_artist, search_title): + dist = track_distance(item, track_info, incl_artist=True) + candidates.append((dist, track_info)) + + # Add candidates from plugins. + for track_info in plugins.item_candidates(item): + dist = track_distance(item, track_info, incl_artist=True) + candidates.append((dist, track_info)) + + # Sort by distance and return with recommendation. + log.debug('Found %i candidates.' % len(candidates)) + candidates.sort() + rec = recommendation(candidates) + return candidates, rec diff --git a/lib/beets/autotag/art.py b/lib/beets/autotag/art.py new file mode 100644 index 00000000..90f1dd37 --- /dev/null +++ b/lib/beets/autotag/art.py @@ -0,0 +1,77 @@ +# This file is part of beets. +# Copyright 2010, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Finding album art for tagged albums.""" + +import urllib +import sys +import logging + +from lib.beets.autotag.mb import album_for_id + +# The common logger. +log = logging.getLogger('beets') + + +# Art from Amazon. 
+ +AMAZON_URL = 'http://images.amazon.com/images/P/%s.%02i.LZZZZZZZ.jpg' +AMAZON_INDICES = (1,2) +AMAZON_CONTENT_TYPE = 'image/jpeg' +def art_for_asin(asin): + """Fetches art for an Amazon ID (ASIN) string.""" + for index in AMAZON_INDICES: + # Fetch the image. + url = AMAZON_URL % (asin, index) + try: + log.debug('Downloading art: %s' % url) + fn, headers = urllib.urlretrieve(url) + except IOError: + log.debug('error fetching art at URL %s' % url) + continue + + # Make sure it's actually an image. + if headers.gettype() == AMAZON_CONTENT_TYPE: + log.debug('Downloaded art to: %s' % fn) + return fn + + +# Main interface. + +def art_for_album(album): + """Given an album info dictionary from MusicBrainz, returns a path + to downloaded art for the album (or None if no art is found). + """ + if album['asin']: + log.debug('Fetching album art for ASIN %s.' % album['asin']) + return art_for_asin(album['asin']) + else: + log.debug('No ASIN available: no art found.') + return None + + +# Smoke test. + +if __name__ == '__main__': + aid = sys.argv[1] + album = album_for_id(aid) + if not album: + print 'album not found' + else: + fn = art_for_album(album) + if fn: + print fn + print len(open(fn).read())/1024 + else: + print 'no art found' diff --git a/lib/beets/autotag/mb.py b/lib/beets/autotag/mb.py new file mode 100644 index 00000000..df95710a --- /dev/null +++ b/lib/beets/autotag/mb.py @@ -0,0 +1,333 @@ +# This file is part of beets. +# Copyright 2011, Adrian Sampson. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Searches for albums in the MusicBrainz database. + +This is a thin layer over the official `python-musicbrainz2` module. It +abstracts away that module's object model, the server's Lucene query +syntax, and other uninteresting parts of using musicbrainz2. The +principal interface is the function `match_album`. +""" + +from __future__ import with_statement # for Python 2.5 +import re +import time +import logging +import lib.musicbrainz2.webservice as mbws +from lib.musicbrainz2.model import Release +from threading import Lock +from lib.musicbrainz2.model import VARIOUS_ARTISTS_ID + +SEARCH_LIMIT = 10 +VARIOUS_ARTISTS_ID = VARIOUS_ARTISTS_ID.rsplit('/', 1)[1] + +class ServerBusyError(Exception): pass +class BadResponseError(Exception): pass + +log = logging.getLogger('beets') + +# We hard-code IDs for artists that can't easily be searched for. 
+SPECIAL_CASE_ARTISTS = {
+    '!!!': 'f26c72d3-e52c-467b-b651-679c73d8e1a7',
+}
+
+RELEASE_TYPES = [
+    Release.TYPE_ALBUM,
+    Release.TYPE_SINGLE,
+    Release.TYPE_EP,
+    Release.TYPE_COMPILATION,
+    Release.TYPE_SOUNDTRACK,
+    Release.TYPE_SPOKENWORD,
+    Release.TYPE_INTERVIEW,
+    Release.TYPE_AUDIOBOOK,
+    Release.TYPE_LIVE,
+    Release.TYPE_REMIX,
+    Release.TYPE_OTHER
+]
+
+RELEASE_INCLUDES = mbws.ReleaseIncludes(artist=True, tracks=True,
+                                        releaseEvents=True, labels=True,
+                                        releaseGroup=True)
+TRACK_INCLUDES = mbws.TrackIncludes(artist=True)
+
+# MusicBrainz requires that a client does not query the server more
+# than once a second. This function enforces that limit using a
+# module-global variable to keep track of the last time a query was
+# sent.
+MAX_QUERY_RETRY = 8
+QUERY_WAIT_TIME = 1.0
+last_query_time = 0.0
+mb_lock = Lock()
+def _query_wrap(fun, *args, **kwargs):
+    """Wait until at least `QUERY_WAIT_TIME` seconds have passed since
+    the last invocation of this function. Then call
+    fun(*args, **kwargs). If it fails due to a "server busy" message
+    (or a connection error), then try again. Tries up to
+    `MAX_QUERY_RETRY` times before giving up.
+
+    Returns whatever `fun` returns. Raises `BadResponseError` when the
+    server's response is malformed and `ServerBusyError` when every
+    attempt failed; any other web service error is re-raised as is.
+    """
+    global last_query_time
+    # The lock is held for the whole call, including the sleep, so that
+    # concurrent callers are serialized behind the rate limit.
+    with mb_lock:
+        for i in range(MAX_QUERY_RETRY):
+            since_last_query = time.time() - last_query_time
+            if since_last_query < QUERY_WAIT_TIME:
+                time.sleep(QUERY_WAIT_TIME - since_last_query)
+            last_query_time = time.time()
+            # The specific handlers must come before the generic
+            # WebServiceError one: python-musicbrainz2 derives
+            # ConnectionError and ResponseError from WebServiceError, so
+            # a leading generic handler would swallow both.
+            try:
+                # Try the function.
+                res = fun(*args, **kwargs)
+            except mbws.ConnectionError:
+                # Typically a timeout. Retry.
+                pass
+            except mbws.ResponseError, exc:
+                # Malformed response from server.
+                log.error('Bad response from MusicBrainz: ' + str(exc))
+                raise BadResponseError()
+            except mbws.WebServiceError, e:
+                # Server busy. Retry.
+                message = str(e.reason)
+                for errnum in (503, 504):
+                    if 'Error %i' % errnum in message:
+                        break
+                else:
+                    # This is not the error we're looking for.
+                    raise
+            else:
+                # Success. Return the result.
+                return res
+        # Gave up.
+        raise ServerBusyError()
+        # FIXME exponential backoff?
+
+def get_releases(**params):
+    """Given a list of parameters to ReleaseFilter, executes the
+    query and yields release dicts (complete with tracks). Releases
+    whose details cannot be fetched are skipped.
+    """
+    # Replace special cases.
+    if 'artistName' in params:
+        artist = params['artistName']
+        if artist in SPECIAL_CASE_ARTISTS:
+            del params['artistName']
+            params['artistId'] = SPECIAL_CASE_ARTISTS[artist]
+
+    # Issue query.
+    filt = mbws.ReleaseFilter(**params)
+    try:
+        results = _query_wrap(mbws.Query().getReleases, filter=filt)
+    except BadResponseError:
+        results = ()
+
+    # Construct results.
+    for result in results:
+        release = result.release
+        info = release_info(release.id)
+        if info is None:
+            # release_info returns None when the release could not be
+            # loaded; unpacking that would raise a TypeError and abort
+            # the whole iteration.
+            log.debug('No details for release %s; skipping' % release.id)
+            continue
+        tracks, _ = info
+        yield release_dict(release, tracks)
+
+def release_info(release_id):
+    """Given a MusicBrainz release ID, fetch a list of tracks on the
+    release and the release group ID. If the release is not found,
+    returns None.
+    """
+    try:
+        release = _query_wrap(mbws.Query().getReleaseById, release_id,
+                              RELEASE_INCLUDES)
+    except BadResponseError:
+        release = None
+
+    if release:
+        return release.getTracks(), release.getReleaseGroup().getId()
+    else:
+        return None
+
+def _lucene_escape(text):
+    """Escapes a string so it may be used verbatim in a Lucene query
+    string.
+    """
+    # Regex stolen from MusicBrainz Picard.
+    out = re.sub(r'([+\-&|!(){}\[\]\^"~*?:\\])', r'\\\1', text)
+    return out.replace('\x00', '')
+
+def _lucene_query(criteria):
+    """Given a dictionary containing search criteria, produce a string
+    that may be used as a MusicBrainz search query.
+    """
+    query_parts = []
+    for name, value in criteria.items():
+        value = _lucene_escape(value).strip().lower()
+        if value:
+            query_parts.append(u'%s:(%s)' % (name, value))
+    return u' '.join(query_parts)
+
+def find_releases(criteria, limit=SEARCH_LIMIT):
+    """Get an iterable of release dictionaries from the MusicBrainz
+    database that match `criteria`. The latter is a dictionary whose
+    keys are MusicBrainz field names and whose values are search terms
+    for those fields. If every search term is empty, the result is an
+    empty iterator.
+
+    The field names are from MusicBrainz's Lucene query syntax, which
+    is detailed here:
+        http://wiki.musicbrainz.org/Text_Search_Syntax
+    """
+    # Work on a copy so the caller's dictionary is left untouched by
+    # the special-case replacement below.
+    criteria = dict(criteria)
+
+    # Replace special cases.
+    if 'artist' in criteria:
+        artist = criteria['artist']
+        if artist in SPECIAL_CASE_ARTISTS:
+            del criteria['artist']
+            criteria['arid'] = SPECIAL_CASE_ARTISTS[artist]
+
+    # Build the filter and send the query.
+    if any(criteria.itervalues()):
+        query = _lucene_query(criteria)
+        log.debug('album query: %s' % query)
+        return get_releases(limit=limit, query=query)
+
+    # Nothing to search for. Hand back something iterable rather than
+    # None so that callers can always loop over the result.
+    return iter(())
+
+def find_tracks(criteria, limit=SEARCH_LIMIT):
+    """Get a sequence of track dictionaries from MusicBrainz that match
+    `criteria`, a search term dictionary similar to the one passed to
+    `find_releases`.
+    """
+    if any(criteria.itervalues()):
+        query = _lucene_query(criteria)
+        log.debug('track query: %s' % query)
+        filt = mbws.TrackFilter(limit=limit, query=query)
+        try:
+            results = _query_wrap(mbws.Query().getTracks, filter=filt)
+        except BadResponseError:
+            results = ()
+        for result in results:
+            track = result.track
+            yield track_dict(track)
+
+def track_dict(track):
+    """Produces a dictionary summarizing a MusicBrainz `Track` object.
+    """
+    t = {'title': track.title,
+         'id': track.id.rsplit('/', 1)[1]}
+    if track.artist is not None:
+        # Track artists will only be present for releases with
+        # multiple artists.
+        t['artist'] = track.artist.name
+        t['artist_id'] = track.artist.id.rsplit('/', 1)[1]
+    if track.duration is not None:
+        # Duration not always present.
+        t['length'] = track.duration/(1000.0)
+    return t
+
+def release_dict(release, tracks=None):
+    """Takes a MusicBrainz `Release` object and returns a dictionary
+    containing the interesting data about that release. A list of
+    `Track` objects may also be provided as `tracks`; they are then
+    included in the resulting dictionary.
+    """
+    # Basic info.
+    out = {'album': release.title,
+           'album_id': release.id.rsplit('/', 1)[1],
+           'artist': release.artist.name,
+           'artist_id': release.artist.id.rsplit('/', 1)[1],
+           'asin': release.asin,
+           'albumtype': '',
+          }
+    out['va'] = out['artist_id'] == VARIOUS_ARTISTS_ID
+
+    # Release type not always populated.
+    for releasetype in release.types:
+        if releasetype in RELEASE_TYPES:
+            out['albumtype'] = releasetype.split('#')[1].lower()
+            break
+
+    # Release date and label.
+    try:
+        event = release.getEarliestReleaseEvent()
+    except Exception:
+        # The python-musicbrainz2 module has a bug that will raise an
+        # exception when there is no release date to be found. In this
+        # case, we just skip adding a release date to the dict.
+        # (Exception rather than a bare except, so that interrupts and
+        # interpreter exit still propagate.)
+        pass
+    else:
+        if event:
+            # Release date.
+            date_str = event.getDate()
+            if date_str:
+                date_parts = date_str.split('-')
+                for key in ('year', 'month', 'day'):
+                    if date_parts:
+                        out[key] = int(date_parts.pop(0))
+
+            # Label name.
+            label = event.getLabel()
+            if label:
+                out['label'] = label.getName()
+
+    # Tracks.
+    if tracks is not None:
+        out['tracks'] = map(track_dict, tracks)
+
+    return out
+
+def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT):
+    """Searches for a single album ("release" in MusicBrainz parlance)
+    and returns an iterator over dictionaries of information (as
+    returned by `release_dict`).
+
+    The query consists of an artist name, an album name, and,
+    optionally, a number of tracks on the album.
+    """
+    # Build search criteria.
+    criteria = {'release': album}
+    if artist is not None:
+        criteria['artist'] = artist
+    else:
+        # Various Artists search.
+        criteria['arid'] = VARIOUS_ARTISTS_ID
+    if tracks is not None:
+        criteria['tracks'] = str(tracks)
+
+    # Search for the release.
+    return find_releases(criteria)
+
+def match_track(artist, title):
+    """Searches for a single track and returns an iterable of track
+    info dictionaries (as returned by `track_dict`).
+    """
+    return find_tracks({
+        'artist': artist,
+        'track': title,
+    })
+
+def album_for_id(albumid):
+    """Fetches an album by its MusicBrainz ID and returns an
+    information dictionary. If no match is found, returns None.
+    """
+    query = mbws.Query()
+    try:
+        album = _query_wrap(query.getReleaseById, albumid, RELEASE_INCLUDES)
+    except BadResponseError:
+        return None
+    except (mbws.ResourceNotFoundError, mbws.RequestError), exc:
+        log.debug('Album ID match failed: ' + str(exc))
+        return None
+    return release_dict(album, album.tracks)
+
+def track_for_id(trackid):
+    """Fetches a track by its MusicBrainz ID. Returns a track info
+    dictionary or None if no track is found.
+    """
+    query = mbws.Query()
+    try:
+        track = _query_wrap(query.getTrackById, trackid, TRACK_INCLUDES)
+    except BadResponseError:
+        return None
+    except (mbws.ResourceNotFoundError, mbws.RequestError), exc:
+        log.debug('Track ID match failed: ' + str(exc))
+        return None
+    return track_dict(track)
diff --git a/lib/beets/importer.py b/lib/beets/importer.py
new file mode 100644
index 00000000..27ac60b0
--- /dev/null
+++ b/lib/beets/importer.py
@@ -0,0 +1,651 @@
+# This file is part of beets.
+# Copyright 2011, Adrian Sampson.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+"""Provides the basic, interface-agnostic workflow for importing and
+autotagging music files.
+"""
+from __future__ import with_statement # Python 2.5
+import os
+import logging
+import pickle
+
+from lib.beets import autotag
+from lib.beets import library
+# An "as" target must be a plain identifier; a dotted name there is a
+# syntax error that keeps this whole module from being imported.
+import lib.beets.autotag.art as autotag_art
+from lib.beets import plugins
+from lib.beets.util import pipeline
+from lib.beets.util import syspath, normpath
+from lib.beets.util.enumeration import enum
+
+action = enum(
+    'SKIP', 'ASIS', 'TRACKS', 'MANUAL', 'APPLY', 'MANUAL_ID',
+    name='action'
+)
+
+QUEUE_SIZE = 128
+STATE_FILE = os.path.expanduser('~/.beetsstate')
+
+# Global logger.
+log = logging.getLogger('beets')
+
+class ImportAbort(Exception):
+    """Raised when the user aborts the tagging operation.
+    """
+    pass
+
+
+# Utilities.
+
+def tag_log(logfile, status, path):
+    """Log a message about a given album to logfile. The status should
+    reflect the reason the album couldn't be tagged.
+    """
+    if logfile:
+        print >>logfile, '%s %s' % (status, path)
+
+def log_choice(config, task):
+    """Logs the task's current choice if it should be logged.
+    """
+    path = task.path if task.is_album else task.item.path
+    if task.choice_flag is action.ASIS:
+        tag_log(config.logfile, 'asis', path)
+    elif task.choice_flag is action.SKIP:
+        tag_log(config.logfile, 'skip', path)
+
+def _reopen_lib(lib):
+    """Because of limitations in SQLite, a given Library is bound to
+    the thread in which it was created. This function reopens Library
+    objects so that they can be used from separate threads.
+    """
+    if isinstance(lib, library.Library):
+        return library.Library(
+            lib.path,
+            lib.directory,
+            lib.path_formats,
+            lib.art_filename,
+        )
+    else:
+        return lib
+
+def _duplicate_check(lib, artist, album, recent=None):
+    """Check whether an album already exists in the library. `recent`
+    should be a set of (artist, album) pairs that will be built up
+    with every call to this function and checked along with the
+    library.
+    """
+    if artist is None:
+        # As-is import with no artist. Skip check.
+        return False
+
+    # Try the recent albums.
+    if recent is not None:
+        if (artist, album) in recent:
+            return True
+        recent.add((artist, album))
+
+    # Look in the library.
+    for album_cand in lib.albums(artist=artist):
+        if album_cand.album == album:
+            return True
+
+    return False
+
+def _item_duplicate_check(lib, artist, title, recent=None):
+    """Check whether an item already exists in the library."""
+    # Try recent items.
+    if recent is not None:
+        if (artist, title) in recent:
+            return True
+        recent.add((artist, title))
+
+    # Check the library.
+    item_iter = lib.items(artist=artist, title=title)
+    try:
+        item_iter.next()
+    except StopIteration:
+        return False
+    finally:
+        item_iter.close()
+
+    return True
+
+# Utilities for reading and writing the beets progress file, which
+# allows long tagging tasks to be resumed when they pause (or crash).
+PROGRESS_KEY = 'tagprogress'
+def progress_set(toppath, path):
+    """Record that tagging for the given `toppath` was successful up to
+    `path`. If path is None, then clear the progress value (indicating
+    that the tagging completed).
+    """
+    try:
+        with open(STATE_FILE) as f:
+            state = pickle.load(f)
+    except IOError:
+        state = {PROGRESS_KEY: {}}
+
+    if path is None:
+        # Remove progress from file.
+        if toppath in state[PROGRESS_KEY]:
+            del state[PROGRESS_KEY][toppath]
+    else:
+        state[PROGRESS_KEY][toppath] = path
+
+    with open(STATE_FILE, 'w') as f:
+        pickle.dump(state, f)
+def progress_get(toppath):
+    """Get the last successfully tagged subpath of toppath. If toppath
+    has no progress information, returns None.
+    """
+    try:
+        with open(STATE_FILE) as f:
+            state = pickle.load(f)
+    except IOError:
+        return None
+    return state[PROGRESS_KEY].get(toppath)
+
+
+# The configuration structure.
+
+class ImportConfig(object):
+    """Contains all the settings used during an import session. Should
+    be used in a "write-once" way -- everything is set up initially and
+    then never touched again.
+    """
+    _fields = ['lib', 'paths', 'resume', 'logfile', 'color', 'quiet',
+               'quiet_fallback', 'copy', 'write', 'art', 'delete',
+               'choose_match_func', 'should_resume_func', 'threaded',
+               'autot', 'singletons', 'timid', 'choose_item_func']
+    def __init__(self, **kwargs):
+        # Every field is required; a missing one raises KeyError.
+        for slot in self._fields:
+            setattr(self, slot, kwargs[slot])
+
+        # Normalize the paths.
+        if self.paths:
+            self.paths = map(normpath, self.paths)
+
+
+# The importer task class.
+
+class ImportTask(object):
+    """Represents a single set of items to be imported along with its
+    intermediate state. May represent an album or a single item.
+    """
+    def __init__(self, toppath=None, path=None, items=None):
+        self.toppath = toppath
+        self.path = path
+        self.items = items
+        self.sentinel = False
+
+    @classmethod
+    def done_sentinel(cls, toppath):
+        """Create an ImportTask that indicates the end of a top-level
+        directory import.
+        """
+        obj = cls(toppath)
+        obj.sentinel = True
+        return obj
+
+    @classmethod
+    def progress_sentinel(cls, toppath, path):
+        """Create a task indicating that a single directory in a larger
+        import has finished. This is only required for singleton
+        imports; progress is implied for album imports.
+        """
+        obj = cls(toppath, path)
+        obj.sentinel = True
+        return obj
+
+    @classmethod
+    def item_task(cls, item):
+        """Creates an ImportTask for a single item."""
+        obj = cls()
+        obj.item = item
+        obj.is_album = False
+        return obj
+
+    def set_match(self, cur_artist, cur_album, candidates, rec):
+        """Sets the candidates for this album matched by the
+        `autotag.tag_album` method.
+        """
+        assert not self.sentinel
+        self.cur_artist = cur_artist
+        self.cur_album = cur_album
+        self.candidates = candidates
+        self.rec = rec
+        self.is_album = True
+
+    def set_null_match(self):
+        """Set the candidates to indicate no album match was found.
+        """
+        self.set_match(None, None, None, None)
+
+    def set_item_match(self, candidates, rec):
+        """Set the match for a single-item task."""
+        assert not self.is_album
+        assert self.item is not None
+        self.item_match = (candidates, rec)
+
+    def set_null_item_match(self):
+        """For single-item tasks, mark the item as having no matches.
+        """
+        assert not self.is_album
+        assert self.item is not None
+        self.item_match = None
+
+    def set_choice(self, choice):
+        """Given either an (info, items) tuple or an action constant,
+        indicates that an action has been selected by the user (or
+        automatically).
+        """
+        assert not self.sentinel
+        # Not part of the task structure:
+        assert choice not in (action.MANUAL, action.MANUAL_ID)
+        assert choice != action.APPLY # Only used internally.
+        if choice in (action.SKIP, action.ASIS, action.TRACKS):
+            self.choice_flag = choice
+            self.info = None
+        else:
+            assert not isinstance(choice, action)
+            if self.is_album:
+                info, items = choice
+                self.items = items # Reordered items list.
+            else:
+                info = choice
+            self.info = info
+            self.choice_flag = action.APPLY # Implicit choice.
+
+    def save_progress(self):
+        """Updates the progress state to indicate that this album has
+        finished.
+        """
+        if self.sentinel and self.path is None:
+            # "Done" sentinel.
+            progress_set(self.toppath, None)
+        elif self.sentinel or self.is_album:
+            # "Directory progress" sentinel for singletons or a real
+            # album task, which implies the same.
+            progress_set(self.toppath, self.path)
+
+    # Logical decisions.
+    def should_write_tags(self):
+        """Should new info be written to the files' metadata?"""
+        if self.choice_flag == action.APPLY:
+            return True
+        elif self.choice_flag in (action.ASIS, action.TRACKS, action.SKIP):
+            return False
+        else:
+            assert False
+    def should_fetch_art(self):
+        """Should album art be downloaded for this album?"""
+        return self.should_write_tags() and self.is_album
+    def should_infer_aa(self):
+        """When creating an album structure, should the album artist
+        field be inferred from the plurality of track artists?
+        """
+        assert self.is_album
+        if self.choice_flag == action.APPLY:
+            # Album artist comes from the info dictionary.
+            return False
+        elif self.choice_flag == action.ASIS:
+            # As-is imports likely don't have an album artist.
+            return True
+        else:
+            assert False
+    def should_skip(self):
+        """After a choice has been made, returns True if this is a
+        sentinel or it has been marked for skipping.
+        """
+        return self.sentinel or self.choice_flag == action.SKIP
+
+
+# Full-album pipeline stages.
+
+def read_tasks(config):
+    """A generator yielding all the albums (as ImportTask objects) found
+    in the user-specified list of paths. In the case of a singleton
+    import, yields single-item tasks instead.
+    """
+    # Look for saved progress.
+    progress = config.resume is not False
+    if progress:
+        resume_dirs = {}
+        for path in config.paths:
+            resume_dir = progress_get(path)
+            if resume_dir:
+
+                # Either accept immediately or prompt for input to decide.
+                if config.resume:
+                    do_resume = True
+                    log.warn('Resuming interrupted import of %s' % path)
+                else:
+                    do_resume = config.should_resume_func(config, path)
+
+                if do_resume:
+                    resume_dirs[path] = resume_dir
+                else:
+                    # Clear progress; we're starting from the top.
+                    progress_set(path, None)
+
+    for toppath in config.paths:
+        # Check whether the path is to a file.
+        if config.singletons and not os.path.isdir(syspath(toppath)):
+            item = library.Item.from_path(toppath)
+            yield ImportTask.item_task(item)
+            continue
+
+        # Produce paths under this directory.
+        if progress:
+            resume_dir = resume_dirs.get(toppath)
+        for path, items in autotag.albums_in_dir(toppath):
+            if progress and resume_dir:
+                # We're fast-forwarding to resume a previous tagging.
+                if path == resume_dir:
+                    # We've hit the last good path! Turn off the
+                    # fast-forwarding.
+                    resume_dir = None
+                continue
+
+            # Yield all the necessary tasks.
+            if config.singletons:
+                for item in items:
+                    yield ImportTask.item_task(item)
+                yield ImportTask.progress_sentinel(toppath, path)
+            else:
+                yield ImportTask(toppath, path, items)
+
+        # Indicate the directory is finished.
+        yield ImportTask.done_sentinel(toppath)
+
+def initial_lookup(config):
+    """A coroutine for performing the initial MusicBrainz lookup for an
+    album. It accepts lists of Items and yields
+    (items, cur_artist, cur_album, candidates, rec) tuples. If no match
+    is found, all of the yielded parameters (except items) are None.
+    """
+    task = None
+    while True:
+        task = yield task
+        if task.sentinel:
+            continue
+
+        log.debug('Looking up: %s' % task.path)
+        try:
+            task.set_match(*autotag.tag_album(task.items, config.timid))
+        except autotag.AutotagError:
+            task.set_null_match()
+
+def user_query(config):
+    """A coroutine for interfacing with the user about the tagging
+    process. lib is the Library to import into and logfile may be
+    a file-like object for logging the import process. The coroutine
+    accepts and yields ImportTask objects.
+    """
+    lib = _reopen_lib(config.lib)
+    recent = set()
+    task = None
+    while True:
+        task = yield task
+        if task.sentinel:
+            continue
+
+        # Ask the user for a choice.
+        choice = config.choose_match_func(task, config)
+        task.set_choice(choice)
+        log_choice(config, task)
+
+        # As-tracks: transition to singleton workflow.
+        if choice is action.TRACKS:
+            # Set up a little pipeline for dealing with the singletons.
+            item_tasks = []
+            def emitter():
+                for item in task.items:
+                    yield ImportTask.item_task(item)
+                yield ImportTask.progress_sentinel(task.toppath, task.path)
+            def collector():
+                while True:
+                    item_task = yield
+                    item_tasks.append(item_task)
+            ipl = pipeline.Pipeline((emitter(), item_lookup(config),
+                                     item_query(config), collector()))
+            ipl.run_sequential()
+            task = pipeline.multiple(item_tasks)
+
+        # Check for duplicates if we have a match (or ASIS).
+        if choice is action.ASIS or isinstance(choice, tuple):
+            if choice is action.ASIS:
+                artist = task.cur_artist
+                album = task.cur_album
+            else:
+                artist = task.info['artist']
+                album = task.info['album']
+            if _duplicate_check(lib, artist, album, recent):
+                tag_log(config.logfile, 'duplicate', task.path)
+                log.warn("This album is already in the library!")
+                task.set_choice(action.SKIP)
+
+def show_progress(config):
+    """This stage replaces the initial_lookup and user_query stages
+    when the importer is run without autotagging. It displays the album
+    name and artist as the files are added.
+    """
+    task = None
+    while True:
+        task = yield task
+        if task.sentinel:
+            continue
+
+        log.info(task.path)
+
+        # Behave as if ASIS were selected.
+        task.set_null_match()
+        task.set_choice(action.ASIS)
+
+def apply_choices(config):
+    """A coroutine for applying changes to albums during the autotag
+    process.
+    """
+    lib = _reopen_lib(config.lib)
+    task = None
+    while True:
+        task = yield task
+        if task.should_skip():
+            continue
+
+        # Change metadata, move, and copy.
+        if task.should_write_tags():
+            if task.is_album:
+                autotag.apply_metadata(task.items, task.info)
+            else:
+                autotag.apply_item_metadata(task.item, task.info)
+        items = task.items if task.is_album else [task.item]
+        if config.copy and config.delete:
+            # Remembered so that finalize can delete the originals.
+            task.old_paths = [os.path.realpath(syspath(item.path))
+                              for item in items]
+        for item in items:
+            if config.copy:
+                item.move(lib, True, task.is_album)
+            if config.write and task.should_write_tags():
+                item.write()
+
+        # Add items to library. We consolidate this at the end to avoid
+        # locking while we do the copying and tag updates.
+        try:
+            if task.is_album:
+                # Add an album.
+                album = lib.add_album(task.items,
+                                      infer_aa = task.should_infer_aa())
+                task.album_id = album.id
+            else:
+                # Add tracks.
+                for item in items:
+                    lib.add(item)
+        finally:
+            lib.save()
+
+def fetch_art(config):
+    """A coroutine that fetches and applies album art for albums where
+    appropriate.
+    """
+    lib = _reopen_lib(config.lib)
+    task = None
+    while True:
+        task = yield task
+        if task.should_skip():
+            continue
+
+        if task.should_fetch_art():
+            artpath = autotag_art.art_for_album(task.info)
+
+            # Save the art if any was found.
+            if artpath:
+                try:
+                    album = lib.get_album(task.album_id)
+                    album.set_art(artpath)
+                finally:
+                    lib.save(False)
+
+def finalize(config):
+    """A coroutine that finishes up importer tasks. In particular, the
+    coroutine sends plugin events, deletes old files, and saves
+    progress. This is a "terminal" coroutine (it yields None).
+    """
+    lib = _reopen_lib(config.lib)
+    while True:
+        task = yield
+        if task.should_skip():
+            if config.resume is not False:
+                task.save_progress()
+            continue
+
+        items = task.items if task.is_album else [task.item]
+
+        # Announce that we've added an album.
+        if task.is_album:
+            album = lib.get_album(task.album_id)
+            plugins.send('album_imported', lib=lib, album=album)
+        else:
+            for item in items:
+                plugins.send('item_imported', lib=lib, item=item)
+
+        # Finally, delete old files.
+        if config.copy and config.delete:
+            # Normalize exactly as apply_choices did when it recorded
+            # task.old_paths, so the membership test below compares
+            # like with like before anything is removed.
+            new_paths = [os.path.realpath(syspath(item.path))
+                         for item in items]
+            for old_path in task.old_paths:
+                # Only delete files that were actually moved.
+                if old_path not in new_paths:
+                    os.remove(syspath(old_path))
+
+        # Update progress.
+        if config.resume is not False:
+            task.save_progress()
+
+
+# Singleton pipeline stages.
+
+def item_lookup(config):
+    """A coroutine used to perform the initial MusicBrainz lookup for
+    an item task.
+    """
+    task = None
+    while True:
+        task = yield task
+        if task.sentinel:
+            continue
+
+        task.set_item_match(*autotag.tag_item(task.item, config.timid))
+
+def item_query(config):
+    """A coroutine that queries the user for input on single-item
+    lookups.
+    """
+    lib = _reopen_lib(config.lib)
+    task = None
+    recent = set()
+    while True:
+        task = yield task
+        if task.sentinel:
+            continue
+
+        choice = config.choose_item_func(task, config)
+        task.set_choice(choice)
+        log_choice(config, task)
+
+        # Duplicate check.
+        if task.choice_flag in (action.ASIS, action.APPLY):
+            if choice is action.ASIS:
+                artist = task.item.artist
+                title = task.item.title
+            else:
+                artist = task.info['artist']
+                title = task.info['title']
+            if _item_duplicate_check(lib, artist, title, recent):
+                tag_log(config.logfile, 'duplicate', task.item.path)
+                log.warn("This item is already in the library!")
+                task.set_choice(action.SKIP)
+
+def item_progress(config):
+    """Skips the lookup and query stages in a non-autotagged singleton
+    import. Just shows progress.
+    """
+    task = None
+    log.info('Importing items:')
+    while True:
+        task = yield task
+        if task.sentinel:
+            continue
+
+        log.info(task.item.path)
+        task.set_null_item_match()
+        task.set_choice(action.ASIS)
+
+
+# Main driver.
+
+def run_import(**kwargs):
+    """Run an import. The keyword arguments are the same as those to
+    ImportConfig.
+    """
+    config = ImportConfig(**kwargs)
+
+    # Set up the pipeline.
+    stages = [read_tasks(config)]
+    if config.singletons:
+        # Singleton importer.
+        if config.autot:
+            stages += [item_lookup(config), item_query(config)]
+        else:
+            stages += [item_progress(config)]
+    else:
+        # Whole-album importer.
+        if config.autot:
+            # Only look up and query the user when autotagging.
+            stages += [initial_lookup(config), user_query(config)]
+        else:
+            # When not autotagging, just display progress.
+            stages += [show_progress(config)]
+    stages += [apply_choices(config)]
+    if config.art:
+        stages += [fetch_art(config)]
+    stages += [finalize(config)]
+    pl = pipeline.Pipeline(stages)
+
+    # Run the pipeline.
+    try:
+        if config.threaded:
+            pl.run_parallel(QUEUE_SIZE)
+        else:
+            pl.run_sequential()
+    except ImportAbort:
+        # User aborted operation. Silently stop.
+        pass
diff --git a/lib/beets/library.py b/lib/beets/library.py
new file mode 100644
index 00000000..213d42dd
--- /dev/null
+++ b/lib/beets/library.py
@@ -0,0 +1,1202 @@
+# This file is part of beets.
+# Copyright 2011, Adrian Sampson.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+import sqlite3
+import os
+import re
+import shutil
+import sys
+from string import Template
+import logging
+from lib.beets.mediafile import MediaFile
+from lib.beets import plugins
+from lib.beets import util
+from lib.beets.util import bytestring_path, syspath, normpath
+
+MAX_FILENAME_LENGTH = 200
+
+# Fields in the "items" database table; all the metadata available for
+# items in the library.
+# These are used directly in SQL; they are
+# vulnerable to injection if accessible to the user.
+# Each tuple has the following values:
+# - The name of the field.
+# - The (SQLite) type of the field.
+# - Is the field writable?
+# - Does the field reflect an attribute of a MediaFile?
+ITEM_FIELDS = [
+    # Fields that are neither writable nor backed by a MediaFile
+    # attribute.
+    ('id', 'integer primary key', False, False),
+    ('path', 'blob', False, False),
+    ('album_id', 'int', False, False),
+
+    # Writable fields backed by a MediaFile attribute.
+    ('title', 'text', True, True),
+    ('artist', 'text', True, True),
+    ('album', 'text', True, True),
+    ('albumartist', 'text', True, True),
+    ('genre', 'text', True, True),
+    ('composer', 'text', True, True),
+    ('grouping', 'text', True, True),
+    ('year', 'int', True, True),
+    ('month', 'int', True, True),
+    ('day', 'int', True, True),
+    ('track', 'int', True, True),
+    ('tracktotal', 'int', True, True),
+    ('disc', 'int', True, True),
+    ('disctotal', 'int', True, True),
+    ('lyrics', 'text', True, True),
+    ('comments', 'text', True, True),
+    ('bpm', 'int', True, True),
+    ('comp', 'bool', True, True),
+    ('mb_trackid', 'text', True, True),
+    ('mb_albumid', 'text', True, True),
+    ('mb_artistid', 'text', True, True),
+    ('mb_albumartistid', 'text', True, True),
+    ('albumtype', 'text', True, True),
+
+    # Fields backed by a MediaFile attribute but not writable.
+    ('length', 'real', False, True),
+    ('bitrate', 'int', False, True),
+    ('format', 'text', False, True),
+]
+# Names of the fields that are both MediaFile-backed and writable.
+ITEM_KEYS_WRITABLE = [f[0] for f in ITEM_FIELDS if f[3] and f[2]]
+# Names of all MediaFile-backed fields, writable or not.
+ITEM_KEYS_META = [f[0] for f in ITEM_FIELDS if f[3]]
+# Names of every field, in table order.
+ITEM_KEYS = [f[0] for f in ITEM_FIELDS]
+
+# Database fields for the "albums" table.
+# The third entry in each tuple indicates whether the field reflects an
+# identically-named field in the items table.
+ALBUM_FIELDS = [ + ('id', 'integer primary key', False), + ('artpath', 'blob', False), + + ('albumartist', 'text', True), + ('album', 'text', True), + ('genre', 'text', True), + ('year', 'int', True), + ('month', 'int', True), + ('day', 'int', True), + ('tracktotal', 'int', True), + ('disctotal', 'int', True), + ('comp', 'bool', True), + ('mb_albumid', 'text', True), + ('mb_albumartistid', 'text', True), + ('albumtype', 'text', True), +] +ALBUM_KEYS = [f[0] for f in ALBUM_FIELDS] +ALBUM_KEYS_ITEM = [f[0] for f in ALBUM_FIELDS if f[2]] + +# Default search fields for various granularities. +ARTIST_DEFAULT_FIELDS = ('artist',) +ALBUM_DEFAULT_FIELDS = ('album', 'albumartist', 'genre') +ITEM_DEFAULT_FIELDS = ARTIST_DEFAULT_FIELDS + ALBUM_DEFAULT_FIELDS + \ + ('title', 'comments') + +# Logger. +log = logging.getLogger('beets') +if not log.handlers: + log.addHandler(logging.StreamHandler()) + + +# Exceptions. + +class InvalidFieldError(Exception): + pass + + +# Library items (songs). + +class Item(object): + def __init__(self, values): + self.dirty = {} + self._fill_record(values) + self._clear_dirty() + + @classmethod + def from_path(cls, path): + """Creates a new item from the media file at the specified path. + """ + # Initiate with values that aren't read from files. + i = cls({ + 'album_id': None, + }) + i.read(path) + return i + + def _fill_record(self, values): + self.record = {} + for key in ITEM_KEYS: + try: + setattr(self, key, values[key]) + except KeyError: + setattr(self, key, None) + + def _clear_dirty(self): + self.dirty = {} + for key in ITEM_KEYS: + self.dirty[key] = False + + def __repr__(self): + return 'Item(' + repr(self.record) + ')' + + + # Item field accessors. + + def __getattr__(self, key): + """If key is an item attribute (i.e., a column in the database), + returns the record entry for that key. 
+ """ + if key in ITEM_KEYS: + return self.record[key] + else: + raise AttributeError(key + ' is not a valid item field') + + def __setattr__(self, key, value): + """If key is an item attribute (i.e., a column in the database), + sets the record entry for that key to value. Note that to change + the attribute in the database or in the file's tags, one must + call store() or write(). + + Otherwise, performs an ordinary setattr. + """ + # Encode unicode paths and read buffers. + if key == 'path': + if isinstance(value, unicode): + value = bytestring_path(value) + elif isinstance(value, buffer): + value = str(value) + + if key in ITEM_KEYS: + if (not (key in self.record)) or (self.record[key] != value): + # don't dirty if value unchanged + self.record[key] = value + self.dirty[key] = True + else: + super(Item, self).__setattr__(key, value) + + + # Interaction with file metadata. + + def read(self, read_path=None): + """Read the metadata from the associated file. If read_path is + specified, read metadata from that file instead. + """ + if read_path is None: + read_path = self.path + else: + read_path = normpath(read_path) + f = MediaFile(syspath(read_path)) + + for key in ITEM_KEYS_META: + setattr(self, key, getattr(f, key)) + self.path = read_path + + def write(self): + """Writes the item's metadata to the associated file. + """ + f = MediaFile(syspath(self.path)) + for key in ITEM_KEYS_WRITABLE: + setattr(f, key, getattr(self, key)) + f.save() + + + # Dealing with files themselves. + + def move(self, library, copy=False, in_album=False): + """Move the item to its designated location within the library + directory (provided by destination()). Subdirectories are + created as needed. If the operation succeeds, the item's path + field is updated to reflect the new location. + + If copy is True, moving the file is copied rather than moved. 
# Library queries.

class Query(object):
    """An abstract class representing a query into the item database.
    """
    def clause(self):
        """Returns (clause, subvals) where clause is a valid sqlite
        WHERE clause implementing the query and subvals is a list of
        items to be substituted for ?s in the clause.
        """
        raise NotImplementedError

    def match(self, item):
        """Check whether this query matches a given Item. Can be used to
        perform queries on arbitrary sets of Items.
        """
        raise NotImplementedError

    def statement(self, columns='*'):
        """Returns (query, subvals) where query is a sqlite SELECT
        statement to enact this query and subvals is a list of values
        to substitute in for ?s in the query.
        """
        clause, subvals = self.clause()
        return ('SELECT ' + columns + ' FROM items WHERE ' + clause, subvals)

    def count(self, library):
        """Returns `(num, length)` where `num` is the number of items in
        the library matching this query and `length` is their total
        length in seconds.
        """
        clause, subvals = self.clause()
        statement = 'SELECT COUNT(id), SUM(length) FROM items WHERE ' + clause
        c = library.conn.execute(statement, subvals)
        result = c.fetchone()
        c.close()
        # SUM() yields NULL (None) when no rows match.
        return (result[0], result[1] or 0.0)

    def execute(self, library):
        """Runs the query in the specified library, returning a
        ResultIterator.
        """
        c = library.conn.cursor()
        stmt, subs = self.statement()
        log.debug('Executing query: %s' % stmt)
        c.execute(stmt, subs)
        return ResultIterator(c, library)

class FieldQuery(Query):
    """An abstract query that searches in a specific field for a
    pattern. Raises InvalidFieldError if field is not in ITEM_KEYS.
    """
    def __init__(self, field, pattern):
        if field not in ITEM_KEYS:
            raise InvalidFieldError(field + ' is not an item key')
        self.field = field
        self.pattern = pattern

class MatchQuery(FieldQuery):
    """A query that looks for exact matches in an item field."""
    def clause(self):
        pattern = self.pattern
        # Byte-string paths are wrapped in a buffer before being handed
        # to sqlite.
        if self.field == 'path' and isinstance(pattern, str):
            pattern = buffer(pattern)
        return self.field + " = ?", [pattern]

    def match(self, item):
        return self.pattern == getattr(item, self.field)

class SubstringQuery(FieldQuery):
    """A query that matches a substring in a specific item field."""
    def clause(self):
        # Escape the LIKE wildcards (and the escape character itself)
        # so the pattern is matched literally.
        escaped = (self.pattern.replace('\\', '\\\\')
                               .replace('%', '\\%')
                               .replace('_', '\\_'))
        search = '%' + escaped + '%'
        clause = self.field + " like ? escape '\\'"
        subvals = [search]
        return clause, subvals

    def match(self, item):
        return self.pattern.lower() in getattr(item, self.field).lower()

class BooleanQuery(MatchQuery):
    """Matches a boolean field. Pattern should either be a boolean or a
    string reflecting a boolean.
    """
    def __init__(self, field, pattern):
        super(BooleanQuery, self).__init__(field, pattern)
        if isinstance(pattern, basestring):
            self.pattern = util.str2bool(pattern)
        # The pattern is compared as an integer (0 or 1).
        self.pattern = int(self.pattern)

class SingletonQuery(Query):
    """Matches either singleton or non-singleton items."""
    def __init__(self, sense):
        self.sense = sense

    def clause(self):
        if self.sense:
            return "album_id ISNULL", ()
        else:
            return "NOT album_id ISNULL", ()

    def match(self, item):
        return (not item.album_id) == self.sense

class CollectionQuery(Query):
    """An abstract query class that aggregates other queries. Can be
    indexed like a list to access the sub-queries.
    """
    # NOTE(review): the remainder of this class (the query-string regex
    # and whatever follows it) is not readable in this chunk and is not
    # reproduced here.
    def __init__(self, subqueries = ()):
        self.subqueries = subqueries

    # Container protocol: delegate to the underlying sequence.
    def __len__(self):
        return len(self.subqueries)

    def __getitem__(self, key):
        return self.subqueries[key]

    def __iter__(self):
        # Must return the iterator; without the return, iter(query)
        # raises TypeError.
        return iter(self.subqueries)

    def __contains__(self, item):
        # Must return the result; without the return, "x in query" is
        # always false.
        return item in self.subqueries

    def clause_with_joiner(self, joiner):
        """Returns a clause created by joining together the clauses of
        all subqueries with the string joiner (padded by spaces).
        """
        clause_parts = []
        subvals = []
        for subq in self.subqueries:
            subq_clause, subq_subvals = subq.clause()
            clause_parts.append('(' + subq_clause + ')')
            subvals += subq_subvals
        clause = (' ' + joiner + ' ').join(clause_parts)
        return clause, subvals
# Exceptions.

# Raised for any file MediaFile can't read.
class UnreadableFileError(IOError):
    pass

# Raised for files that don't seem to have a type MediaFile supports.
class FileTypeError(UnreadableFileError):
    pass


# Constants.

# Human-readable type names.
TYPES = {
    'mp3':  'MP3',
    'mp4':  'AAC',
    'ogg':  'OGG',
    'flac': 'FLAC',
    'ape':  'APE',
    'wv':   'WavPack',
    'mpc':  'Musepack',
}


# Utility.

def _safe_cast(out_type, val):
    """Tries to convert val to out_type without raising an exception.
    If the value can't be converted, a sensible default is returned
    instead: 0 for int, False for bool, u'' for unicode. out_type
    should be bool, int, or unicode; for any other out_type the value
    is passed through unchanged.
    """
    if out_type == int:
        if val is None:
            return 0
        elif isinstance(val, int) or isinstance(val, float):
            # Just a number. NaN and the infinities have no integer
            # value, so they fall back to the default.
            try:
                return int(val)
            except (ValueError, OverflowError):
                return 0
        else:
            # Process any other type as a string.
            if not isinstance(val, basestring):
                val = unicode(val)
            # Get a number from the front of the string.
            val = re.match('[0-9]*', val.strip()).group(0)
            if not val:
                return 0
            else:
                return int(val)

    elif out_type == bool:
        if val is None:
            return False
        else:
            try:
                # Should work for strings, bools, ints:
                return bool(int(val))
            except (ValueError, TypeError, OverflowError):
                # Non-numeric strings, objects int() rejects outright
                # (lists, dicts, ...), and infinite floats.
                return False

    elif out_type == unicode:
        if val is None:
            return u''
        elif isinstance(val, str):
            # A byte string: decode leniently so that non-ASCII bytes
            # cannot raise UnicodeDecodeError.
            return val.decode('utf8', 'ignore')
        else:
            return unicode(val)

    else:
        return val
packing = enum('SLASHED',   # pair delimited by /
               'TUPLE',     # a python tuple of 2 items
               'DATE',      # YYYY-MM-DD
               name='packing')

class StorageStyle(object):
    """Parameterizes the storage behavior of a single field for a
    certain tag format.
     - key: The Mutagen key used to access the field's data.
     - list_elem: Store item as a single object or as first element
       of a list.
     - as_type: Which type the value is stored as (unicode, int,
       bool, or str).
     - packing: If this value is packed in a multiple-value storage
       unit, which type of packing (in the packing enum). Otherwise,
       None. (Makes as_type irrelevant).
     - pack_pos: If the value is packed, in which position it is
       stored.
     - id3_desc: ID3 storage only: match against this 'desc' field as
       well as the key.
     - id3_frame_field: ID3 storage only: name of the frame attribute
       that holds the value.
    """
    def __init__(self, key, list_elem = True, as_type = unicode,
                 packing = None, pack_pos = 0, id3_desc = None,
                 id3_frame_field = 'text'):
        self.key = key
        self.list_elem = list_elem
        self.as_type = as_type
        self.packing = packing
        self.pack_pos = pack_pos
        self.id3_desc = id3_desc
        self.id3_frame_field = id3_frame_field


# Dealing with packings.

class Packed(object):
    """Makes a packed list of values subscriptable. To access the packed
    output after making changes, use packed_thing.items.
    """

    def __init__(self, items, packstyle, none_val=0, out_type=int):
        """Create a Packed object for subscripting the packed values in
        items. The items are packed using packstyle, which is a value
        from the packing enum. none_val is returned from a request when
        no suitable value is found in the items. Values are converted to
        out_type before they are returned.
        """
        self.items = items
        self.packstyle = packstyle
        self.none_val = none_val
        self.out_type = out_type

    def __getitem__(self, index):
        """Return the value at position index, converted to out_type.
        Returns none_val when nothing usable is stored there. Raises
        TypeError if index is not an integer.
        """
        if not isinstance(index, int):
            raise TypeError('index must be an integer')

        if self.items is None:
            return self.none_val

        items = self.items
        if self.packstyle == packing.DATE:
            # Remove time information from dates. Usually delimited by
            # a "T" or a space.
            items = re.sub(r'[Tt ].*$', '', unicode(items))

        # transform from a string packing into a list we can index into
        if self.packstyle == packing.SLASHED:
            seq = unicode(items).split('/')
        elif self.packstyle == packing.DATE:
            seq = unicode(items).split('-')
        elif self.packstyle == packing.TUPLE:
            seq = items # tuple: items is already indexable
        else:
            # Unknown packing: nothing to index into.
            seq = ()

        try:
            out = seq[index]
        except (IndexError, TypeError, KeyError):
            # Position missing, or the stored value is not indexable.
            out = None

        if out is None or out == self.none_val or out == '':
            return _safe_cast(self.out_type, self.none_val)
        else:
            return _safe_cast(self.out_type, out)

    def __setitem__(self, index, value):
        """Store value at position index and rebuild self.items in the
        packed representation for this pack style.
        """
        if self.packstyle in (packing.SLASHED, packing.TUPLE):
            # SLASHED and TUPLE are always two-item packings
            length = 2
        else:
            # DATE can have up to three fields
            length = 3

        # make a list of the items we'll pack, keeping the current
        # values of every other position
        new_items = []
        for i in range(length):
            if i == index:
                new_items.append(value)
            else:
                new_items.append(self[i])

        if self.packstyle == packing.DATE:
            # Truncate the items wherever we reach an invalid (none)
            # entry. This prevents dates like 2008-00-05.
            for i, item in enumerate(new_items):
                if item == self.none_val or item is None:
                    del(new_items[i:]) # truncate
                    break

        if self.packstyle == packing.SLASHED:
            self.items = '/'.join(map(unicode, new_items))
        elif self.packstyle == packing.DATE:
            field_lengths = [4, 2, 2] # YYYY-MM-DD
            elems = []
            for i, item in enumerate(new_items):
                elems.append( ('%0' + str(field_lengths[i]) + 'i') % item )
            self.items = '-'.join(elems)
        elif self.packstyle == packing.TUPLE:
            self.items = new_items
+ try: + frame = obj.mgfile[style.key] + except KeyError: + return None + + entry = getattr(frame, style.id3_frame_field) + + else: # Not MP3. + try: + entry = obj.mgfile[style.key] + except KeyError: + return None + + # possibly index the list + if style.list_elem: + if entry: # List must have at least one value. + return entry[0] + else: + return None + else: + return entry + + def _storedata(self, obj, val, style): + """Store val for this descriptor's field in the tag dictionary + according to the provided StorageStyle. Store it as a + single-item list if necessary. + """ + # wrap as a list if necessary + if style.list_elem: out = [val] + else: out = val + + if obj.type == 'mp3': + # Try to match on "desc" field. + if style.id3_desc is not None: + frames = obj.mgfile.tags.getall(style.key) + + # try modifying in place + found = False + for frame in frames: + if frame.desc == style.id3_desc: + setattr(frame, style.id3_frame_field, out) + found = True + break + + # need to make a new frame? + if not found: + assert isinstance(style.id3_frame_field, str) # Keyword. + frame = lib.mutagen.id3.Frames[style.key]( + encoding=3, + desc=style.id3_desc, + **{style.id3_frame_field: val} + ) + obj.mgfile.tags.add(frame) + + # Try to match on "owner" field. + elif style.key.startswith('UFID:'): + owner = style.key.split(':', 1)[1] + frames = obj.mgfile.tags.getall(style.key) + + for frame in frames: + # Replace existing frame data. + if frame.owner == owner: + setattr(frame, style.id3_frame_field, val) + else: + # New frame. + assert isinstance(style.id3_frame_field, str) # Keyword. + frame = lib.mutagen.id3.UFID(owner=owner, + **{style.id3_frame_field: val}) + obj.mgfile.tags.setall('UFID', [frame]) + + # Just replace based on key. + else: + assert isinstance(style.id3_frame_field, str) # Keyword. + frame = lib.mutagen.id3.Frames[style.key](encoding = 3, + **{style.id3_frame_field: val}) + obj.mgfile.tags.setall(style.key, [frame]) + + else: # Not MP3. 
+ obj.mgfile[style.key] = out + + def _styles(self, obj): + if obj.type in ('mp3', 'mp4'): + styles = self.styles[obj.type] + else: + styles = self.styles['etc'] # sane styles + + # Make sure we always return a list of styles, even when given + # a single style for convenience. + if isinstance(styles, StorageStyle): + return [styles] + else: + return styles + + def __get__(self, obj, owner): + """Retrieve the value of this metadata field. + """ + # Fetch the data using the various StorageStyles. + styles = self._styles(obj) + for style in styles: + # Use the first style that returns a reasonable value. + out = self._fetchdata(obj, style) + if out: + break + + if style.packing: + out = Packed(out, style.packing)[style.pack_pos] + + return _safe_cast(self.out_type, out) + + def __set__(self, obj, val): + """Set the value of this metadata field. + """ + # Store using every StorageStyle available. + styles = self._styles(obj) + for style in styles: + + if style.packing: + p = Packed(self._fetchdata(obj, style), style.packing) + p[style.pack_pos] = val + out = p.items + + else: # unicode, integer, or boolean scalar + out = val + + # deal with Nones according to abstract type if present + if out is None: + if self.out_type == int: + out = 0 + elif self.out_type == bool: + out = False + elif self.out_type == unicode: + out = u'' + # We trust that packed values are handled above. 
class CompositeDateField(object):
    """Exposes three component fields (year, month, day) as a single
    datetime.date value that can be both read and assigned.
    """
    def __init__(self, year_field, month_field, day_field):
        """Remember the fields that hold the year, month and day
        components.
        """
        self.year_field = year_field
        self.month_field = month_field
        self.day_field = day_field

    def __get__(self, obj, owner):
        """Build a datetime.date from the component fields. Components
        below the smallest legal value are raised to it (year to
        datetime.MINYEAR, month and day to 1), so year == 1999 with
        month == 0 and day == 0 gives datetime.date(1999, 1, 1). When
        the components do not form a valid date (e.g. month == 47),
        datetime.date.min is returned.
        """
        try:
            year = max(self.year_field.__get__(obj, owner), datetime.MINYEAR)
            month = max(self.month_field.__get__(obj, owner), 1)
            day = max(self.day_field.__get__(obj, owner), 1)
            return datetime.date(year, month, day)
        except ValueError:  # Out of range values.
            return datetime.date.min

    def __set__(self, obj, val):
        """Copy the year, month and day of the datetime.date val into
        the component fields, in that order.
        """
        components = (
            (self.year_field, 'year'),
            (self.month_field, 'month'),
            (self.day_field, 'day'),
        )
        for field, attr in components:
            field.__set__(obj, getattr(val, attr))
+ Holds a bytestring reflecting the image data. The image should + either be a JPEG or a PNG for cross-format compatibility. It's + probably a bad idea to use anything but these two formats. + """ + @classmethod + def _mime(cls, data): + """Return the MIME type (either image/png or image/jpeg) of the + image data (a bytestring). + """ + kind = imghdr.what(None, h=data) + if kind == 'png': + return 'image/png' + else: + # Currently just fall back to JPEG. + return 'image/jpeg' + + @classmethod + def _mp4kind(cls, data): + """Return the MPEG-4 image type code of the data. If the image + is not a PNG or JPEG, JPEG is assumed. + """ + kind = imghdr.what(None, h=data) + if kind == 'png': + return lib.mutagen.mp4.MP4Cover.FORMAT_PNG + else: + return lib.mutagen.mp4.MP4Cover.FORMAT_JPEG + + def __get__(self, obj, owner): + if obj.type == 'mp3': + # Look for APIC frames. + for frame in obj.mgfile.tags.values(): + if frame.FrameID == 'APIC': + picframe = frame + break + else: + # No APIC frame. + return None + + return picframe.data + + elif obj.type == 'mp4': + if 'covr' in obj.mgfile: + covers = obj.mgfile['covr'] + if covers: + cover = covers[0] + # cover is an MP4Cover, which is a subclass of str. + return cover + + # No cover found. + return None + + else: + # Here we're assuming everything but MP3 and MPEG-4 uses + # the Xiph/Vorbis Comments standard. This may not be valid. + # http://wiki.xiph.org/VorbisComment#Cover_art + + if 'metadata_block_picture' not in obj.mgfile: + # Try legacy COVERART tags. 
+ if 'coverart' in obj.mgfile and obj.mgfile['coverart']: + return base64.b64decode(obj.mgfile['coverart'][0]) + return None + + for data in obj.mgfile["metadata_block_picture"]: + try: + pic = lib.mutagen.flac.Picture(base64.b64decode(data)) + break + except TypeError: + pass + else: + return None + + return pic.data + + def __set__(self, obj, val): + if val is not None: + if not isinstance(val, str): + raise ValueError('value must be a byte string or None') + + if obj.type == 'mp3': + # Clear all APIC frames. + obj.mgfile.tags.delall('APIC') + if val is None: + # If we're clearing the image, we're done. + return + + picframe = lib.mutagen.id3.APIC( + encoding = 3, + mime = self._mime(val), + type = 3, # front cover + desc = u'', + data = val, + ) + obj.mgfile['APIC'] = picframe + + elif obj.type == 'mp4': + if val is None: + if 'covr' in obj.mgfile: + del obj.mgfile['covr'] + else: + cover = lib.mutagen.mp4.MP4Cover(val, self._mp4kind(val)) + obj.mgfile['covr'] = [cover] + + else: + # Again, assuming Vorbis Comments standard. + + # Strip all art, including legacy COVERART. + if 'metadata_block_picture' in obj.mgfile: + if 'metadata_block_picture' in obj.mgfile: + del obj.mgfile['metadata_block_picture'] + if 'coverart' in obj.mgfile: + del obj.mgfile['coverart'] + if 'coverartmime' in obj.mgfile: + del obj.mgfile['coverartmime'] + + # Add new art if provided. + if val is not None: + pic = lib.mutagen.flac.Picture() + pic.data = val + pic.mime = self._mime(val) + obj.mgfile['metadata_block_picture'] = [ + base64.b64encode(pic.write()) + ] + + +# The file (a collection of fields). + +class MediaFile(object): + """Represents a multimedia file on disk and provides access to its + metadata. + """ + + def __init__(self, path): + """Constructs a new MediaFile reflecting the file at path. May + throw UnreadableFileError. 
+ """ + self.path = path + + unreadable_exc = ( + lib.mutagen.mp3.HeaderNotFoundError, + lib.mutagen.flac.FLACNoHeaderError, + lib.mutagen.monkeysaudio.MonkeysAudioHeaderError, + lib.mutagen.mp4.MP4StreamInfoError, + lib.mutagen.oggvorbis.OggVorbisHeaderError, + ) + try: + self.mgfile = lib.mutagen.File(path) + except unreadable_exc: + raise UnreadableFileError('Mutagen could not read file') + except IOError: + raise UnreadableFileError('could not read file') + + if self.mgfile is None: # Mutagen couldn't guess the type + raise FileTypeError('file type unsupported by Mutagen') + elif type(self.mgfile).__name__ == 'M4A' or \ + type(self.mgfile).__name__ == 'MP4': + self.type = 'mp4' + elif type(self.mgfile).__name__ == 'ID3' or \ + type(self.mgfile).__name__ == 'MP3': + self.type = 'mp3' + elif type(self.mgfile).__name__ == 'FLAC': + self.type = 'flac' + elif type(self.mgfile).__name__ == 'OggVorbis': + self.type = 'ogg' + elif type(self.mgfile).__name__ == 'MonkeysAudio': + self.type = 'ape' + elif type(self.mgfile).__name__ == 'WavPack': + self.type = 'wv' + elif type(self.mgfile).__name__ == 'Musepack': + self.type = 'mpc' + else: + raise FileTypeError('file type %s unsupported by MediaFile' % + type(self.mgfile).__name__) + + # add a set of tags if it's missing + if self.mgfile.tags is None: + self.mgfile.add_tags() + + def save(self): + self.mgfile.save() + + + #### field definitions #### + + title = MediaField( + mp3 = StorageStyle('TIT2'), + mp4 = StorageStyle("\xa9nam"), + etc = StorageStyle('title'), + ) + artist = MediaField( + mp3 = StorageStyle('TPE1'), + mp4 = StorageStyle("\xa9ART"), + etc = StorageStyle('artist'), + ) + album = MediaField( + mp3 = StorageStyle('TALB'), + mp4 = StorageStyle("\xa9alb"), + etc = StorageStyle('album'), + ) + genre = MediaField( + mp3 = StorageStyle('TCON'), + mp4 = StorageStyle("\xa9gen"), + etc = StorageStyle('genre'), + ) + composer = MediaField( + mp3 = StorageStyle('TCOM'), + mp4 = StorageStyle("\xa9wrt"), + etc = 
StorageStyle('composer'), + ) + grouping = MediaField( + mp3 = StorageStyle('TIT1'), + mp4 = StorageStyle("\xa9grp"), + etc = StorageStyle('grouping'), + ) + year = MediaField(out_type=int, + mp3 = StorageStyle('TDRC', + packing = packing.DATE, + pack_pos = 0), + mp4 = StorageStyle("\xa9day", + packing = packing.DATE, + pack_pos = 0), + etc = [StorageStyle('date', + packing = packing.DATE, + pack_pos = 0), + StorageStyle('year')] + ) + month = MediaField(out_type=int, + mp3 = StorageStyle('TDRC', + packing = packing.DATE, + pack_pos = 1), + mp4 = StorageStyle("\xa9day", + packing = packing.DATE, + pack_pos = 1), + etc = StorageStyle('date', + packing = packing.DATE, + pack_pos = 1) + ) + day = MediaField(out_type=int, + mp3 = StorageStyle('TDRC', + packing = packing.DATE, + pack_pos = 2), + mp4 = StorageStyle("\xa9day", + packing = packing.DATE, + pack_pos = 2), + etc = StorageStyle('date', + packing = packing.DATE, + pack_pos = 2) + ) + date = CompositeDateField(year, month, day) + track = MediaField(out_type = int, + mp3 = StorageStyle('TRCK', + packing = packing.SLASHED, + pack_pos = 0), + mp4 = StorageStyle('trkn', + packing = packing.TUPLE, + pack_pos = 0), + etc = [StorageStyle('track'), + StorageStyle('tracknumber')] + ) + tracktotal = MediaField(out_type = int, + mp3 = StorageStyle('TRCK', + packing = packing.SLASHED, + pack_pos = 1), + mp4 = StorageStyle('trkn', + packing = packing.TUPLE, + pack_pos = 1), + etc = [StorageStyle('tracktotal'), + StorageStyle('trackc'), + StorageStyle('totaltracks')] + ) + disc = MediaField(out_type = int, + mp3 = StorageStyle('TPOS', + packing = packing.SLASHED, + pack_pos = 0), + mp4 = StorageStyle('disk', + packing = packing.TUPLE, + pack_pos = 0), + etc = [StorageStyle('disc'), + StorageStyle('discnumber')] + ) + disctotal = MediaField(out_type = int, + mp3 = StorageStyle('TPOS', + packing = packing.SLASHED, + pack_pos = 1), + mp4 = StorageStyle('disk', + packing = packing.TUPLE, + pack_pos = 1), + etc = 
[StorageStyle('disctotal'), + StorageStyle('discc'), + StorageStyle('totaldiscs')] + ) + lyrics = MediaField( + mp3 = StorageStyle('USLT', + list_elem = False, + id3_desc = u''), + mp4 = StorageStyle("\xa9lyr"), + etc = StorageStyle('lyrics') + ) + comments = MediaField( + mp3 = StorageStyle('COMM', id3_desc = u''), + mp4 = StorageStyle("\xa9cmt"), + etc = [StorageStyle('description'), + StorageStyle('comment')] + ) + bpm = MediaField(out_type = int, + mp3 = StorageStyle('TBPM'), + mp4 = StorageStyle('tmpo', as_type = int), + etc = StorageStyle('bpm') + ) + comp = MediaField(out_type = bool, + mp3 = StorageStyle('TCMP'), + mp4 = StorageStyle('cpil', + list_elem = False, + as_type = bool), + etc = StorageStyle('compilation') + ) + albumartist = MediaField( + mp3 = StorageStyle('TXXX', id3_desc=u'Album Artist'), + mp4 = StorageStyle( + '----:com.apple.iTunes:Album Artist', + as_type=str), + etc = [StorageStyle('album artist'), + StorageStyle('albumartist')] + ) + albumtype = MediaField( + mp3 = StorageStyle('TXXX', id3_desc=u'MusicBrainz Album Type'), + mp4 = StorageStyle( + '----:com.apple.iTunes:MusicBrainz Album Type', + as_type=str), + etc = StorageStyle('musicbrainz_albumtype') + ) + + # Album art. + art = ImageField() + + # MusicBrainz IDs. 
+ mb_trackid = MediaField( + mp3 = StorageStyle('UFID:http://musicbrainz.org', + list_elem = False, + id3_frame_field = 'data'), + mp4 = StorageStyle( + '----:com.apple.iTunes:MusicBrainz Track Id', + as_type=str), + etc = StorageStyle('musicbrainz_trackid') + ) + mb_albumid = MediaField( + mp3 = StorageStyle('TXXX', id3_desc=u'MusicBrainz Album Id'), + mp4 = StorageStyle( + '----:com.apple.iTunes:MusicBrainz Album Id', + as_type=str), + etc = StorageStyle('musicbrainz_albumid') + ) + mb_artistid = MediaField( + mp3 = StorageStyle('TXXX', id3_desc=u'MusicBrainz Artist Id'), + mp4 = StorageStyle( + '----:com.apple.iTunes:MusicBrainz Artist Id', + as_type=str), + etc = StorageStyle('musicbrainz_artistid') + ) + mb_albumartistid = MediaField( + mp3 = StorageStyle('TXXX', + id3_desc=u'MusicBrainz Album Artist Id'), + mp4 = StorageStyle( + '----:com.apple.iTunes:MusicBrainz Album Artist Id', + as_type=str), + etc = StorageStyle('musicbrainz_albumartistid') + ) + + @property + def length(self): + return self.mgfile.info.length + + @property + def bitrate(self): + if hasattr(self.mgfile.info, 'bitrate'): + # Many formats provide it explicitly. + return self.mgfile.info.bitrate + else: + # Otherwise, we calculate bitrate from the file size. (This + # is the case for all of the lossless formats.) + size = os.path.getsize(self.path) + return int(size * 8 / self.length) + + @property + def format(self): + return TYPES[self.type] diff --git a/lib/beets/plugins.py b/lib/beets/plugins.py new file mode 100755 index 00000000..b7982df8 --- /dev/null +++ b/lib/beets/plugins.py @@ -0,0 +1,223 @@ +# This file is part of beets. +# Copyright 2010, Adrian Sampson. 
PLUGIN_NAMESPACE = 'beetsplug'
DEFAULT_PLUGINS = []

# Global logger.
log = logging.getLogger('beets')


# Managing the plugins themselves.

class BeetsPlugin(object):
    """The base class for all beets plugins. Plugins provide
    functionality by defining a subclass of BeetsPlugin and overriding
    the abstract methods defined here.
    """
    def commands(self):
        """Should return a list of beets.ui.Subcommand objects for
        commands that should be added to beets' CLI.
        """
        return ()

    def track_distance(self, item, info):
        """Should return a (distance, distance_max) pair to be added
        to the distance value for every track comparison.
        """
        return 0.0, 0.0

    def album_distance(self, items, info):
        """Should return a (distance, distance_max) pair to be added
        to the distance value for every album-level comparison.
        """
        return 0.0, 0.0

    def candidates(self, items):
        """Should return a sequence of MusicBrainz info dictionaries
        that match the album whose items are provided.
        """
        return ()

    def item_candidates(self, item):
        """Should return a sequence of MusicBrainz track info
        dictionaries that match the item provided.
        """
        return ()

    def configure(self, config):
        """This method is called with the ConfigParser object after
        the CLI starts up.
        """
        pass

    # Maps event name -> list of handler functions. Created lazily on
    # the class the listener is registered on.
    listeners = None

    @classmethod
    def register_listener(cls, event, func):
        """Add a function as a listener for the specified event. (An
        imperative alternative to the @listen decorator.)
        """
        if cls.listeners is None:
            cls.listeners = defaultdict(list)
        cls.listeners[event].append(func)

    @classmethod
    def listen(cls, event):
        """Decorator that adds a function as an event handler for the
        specified event (as a string). The parameters passed to function
        will vary depending on what event occurred.

        The function should respond to named parameters.
        function(**kwargs) will trap all arguments in a dictionary.
        Example:

            >>> @MyPlugin.listen("imported")
            >>> def importListener(**kwargs):
            >>>     pass
        """
        def helper(func):
            cls.register_listener(event, func)
            return func
        return helper

def load_plugins(names=()):
    """Imports the modules for a sequence of plugin names. Each name
    must be the name of a Python module under the "beetsplug" namespace
    package in sys.path; the module indicated should contain the
    BeetsPlugin subclasses desired. A default set of plugins is also
    loaded. Failures are logged as warnings and never propagated.
    """
    import sys  # local import: only needed to fetch the active exception

    for name in itertools.chain(names, DEFAULT_PLUGINS):
        modname = '%s.%s' % (PLUGIN_NAMESPACE, name)
        try:
            try:
                __import__(modname, None, None)
            except ImportError:
                # Fetched via sys.exc_info() so the code does not depend
                # on either the comma or the "as" binding syntax.
                exc = sys.exc_info()[1]
                # Again, this is hacky:
                if exc.args[0].endswith(' ' + name):
                    log.warn('** plugin %s not found' % name)
                else:
                    raise
        except Exception:
            # Best effort: a broken plugin must not stop the others from
            # loading. KeyboardInterrupt/SystemExit still propagate.
            log.warn('** error loading plugin %s' % name)
            log.warn(traceback.format_exc())

# One instance per plugin class, created on first use.
_instances = {}
def find_plugins():
    """Returns a list of BeetsPlugin subclass instances from all
    currently loaded beets plugins. Loads the default plugin set
    first.
    """
    load_plugins()
    plugins = []
    for cls in BeetsPlugin.__subclasses__():
        # Only instantiate each plugin class once.
        if cls not in _instances:
            _instances[cls] = cls()
        plugins.append(_instances[cls])
    return plugins


# Communication with plugins.

def commands():
    """Returns a list of Subcommand objects from all loaded plugins.
    """
    out = []
    for plugin in find_plugins():
        out += plugin.commands()
    return out

def track_distance(item, info):
    """Gets the track distance calculated by all loaded plugins.
    Returns a (distance, distance_max) pair.
    """
    dist = 0.0
    dist_max = 0.0
    for plugin in find_plugins():
        d, dm = plugin.track_distance(item, info)
        dist += d
        dist_max += dm
    return dist, dist_max

def album_distance(items, info):
    """Returns the album distance calculated by plugins, as a
    (distance, distance_max) pair.
    """
    dist = 0.0
    dist_max = 0.0
    for plugin in find_plugins():
        d, dm = plugin.album_distance(items, info)
        dist += d
        dist_max += dm
    return dist, dist_max

def candidates(items):
    """Gets MusicBrainz candidates for an album from each plugin.
    """
    out = []
    for plugin in find_plugins():
        out.extend(plugin.candidates(items))
    return out

def item_candidates(item):
    """Gets MusicBrainz candidates for an item from the plugins.
    """
    out = []
    for plugin in find_plugins():
        out.extend(plugin.item_candidates(item))
    return out

def configure(config):
    """Sends the configuration object to each plugin."""
    for plugin in find_plugins():
        plugin.configure(config)


# Event dispatch.

# All the handlers for the event system.
# Each key of the dictionary should contain a list of functions to be
# called for any event. Functions will be called in the order they were
# added.
_event_handlers = defaultdict(list)

def load_listeners():
    """Loads and registers event handlers from all loaded plugins.
    Safe to call repeatedly: a handler already registered for an event
    is not added a second time.
    """
    for plugin in find_plugins():
        if plugin.listeners:
            for event, handlers in plugin.listeners.items():
                registered = _event_handlers[event]
                for handler in handlers:
                    # Skip duplicates so repeated loading (or listeners
                    # inherited by several plugin classes) does not make
                    # an event fire more than once per handler.
                    if handler not in registered:
                        registered.append(handler)

def send(event, **arguments):
    """Sends an event to all assigned event listeners. Event is the
    name of the event to send, all other named arguments go to the
    event handler(s).

    Returns the number of handlers called.
    """
    log.debug('Sending event: %s' % event)
    handlers = _event_handlers[event]
    for handler in handlers:
        handler(**arguments)
    return len(handlers)
# Name of the environment variable that may point at an alternate
# configuration file.
CONFIG_PATH_VAR = 'BEETSCONFIG'
DEFAULT_CONFIG_FILE = os.path.expanduser('~/.beetsconfig')
DEFAULT_LIBRARY = '~/.beetsmusic.blb'
DEFAULT_DIRECTORY = '~/Music'
DEFAULT_PATH_FORMATS = {
    'default': '$albumartist/$album/$track $title',
    'comp': 'Compilations/$album/$track $title',
    'singleton': 'Non-Album/$artist/$title',
}
DEFAULT_ART_FILENAME = 'cover'


# UI exception. Commands should throw this in order to display
# nonrecoverable errors to the user.
class UserError(Exception):
    pass


# Utilities.

def print_(*strings):
    """Like print, but rather than raising an error when a character
    is not in the terminal's encoding's character set, just silently
    replaces it.

    The arguments are joined with single spaces. If the joined text is
    unicode it is encoded with the locale's preferred encoding (UTF-8
    when the locale cannot be determined), replacing unencodable
    characters. A trailing newline is always written.
    """
    if strings:
        if isinstance(strings[0], unicode):
            txt = u' '.join(strings)
        else:
            txt = ' '.join(strings)
    else:
        txt = u''
    if isinstance(txt, unicode):
        try:
            encoding = locale.getdefaultlocale()[1] or 'utf8'
        except ValueError:
            # Invalid locale environment variable setting. To avoid
            # failing entirely for no good reason, assume UTF-8.
            encoding = 'utf8'
        txt = txt.encode(encoding, 'replace')
    # Written explicitly (text plus newline) rather than with a print
    # statement so that this module parses on both Python 2 and 3.
    sys.stdout.write(txt + '\n')

def input_options(options, require=False, prompt=None, fallback_prompt=None,
                  numrange=None, default=None, color=False, max_width=72):
    """Prompts a user for input. The sequence of `options` defines the
    choices the user has. A single-letter shortcut is inferred for each
    option; the user's choice is returned as that single, lower-case
    letter. The options should be provided as lower-case strings unless
    a particular shortcut is desired; in that case, only that letter
    should be capitalized.

    By default, the first option is the default. If `require` is
    provided, then there is no default. `default` can be provided to
    override this. The prompt and fallback prompt are also inferred but
    can be overridden.

    If numrange is provided, it is a pair of `(low, high)` (both ints)
    indicating that, in addition to `options`, the user may enter an
    integer in that inclusive range. In that case the default, unless
    overridden or `require` is set, is `low`, and a numeric answer is
    returned as an int.

    If `color` is true, the shortcut letters (and a numeric default) in
    the generated prompt are wrapped in ANSI color escapes.

    `max_width` specifies the maximum number of columns in the
    automatically generated prompt string.

    Raises ValueError if no unused letter can be found to serve as the
    shortcut for one of the options.
    """
    # Assign single letters to each option. Also capitalize the options
    # to indicate the letter.
    letters = {}
    display_letters = []
    capitalized = []
    first = True
    for option in options:
        # Is a letter already capitalized?
        for letter in option:
            if letter.isalpha() and letter.upper() == letter:
                found_letter = letter
                break
        else:
            # Infer a letter.
            for letter in option:
                if not letter.isalpha():
                    continue  # Don't use punctuation.
                if letter not in letters:
                    found_letter = letter
                    break
            else:
                raise ValueError('no unambiguous lettering found')

        letters[found_letter.lower()] = option
        index = option.index(found_letter)

        # Mark the option's shortcut letter for display.
        if (default is None and not numrange and first) \
           or (isinstance(default, basestring) and
               found_letter.lower() == default.lower()):
            # This option is the default; mark it.
            show_letter = '[%s]' % found_letter.upper()
            is_default = True
        else:
            show_letter = found_letter.upper()
            is_default = False

        # Possibly colorize the letter shortcut. A separate local is
        # used so the `color` flag itself is never overwritten.
        if color:
            letter_color = 'turquoise' if is_default else 'blue'
            show_letter = colorize(letter_color, show_letter)

        # Insert the highlighted letter back into the word.
        capitalized.append(
            option[:index] + show_letter + option[index+1:]
        )
        display_letters.append(found_letter.upper())

        first = False

    # The default is just the first option if unspecified.
    if default is None:
        if require:
            default = None
        elif numrange:
            default = numrange[0]
        else:
            default = display_letters[0].lower()

    # Make a prompt if one is not provided.
    if not prompt:
        # The visible lengths are tracked separately from the parts
        # because colorized parts contain non-printing escape codes.
        prompt_parts = []
        prompt_part_lengths = []
        if numrange:
            if isinstance(default, int):
                default_name = str(default)
                if color:
                    default_name = colorize('turquoise', default_name)
                tmpl = '# selection (default %s)'
                prompt_parts.append(tmpl % default_name)
                prompt_part_lengths.append(len(tmpl % str(default)))
            else:
                prompt_parts.append('# selection')
                # Record the length, not the string itself; a string
                # here breaks the width arithmetic below.
                prompt_part_lengths.append(len(prompt_parts[-1]))
        prompt_parts += capitalized
        prompt_part_lengths += [len(s) for s in options]

        # Wrap the query text.
        prompt = ''
        line_length = 0
        for i, (part, length) in enumerate(zip(prompt_parts,
                                               prompt_part_lengths)):
            # Add punctuation.
            if i == len(prompt_parts) - 1:
                part += '?'
            else:
                part += ','
            length += 1

            # Choose either the current line or the beginning of the next.
            if line_length + length + 1 > max_width:
                prompt += '\n'
                line_length = 0

            if line_length != 0:
                # Not the beginning of the line; need a space.
                part = ' ' + part
                length += 1

            prompt += part
            line_length += length

    # Make a fallback prompt too. This is displayed if the user enters
    # something that is not recognized.
    if not fallback_prompt:
        fallback_prompt = 'Enter one of '
        if numrange:
            # tuple() lets callers pass the range as a list as well.
            fallback_prompt += '%i-%i, ' % tuple(numrange)
        fallback_prompt += ', '.join(display_letters) + ':'

    # (raw_input(prompt) was causing problems with colors.)
    # The prompt is written followed by a single space and no newline.
    sys.stdout.write(prompt + ' ')
    resp = raw_input()
    while True:
        resp = resp.strip().lower()

        # Try default option.
        if default is not None and not resp:
            resp = default

        # Try an integer input if available.
        if numrange:
            try:
                resp = int(resp)
            except ValueError:
                pass
            else:
                low, high = numrange
                if low <= resp <= high:
                    return resp
                else:
                    resp = None

        # Try a normal letter input.
        if resp:
            resp = resp[0]
            if resp in letters:
                return resp

        # Prompt for new input.
        sys.stdout.write(fallback_prompt + ' ')
        resp = raw_input()

def input_yn(prompt, require=False, color=False):
    """Prompts the user for a "yes" or "no" response. The default is
    "yes" unless `require` is `True`, in which case there is no default.
    Returns True if the user chose "y" and False otherwise.
    """
    sel = input_options(
        ('y', 'n'), require, prompt, 'Enter Y or N:', color=color
    )
    return sel == 'y'

def make_query(criteria):
    """Make query string for the list of criteria: the criteria joined
    by spaces and stripped, or None if the result is empty.
    """
    return ' '.join(criteria).strip() or None

def config_val(config, section, name, default, vtype=None):
    """Queries the configuration file for a value (given by the
    section and name). If no value is present, returns default.
    vtype optionally specifies the return type (although only bool
    is supported for now).

    Note that the section is added to `config` if it does not exist
    yet.
    """
    if not config.has_section(section):
        config.add_section(section)

    try:
        if vtype is bool:
            return config.getboolean(section, name)
        else:
            return config.get(section, name)
    except ConfigParser.NoOptionError:
        return default

def human_bytes(size):
    """Formats size, a number of bytes, in a human-readable way.
    The value is repeatedly divided by 1024 until it is below 1024 and
    is then shown with one decimal place and the matching suffix; the
    string "big" is returned if the suffixes run out.
    """
    suffices = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB', 'HB']
    for suffix in suffices:
        if size < 1024:
            return "%3.1f %s" % (size, suffix)
        size /= 1024.0
    return "big"

def human_seconds(interval):
    """Formats interval, a number of seconds, as a human-readable time
    interval. The largest unit for which the value stays below the next
    unit's multiplier is used; the unit name is always pluralized.
    """
    # Each entry is (multiplier relative to the previous unit, name).
    units = [
        (1, 'second'),
        (60, 'minute'),
        (60, 'hour'),
        (24, 'day'),
        (7, 'week'),
        (52, 'year'),
        (10, 'decade'),
    ]
    for i in range(len(units)-1):
        increment, suffix = units[i]
        next_increment, _ = units[i+1]
        interval /= float(increment)
        if interval < next_increment:
            break
    else:
        # Last unit.
        increment, suffix = units[-1]
        interval /= float(increment)

    return "%3.1f %ss" % (interval, suffix)

# ANSI terminal colorization code heavily inspired by pygments:
# http://dev.pocoo.org/hg/pygments-main/file/b2deea5b5030/pygments/console.py
# (pygments is by Tim Hatch, Armin Ronacher, et al.)
COLOR_ESCAPE = "\x1b["
DARK_COLORS = ["black", "darkred", "darkgreen", "brown", "darkblue",
               "purple", "teal", "lightgray"]
LIGHT_COLORS = ["darkgray", "red", "green", "yellow", "blue",
                "fuchsia", "turquoise", "white"]
RESET_COLOR = COLOR_ESCAPE + "39;49;00m"
def colorize(color, text):
    """Returns a string that prints the given text in the given color
    in a terminal that is ANSI color-aware. The color must be something
    in DARK_COLORS or LIGHT_COLORS; otherwise ValueError is raised.
    """
    if color in DARK_COLORS:
        escape = COLOR_ESCAPE + "%im" % (DARK_COLORS.index(color) + 30)
    elif color in LIGHT_COLORS:
        escape = COLOR_ESCAPE + "%i;01m" % (LIGHT_COLORS.index(color) + 30)
    else:
        # Interpolate the name so the message actually shows it.
        raise ValueError('no such color %s' % color)
    return escape + text + RESET_COLOR

def colordiff(a, b, highlight='red'):
    """Given two strings, return the same pair of strings except with
    their differences highlighted in the specified color.
    """
    a_out = []
    b_out = []

    # The lambda disables SequenceMatcher's "junk" heuristic.
    matcher = SequenceMatcher(lambda x: False, a, b)
    for op, a_start, a_end, b_start, b_end in matcher.get_opcodes():
        if op == 'equal':
            # In both strings.
            a_out.append(a[a_start:a_end])
            b_out.append(b[b_start:b_end])
        elif op == 'insert':
            # Right only.
            b_out.append(colorize(highlight, b[b_start:b_end]))
        elif op == 'delete':
            # Left only.
            a_out.append(colorize(highlight, a[a_start:a_end]))
        elif op == 'replace':
            # Right and left differ.
            a_out.append(colorize(highlight, a[a_start:a_end]))
            b_out.append(colorize(highlight, b[b_start:b_end]))
        else:
            assert(False)

    return ''.join(a_out), ''.join(b_out)


# Subcommand parsing infrastructure.
+ +# This is a fairly generic subcommand parser for optparse. It is +# maintained externally here: +# http://gist.github.com/462717 +# There you will also find a better description of the code and a more +# succinct example program. + +class Subcommand(object): + """A subcommand of a root command-line application that may be + invoked by a SubcommandOptionParser. + """ + def __init__(self, name, parser=None, help='', aliases=()): + """Creates a new subcommand. name is the primary way to invoke + the subcommand; aliases are alternate names. parser is an + OptionParser responsible for parsing the subcommand's options. + help is a short description of the command. If no parser is + given, it defaults to a new, empty OptionParser. + """ + self.name = name + self.parser = parser or optparse.OptionParser() + self.aliases = aliases + self.help = help + +class SubcommandsOptionParser(optparse.OptionParser): + """A variant of OptionParser that parses subcommands and their + arguments. + """ + # A singleton command used to give help on other subcommands. + _HelpSubcommand = Subcommand('help', optparse.OptionParser(), + help='give detailed help on a specific sub-command', + aliases=('?',)) + + def __init__(self, *args, **kwargs): + """Create a new subcommand-aware option parser. All of the + options to OptionParser.__init__ are supported in addition + to subcommands, a sequence of Subcommand objects. + """ + # The subcommand array, with the help command included. + self.subcommands = list(kwargs.pop('subcommands', [])) + self.subcommands.append(self._HelpSubcommand) + + # A more helpful default usage. + if 'usage' not in kwargs: + kwargs['usage'] = """ + %prog COMMAND [ARGS...] + %prog help COMMAND""" + + # Super constructor. + optparse.OptionParser.__init__(self, *args, **kwargs) + + # Adjust the help-visible name of each subcommand. 
+ for subcommand in self.subcommands: + subcommand.parser.prog = '%s %s' % \ + (self.get_prog_name(), subcommand.name) + + # Our root parser needs to stop on the first unrecognized argument. + self.disable_interspersed_args() + + def add_subcommand(self, cmd): + """Adds a Subcommand object to the parser's list of commands. + """ + self.subcommands.append(cmd) + + # Add the list of subcommands to the help message. + def format_help(self, formatter=None): + # Get the original help message, to which we will append. + out = optparse.OptionParser.format_help(self, formatter) + if formatter is None: + formatter = self.formatter + + # Subcommands header. + result = ["\n"] + result.append(formatter.format_heading('Commands')) + formatter.indent() + + # Generate the display names (including aliases). + # Also determine the help position. + disp_names = [] + help_position = 0 + for subcommand in self.subcommands: + name = subcommand.name + if subcommand.aliases: + name += ' (%s)' % ', '.join(subcommand.aliases) + disp_names.append(name) + + # Set the help position based on the max width. + proposed_help_position = len(name) + formatter.current_indent + 2 + if proposed_help_position <= formatter.max_help_position: + help_position = max(help_position, proposed_help_position) + + # Add each subcommand to the output. + for subcommand, name in zip(self.subcommands, disp_names): + # Lifted directly from optparse.py. 
+ name_width = help_position - formatter.current_indent - 2 + if len(name) > name_width: + name = "%*s%s\n" % (formatter.current_indent, "", name) + indent_first = help_position + else: + name = "%*s%-*s " % (formatter.current_indent, "", + name_width, name) + indent_first = 0 + result.append(name) + help_width = formatter.width - help_position + help_lines = textwrap.wrap(subcommand.help, help_width) + result.append("%*s%s\n" % (indent_first, "", help_lines[0])) + result.extend(["%*s%s\n" % (help_position, "", line) + for line in help_lines[1:]]) + formatter.dedent() + + # Concatenate the original help message with the subcommand + # list. + return out + "".join(result) + + def _subcommand_for_name(self, name): + """Return the subcommand in self.subcommands matching the + given name. The name may either be the name of a subcommand or + an alias. If no subcommand matches, returns None. + """ + for subcommand in self.subcommands: + if name == subcommand.name or \ + name in subcommand.aliases: + return subcommand + return None + + def parse_args(self, a=None, v=None): + """Like OptionParser.parse_args, but returns these four items: + - options: the options passed to the root parser + - subcommand: the Subcommand object that was invoked + - suboptions: the options passed to the subcommand parser + - subargs: the positional arguments passed to the subcommand + """ + options, args = optparse.OptionParser.parse_args(self, a, v) + + if not args: + # No command given. 
+ self.print_help() + self.exit() + else: + cmdname = args.pop(0) + subcommand = self._subcommand_for_name(cmdname) + if not subcommand: + self.error('unknown command ' + cmdname) + + suboptions, subargs = subcommand.parser.parse_args(args) + + if subcommand is self._HelpSubcommand: + if subargs: + # particular + cmdname = subargs[0] + helpcommand = self._subcommand_for_name(cmdname) + helpcommand.parser.print_help() + self.exit() + else: + # general + self.print_help() + self.exit() + + return options, subcommand, suboptions, subargs + + +# The root parser and its main function. + +def main(args=None, configfh=None): + """Run the main command-line interface for beets.""" + # Get the default subcommands. + from beets.ui.commands import default_commands + + # Read defaults from config file. + config = ConfigParser.SafeConfigParser() + if configfh: + configpath = None + elif CONFIG_PATH_VAR in os.environ: + configpath = os.path.expanduser(os.environ[CONFIG_PATH_VAR]) + else: + configpath = DEFAULT_CONFIG_FILE + if configpath: + configpath = util.syspath(configpath) + if os.path.exists(util.syspath(configpath)): + configfh = open(configpath) + else: + configfh = None + if configfh: + config.readfp(configfh) + + # Add plugin paths. + plugpaths = config_val(config, 'beets', 'pluginpath', '') + for plugpath in plugpaths.split(':'): + sys.path.append(os.path.expanduser(plugpath)) + # Load requested plugins. + plugnames = config_val(config, 'beets', 'plugins', '') + plugins.load_plugins(plugnames.split()) + plugins.load_listeners() + plugins.send("pluginload") + plugins.configure(config) + + # Construct the root parser. 
+ commands = list(default_commands) + commands += plugins.commands() + parser = SubcommandsOptionParser(subcommands=commands) + parser.add_option('-l', '--library', dest='libpath', + help='library database file to use') + parser.add_option('-d', '--directory', dest='directory', + help="destination music directory") + parser.add_option('-p', '--pathformat', dest='path_format', + help="destination path format string") + parser.add_option('-v', '--verbose', dest='verbose', action='store_true', + help='print debugging information') + + # Parse the command-line! + options, subcommand, suboptions, subargs = parser.parse_args(args) + + # Open library file. + libpath = options.libpath or \ + config_val(config, 'beets', 'library', DEFAULT_LIBRARY) + directory = options.directory or \ + config_val(config, 'beets', 'directory', DEFAULT_DIRECTORY) + legacy_path_format = config_val(config, 'beets', 'path_format', None) + if options.path_format: + # If given, -p overrides all path format settings + path_formats = {'default': options.path_format} + else: + if legacy_path_format: + # Old path formats override the default values. + path_formats = {'default': legacy_path_format} + else: + # If no legacy path format, use the defaults instead. + path_formats = DEFAULT_PATH_FORMATS + if config.has_section('paths'): + path_formats.update(config.items('paths')) + art_filename = \ + config_val(config, 'beets', 'art_filename', DEFAULT_ART_FILENAME) + db_path = os.path.expanduser(libpath) + try: + lib = library.Library(db_path, + directory, + path_formats, + art_filename) + except sqlite3.OperationalError: + raise UserError("database file %s could not be opened" % db_path) + + # Configure the logger. + log = logging.getLogger('beets') + if options.verbose: + log.setLevel(logging.DEBUG) + else: + log.setLevel(logging.INFO) + + # Invoke the subcommand. 
+ try: + subcommand.func(lib, config, suboptions, subargs) + except UserError, exc: + message = exc.args[0] if exc.args else None + subcommand.parser.error(message) + except IOError, exc: + if exc.errno == errno.EPIPE: + # "Broken pipe". End silently. + pass + else: + raise diff --git a/lib/beets/ui/commands.py b/lib/beets/ui/commands.py new file mode 100755 index 00000000..c2e0aab4 --- /dev/null +++ b/lib/beets/ui/commands.py @@ -0,0 +1,722 @@ +# This file is part of beets. +# Copyright 2011, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""This module provides the default commands for beets' command-line +interface. +""" +from __future__ import with_statement # Python 2.5 +import logging +import sys +import os +import time + +from lib.beets import ui +from lib.beets.ui import print_ +from lib.beets import autotag +import lib.beets.autotag.art as beets.autotag.art +from lib.beets import plugins +from lib.beets import importer +from lib.beets.util import syspath, normpath + +# Global logger. +log = logging.getLogger('beets') + +# The list of default subcommands. This is populated with Subcommand +# objects that can be fed to a SubcommandsOptionParser. +default_commands = [] + + +# import: Autotagger and importer. 
+ +DEFAULT_IMPORT_COPY = True +DEFAULT_IMPORT_WRITE = True +DEFAULT_IMPORT_DELETE = False +DEFAULT_IMPORT_AUTOT = True +DEFAULT_IMPORT_TIMID = False +DEFAULT_IMPORT_ART = True +DEFAULT_IMPORT_QUIET = False +DEFAULT_IMPORT_QUIET_FALLBACK = 'skip' +DEFAULT_IMPORT_RESUME = None # "ask" +DEFAULT_THREADED = True +DEFAULT_COLOR = True + +VARIOUS_ARTISTS = u'Various Artists' + +# Importer utilities and support. + +def dist_string(dist, color): + """Formats a distance (a float) as a similarity percentage string. + The string is colorized if color is True. + """ + out = '%.1f%%' % ((1 - dist) * 100) + if color: + if dist <= autotag.STRONG_REC_THRESH: + out = ui.colorize('green', out) + elif dist <= autotag.MEDIUM_REC_THRESH: + out = ui.colorize('yellow', out) + else: + out = ui.colorize('red', out) + return out + +def show_change(cur_artist, cur_album, items, info, dist, color=True): + """Print out a representation of the changes that will be made if + tags are changed from (cur_artist, cur_album, items) to info with + distance dist. + """ + def show_album(artist, album): + if artist: + print_(' %s - %s' % (artist, album)) + elif album: + print_(' %s' % album) + else: + print_(' (unknown album)') + + # Identify the album in question. + if cur_artist != info['artist'] or \ + (cur_album != info['album'] and info['album'] != VARIOUS_ARTISTS): + artist_l, artist_r = cur_artist or '', info['artist'] + album_l, album_r = cur_album or '', info['album'] + if artist_r == VARIOUS_ARTISTS: + # Hide artists for VA releases. + artist_l, artist_r = u'', u'' + + if color: + artist_l, artist_r = ui.colordiff(artist_l, artist_r) + album_l, album_r = ui.colordiff(album_l, album_r) + + print_("Correcting tags from:") + show_album(artist_l, album_l) + print_("To:") + show_album(artist_r, album_r) + else: + print_("Tagging: %s - %s" % (info['artist'], info['album'])) + + # Distance/similarity. + print_('(Similarity: %s)' % dist_string(dist, color)) + + # Tracks. 
+ for i, (item, track_data) in enumerate(zip(items, info['tracks'])): + cur_track = str(item.track) + new_track = str(i+1) + cur_title = item.title + new_title = track_data['title'] + + # Possibly colorize changes. + if color: + cur_title, new_title = ui.colordiff(cur_title, new_title) + if cur_track != new_track: + cur_track = ui.colorize('red', cur_track) + new_track = ui.colorize('red', new_track) + + if cur_title != new_title and cur_track != new_track: + print_(" * %s (%s) -> %s (%s)" % ( + cur_title, cur_track, new_title, new_track + )) + elif cur_title != new_title: + print_(" * %s -> %s" % (cur_title, new_title)) + elif cur_track != new_track: + print_(" * %s (%s -> %s)" % (item.title, cur_track, new_track)) + +def show_item_change(item, info, dist, color): + """Print out the change that would occur by tagging `item` with the + metadata from `info`. + """ + cur_artist, new_artist = item.artist, info['artist'] + cur_title, new_title = item.title, info['title'] + + if cur_artist != new_artist or cur_title != new_title: + if color: + cur_artist, new_artist = ui.colordiff(cur_artist, new_artist) + cur_title, new_title = ui.colordiff(cur_title, new_title) + + print_("Correcting track tags from:") + print_(" %s - %s" % (cur_artist, cur_title)) + print_("To:") + print_(" %s - %s" % (new_artist, new_title)) + + else: + print_("Tagging track: %s - %s" % (cur_artist, cur_title)) + + print_('(Similarity: %s)' % dist_string(dist, color)) + +def should_resume(config, path): + return ui.input_yn("Import of the directory:\n%s" + "\nwas interrupted. Resume (Y/n)?" % path) + +def _quiet_fall_back(config): + """Show the user that the default action is being taken because + we're in quiet mode and the recommendation is not strong. 
+ """ + if config.quiet_fallback == importer.action.SKIP: + print_('Skipping.') + elif config.quiet_fallback == importer.action.ASIS: + print_('Importing as-is.') + else: + assert(False) + return config.quiet_fallback + +def choose_candidate(candidates, singleton, rec, color, timid, + cur_artist=None, cur_album=None, item=None): + """Given a sorted list of candidates, ask the user for a selection + of which candidate to use. Applies to both full albums and + singletons (tracks). For albums, the candidates are `(dist, items, + info)` triples and `cur_artist` and `cur_album` must be provided. + For singletons, the candidates are `(dist, info)` pairs and `item` + must be provided. + + Returns the result of the choice, which may SKIP, ASIS, TRACKS, or + MANUAL or a candidate. For albums, a candidate is a `(info, items)` + pair; for items, it is just an `info` dictionary. + """ + # Sanity check. + if singleton: + assert item is not None + else: + assert cur_artist is not None + assert cur_album is not None + + # Zero candidates. + if not candidates: + print_("No match found.") + if singleton: + opts = ('Use as-is', 'Skip', 'Enter search', 'enter Id', + 'aBort') + else: + opts = ('Use as-is', 'as Tracks', 'Skip', 'Enter search', + 'enter Id', 'aBort') + sel = ui.input_options(opts, color=color) + if sel == 'u': + return importer.action.ASIS + elif sel == 't': + assert not singleton + return importer.action.TRACKS + elif sel == 'e': + return importer.action.MANUAL + elif sel == 's': + return importer.action.SKIP + elif sel == 'b': + raise importer.ImportAbort() + elif sel == 'i': + return importer.action.MANUAL_ID + else: + assert False + + # Is the change good enough? + bypass_candidates = False + if rec != autotag.RECOMMEND_NONE: + if singleton: + dist, info = candidates[0] + else: + dist, items, info = candidates[0] + bypass_candidates = True + + while True: + # Display and choose from candidates. + if not bypass_candidates: + # Display list of candidates. 
+ if singleton: + print_('Finding tags for track "%s - %s".' % + (item.artist, item.title)) + print_('Candidates:') + for i, (dist, info) in enumerate(candidates): + print_('%i. %s - %s (%s)' % (i+1, info['artist'], + info['title'], dist_string(dist, color))) + else: + print_('Finding tags for album "%s - %s".' % + (cur_artist, cur_album)) + print_('Candidates:') + for i, (dist, items, info) in enumerate(candidates): + print_('%i. %s - %s (%s)' % (i+1, info['artist'], + info['album'], dist_string(dist, color))) + + # Ask the user for a choice. + if singleton: + opts = ('Skip', 'Use as-is', 'Enter search', 'enter Id', + 'aBort') + else: + opts = ('Skip', 'Use as-is', 'as Tracks', 'Enter search', + 'enter Id', 'aBort') + sel = ui.input_options(opts, numrange=(1, len(candidates)), + color=color) + if sel == 's': + return importer.action.SKIP + elif sel == 'u': + return importer.action.ASIS + elif sel == 'e': + return importer.action.MANUAL + elif sel == 't': + assert not singleton + return importer.action.TRACKS + elif sel == 'b': + raise importer.ImportAbort() + elif sel == 'i': + return importer.action.MANUAL_ID + else: # Numerical selection. + if singleton: + dist, info = candidates[sel-1] + else: + dist, items, info = candidates[sel-1] + bypass_candidates = False + + # Show what we're about to do. + if singleton: + show_item_change(item, info, dist, color) + else: + show_change(cur_artist, cur_album, items, info, dist, color) + + # Exact match => tag automatically if we're not in timid mode. + if rec == autotag.RECOMMEND_STRONG and not timid: + if singleton: + return info + else: + return info, items + + # Ask for confirmation. 
+ if singleton: + opts = ('Apply', 'More candidates', 'Skip', 'Use as-is', + 'Enter search', 'enter Id', 'aBort') + else: + opts = ('Apply', 'More candidates', 'Skip', 'Use as-is', + 'as Tracks', 'Enter search', 'enter Id', 'aBort') + sel = ui.input_options(opts, color=color) + if sel == 'a': + if singleton: + return info + else: + return info, items + elif sel == 'm': + pass + elif sel == 's': + return importer.action.SKIP + elif sel == 'u': + return importer.action.ASIS + elif sel == 't': + assert not singleton + return importer.action.TRACKS + elif sel == 'e': + return importer.action.MANUAL + elif sel == 'b': + raise importer.ImportAbort() + elif sel == 'i': + return importer.action.MANUAL_ID + +def manual_search(singleton): + """Input either an artist and album (for full albums) or artist and + track name (for singletons) for manual search. + """ + artist = raw_input('Artist: ').decode(sys.stdin.encoding) + name = raw_input('Track: ' if singleton else 'Album: ') \ + .decode(sys.stdin.encoding) + return artist.strip(), name.strip() + +def manual_id(singleton): + """Input a MusicBrainz ID, either for an album or a track. + """ + prompt = 'Enter MusicBrainz %s ID: ' % ('track' if singleton else 'album') + return raw_input(prompt).decode(sys.stdin.encoding).strip() + +def choose_match(task, config): + """Given an initial autotagging of items, go through an interactive + dance with the user to ask for a choice of metadata. Returns an + (info, items) pair, ASIS, or SKIP. + """ + # Show what we're tagging. + print_() + print_(task.path) + + if config.quiet: + # No input; just make a decision. + if task.rec == autotag.RECOMMEND_STRONG: + dist, items, info = task.candidates[0] + show_change(task.cur_artist, task.cur_album, items, info, dist, + config.color) + return info, items + else: + return _quiet_fall_back(config) + + # Loop until we have a choice. + candidates, rec = task.candidates, task.rec + while True: + # Ask for a choice from the user. 
+ choice = choose_candidate(candidates, False, rec, config.color, + config.timid, task.cur_artist, + task.cur_album) + + # Choose which tags to use. + if choice in (importer.action.SKIP, importer.action.ASIS, + importer.action.TRACKS): + # Pass selection to main control flow. + return choice + elif choice is importer.action.MANUAL: + # Try again with manual search terms. + search_artist, search_album = manual_search(False) + try: + _, _, candidates, rec = \ + autotag.tag_album(task.items, config.timid, search_artist, + search_album) + except autotag.AutotagError: + candidates, rec = None, None + elif choice is importer.action.MANUAL_ID: + # Try a manually-entered ID. + search_id = manual_id(False) + try: + _, _, candidates, rec = \ + autotag.tag_album(task.items, config.timid, + search_id=search_id) + except autotag.AutotagError: + candidates, rec = None, None + else: + # We have a candidate! Finish tagging. Here, choice is + # an (info, items) pair as desired. + assert not isinstance(choice, importer.action) + return choice + +def choose_item(task, config): + """Ask the user for a choice about tagging a single item. Returns + either an action constant or a track info dictionary. + """ + print_() + print_(task.item.path) + candidates, rec = task.item_match + + if config.quiet: + # Quiet mode; make a decision. + if rec == autotag.RECOMMEND_STRONG: + dist, track_info = candidates[0] + show_item_change(task.item, track_info, dist, config.color) + return track_info + else: + return _quiet_fall_back(config) + + while True: + # Ask for a choice. + choice = choose_candidate(candidates, True, rec, config.color, + config.timid, item=task.item) + + if choice in (importer.action.SKIP, importer.action.ASIS): + return choice + elif choice == importer.action.TRACKS: + assert False # TRACKS is only legal for albums. + elif choice == importer.action.MANUAL: + # Continue in the loop with a new set of candidates. 
+ search_artist, search_title = manual_search(True) + candidates, rec = autotag.tag_item(task.item, config.timid, + search_artist, search_title) + elif choice == importer.action.MANUAL_ID: + # Ask for a track ID. + search_id = manual_id(True) + candidates, rec = autotag.tag_item(task.item, config.timid, + search_id=search_id) + else: + # Chose a candidate. + assert not isinstance(choice, importer.action) + return choice + +# The import command. + +def import_files(lib, paths, copy, write, autot, logpath, art, threaded, + color, delete, quiet, resume, quiet_fallback, singletons, + timid): + """Import the files in the given list of paths, tagging each leaf + directory as an album. If copy, then the files are copied into + the library folder. If write, then new metadata is written to the + files themselves. If not autot, then just import the files + without attempting to tag. If logpath is provided, then untaggable + albums will be logged there. If art, then attempt to download + cover art for each album. If threaded, then accelerate autotagging + imports by running them in multiple threads. If color, then + ANSI-colorize some terminal output. If delete, then old files are + deleted when they are copied. If quiet, then the user is + never prompted for input; instead, the tagger just skips anything + it is not confident about. resume indicates whether interrupted + imports can be resumed and is either a boolean or None. + quiet_fallback should be either ASIS or SKIP and indicates what + should happen in quiet mode when the recommendation is not strong. + """ + # Check the user-specified directories. + for path in paths: + if not singletons and not os.path.isdir(syspath(path)): + raise ui.UserError('not a directory: ' + path) + elif singletons and not os.path.exists(syspath(path)): + raise ui.UserError('no such file: ' + path) + + # Check parameter consistency. + if quiet and timid: + raise ui.UserError("can't be both quiet and timid") + + # Open the log. 
+ if logpath: + logpath = normpath(logpath) + logfile = open(syspath(logpath), 'a') + print >>logfile, 'import started', time.asctime() + else: + logfile = None + + # Never ask for input in quiet mode. + if resume is None and quiet: + resume = False + + # Perform the import. + importer.run_import( + lib = lib, + paths = paths, + resume = resume, + logfile = logfile, + color = color, + quiet = quiet, + quiet_fallback = quiet_fallback, + copy = copy, + write = write, + art = art, + delete = delete, + threaded = threaded, + autot = autot, + choose_match_func = choose_match, + should_resume_func = should_resume, + singletons = singletons, + timid = timid, + choose_item_func = choose_item, + ) + + # If we were logging, close the file. + if logfile: + print >>logfile, '' + logfile.close() + + # Emit event. + plugins.send('import', lib=lib, paths=paths) + +import_cmd = ui.Subcommand('import', help='import new music', + aliases=('imp', 'im')) +import_cmd.parser.add_option('-c', '--copy', action='store_true', + default=None, help="copy tracks into library directory (default)") +import_cmd.parser.add_option('-C', '--nocopy', action='store_false', + dest='copy', help="don't copy tracks (opposite of -c)") +import_cmd.parser.add_option('-w', '--write', action='store_true', + default=None, help="write new metadata to files' tags (default)") +import_cmd.parser.add_option('-W', '--nowrite', action='store_false', + dest='write', help="don't write metadata (opposite of -w)") +import_cmd.parser.add_option('-a', '--autotag', action='store_true', + dest='autotag', help="infer tags for imported files (default)") +import_cmd.parser.add_option('-A', '--noautotag', action='store_false', + dest='autotag', + help="don't infer tags for imported files (opposite of -a)") +import_cmd.parser.add_option('-p', '--resume', action='store_true', + default=None, help="resume importing if interrupted") +import_cmd.parser.add_option('-P', '--noresume', action='store_false', + dest='resume', help="do not 
try to resume importing") +import_cmd.parser.add_option('-r', '--art', action='store_true', + default=None, help="try to download album art") +import_cmd.parser.add_option('-R', '--noart', action='store_false', + dest='art', help="don't album art (opposite of -r)") +import_cmd.parser.add_option('-q', '--quiet', action='store_true', + dest='quiet', help="never prompt for input: skip albums instead") +import_cmd.parser.add_option('-l', '--log', dest='logpath', + help='file to log untaggable albums for later review') +import_cmd.parser.add_option('-s', '--singletons', action='store_true', + help='import individual tracks instead of full albums') +import_cmd.parser.add_option('-t', '--timid', dest='timid', + action='store_true', help='always confirm all actions') +def import_func(lib, config, opts, args): + copy = opts.copy if opts.copy is not None else \ + ui.config_val(config, 'beets', 'import_copy', + DEFAULT_IMPORT_COPY, bool) + write = opts.write if opts.write is not None else \ + ui.config_val(config, 'beets', 'import_write', + DEFAULT_IMPORT_WRITE, bool) + delete = ui.config_val(config, 'beets', 'import_delete', + DEFAULT_IMPORT_DELETE, bool) + autot = opts.autotag if opts.autotag is not None else DEFAULT_IMPORT_AUTOT + art = opts.art if opts.art is not None else \ + ui.config_val(config, 'beets', 'import_art', + DEFAULT_IMPORT_ART, bool) + threaded = ui.config_val(config, 'beets', 'threaded', + DEFAULT_THREADED, bool) + color = ui.config_val(config, 'beets', 'color', DEFAULT_COLOR, bool) + quiet = opts.quiet if opts.quiet is not None else DEFAULT_IMPORT_QUIET + quiet_fallback_str = ui.config_val(config, 'beets', 'import_quiet_fallback', + DEFAULT_IMPORT_QUIET_FALLBACK) + singletons = opts.singletons + timid = opts.timid if opts.timid is not None else \ + ui.config_val(config, 'beets', 'import_timid', + DEFAULT_IMPORT_TIMID, bool) + logpath = opts.logpath if opts.logpath is not None else \ + ui.config_val(config, 'beets', 'import_log', None) + + # Resume has 
three options: yes, no, and "ask" (None). + resume = opts.resume if opts.resume is not None else \ + ui.config_val(config, 'beets', 'import_resume', DEFAULT_IMPORT_RESUME) + if isinstance(resume, basestring): + if resume.lower() in ('yes', 'true', 't', 'y', '1'): + resume = True + elif resume.lower() in ('no', 'false', 'f', 'n', '0'): + resume = False + else: + resume = None + + if quiet_fallback_str == 'asis': + quiet_fallback = importer.action.ASIS + else: + quiet_fallback = importer.action.SKIP + import_files(lib, args, copy, write, autot, logpath, art, threaded, + color, delete, quiet, resume, quiet_fallback, singletons, + timid) +import_cmd.func = import_func +default_commands.append(import_cmd) + + +# list: Query and show library contents. + +def list_items(lib, query, album, path): + """Print out items in lib matching query. If album, then search for + albums instead of single items. If path, print the matched objects' + paths instead of human-readable information about them. + """ + if album: + for album in lib.albums(query): + if path: + print_(album.item_dir()) + else: + print_(album.albumartist + u' - ' + album.album) + else: + for item in lib.items(query): + if path: + print_(item.path) + else: + print_(item.artist + u' - ' + item.album + u' - ' + item.title) + +list_cmd = ui.Subcommand('list', help='query the library', aliases=('ls',)) +list_cmd.parser.add_option('-a', '--album', action='store_true', + help='show matching albums instead of tracks') +list_cmd.parser.add_option('-p', '--path', action='store_true', + help='print paths for matched items or albums') +def list_func(lib, config, opts, args): + list_items(lib, ui.make_query(args), opts.album, opts.path) +list_cmd.func = list_func +default_commands.append(list_cmd) + + +# remove: Remove items from library, delete files. + +def remove_items(lib, query, album, delete=False): + """Remove items matching query from lib. If album, then match and + remove whole albums. 
If delete, also remove files from disk. + """ + # Get the matching items. + if album: + albums = list(lib.albums(query)) + items = [] + for al in albums: + items += al.items() + else: + items = list(lib.items(query)) + + if not items: + print_('No matching items found.') + return + + # Show all the items. + for item in items: + print_(item.artist + ' - ' + item.album + ' - ' + item.title) + + # Confirm with user. + print_() + if delete: + prompt = 'Really DELETE %i files (y/n)?' % len(items) + else: + prompt = 'Really remove %i items from the library (y/n)?' % \ + len(items) + if not ui.input_yn(prompt, True): + return + + # Remove (and possibly delete) items. + if album: + for al in albums: + al.remove(delete) + else: + for item in items: + lib.remove(item, delete) + + lib.save() + +remove_cmd = ui.Subcommand('remove', + help='remove matching items from the library', aliases=('rm',)) +remove_cmd.parser.add_option("-d", "--delete", action="store_true", + help="also remove files from disk") +remove_cmd.parser.add_option('-a', '--album', action='store_true', + help='match albums instead of tracks') +def remove_func(lib, config, opts, args): + remove_items(lib, ui.make_query(args), opts.album, opts.delete) +remove_cmd.func = remove_func +default_commands.append(remove_cmd) + + +# stats: Show library/query statistics. + +def show_stats(lib, query): + """Shows some statistics about the matched items.""" + items = lib.items(query) + + total_size = 0 + total_time = 0.0 + total_items = 0 + artists = set() + albums = set() + + for item in items: + #fixme This is approximate, so people might complain that + # this total size doesn't match "du -sh". Could fix this + # by putting total file size in the database. 
+ total_size += int(item.length * item.bitrate / 8) + total_time += item.length + total_items += 1 + artists.add(item.artist) + albums.add(item.album) + + print_("""Tracks: %i +Total time: %s +Total size: %s +Artists: %i +Albums: %i""" % ( + total_items, + ui.human_seconds(total_time), + ui.human_bytes(total_size), + len(artists), len(albums) + )) + +stats_cmd = ui.Subcommand('stats', + help='show statistics about the library or a query') +def stats_func(lib, config, opts, args): + show_stats(lib, ui.make_query(args)) +stats_cmd.func = stats_func +default_commands.append(stats_cmd) + + +# version: Show current beets version. + +def show_version(lib, config, opts, args): + print 'beets version %s' % beets.__version__ + # Show plugins. + names = [] + for plugin in plugins.find_plugins(): + modname = plugin.__module__ + names.append(modname.split('.')[-1]) + if names: + print 'plugins:', ', '.join(names) + else: + print 'no plugins loaded' +version_cmd = ui.Subcommand('version', + help='output version information') +version_cmd.func = show_version +default_commands.append(version_cmd) diff --git a/lib/beets/util/__init__.py b/lib/beets/util/__init__.py new file mode 100644 index 00000000..45f94522 --- /dev/null +++ b/lib/beets/util/__init__.py @@ -0,0 +1,255 @@ +# This file is part of beets. +# Copyright 2011, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+ +"""Miscellaneous utility functions.""" +import os +import sys +import re + +MAX_FILENAME_LENGTH = 200 + +def normpath(path): + """Provide the canonical form of the path suitable for storing in + the database. + """ + return os.path.normpath(os.path.abspath(os.path.expanduser(path))) + +def ancestry(path, pathmod=None): + """Return a list consisting of path's parent directory, its + grandparent, and so on. For instance: + >>> ancestry('/a/b/c') + ['/', '/a', '/a/b'] + """ + pathmod = pathmod or os.path + out = [] + last_path = None + while path: + path = pathmod.dirname(path) + + if path == last_path: + break + last_path = path + + if path: # don't yield '' + out.insert(0, path) + return out + +def sorted_walk(path): + """Like os.walk, but yields things in sorted, breadth-first + order. + """ + # Make sure the path isn't a Unicode string. + path = bytestring_path(path) + + # Get all the directories and files at this level. + dirs = [] + files = [] + for base in os.listdir(path): + cur = os.path.join(path, base) + if os.path.isdir(syspath(cur)): + dirs.append(base) + else: + files.append(base) + + # Sort lists and yield the current level. + dirs.sort() + files.sort() + yield (path, dirs, files) + + # Recurse into directories. + for base in dirs: + cur = os.path.join(path, base) + # yield from _sorted_walk(cur) + for res in sorted_walk(cur): + yield res + +def mkdirall(path): + """Make all the enclosing directories of path (like mkdir -p on the + parent). + """ + for ancestor in ancestry(path): + if not os.path.isdir(syspath(ancestor)): + os.mkdir(syspath(ancestor)) + +def prune_dirs(path, root): + """If path is an empty directory, then remove it. Recursively + remove path's ancestry up to root (which is never removed) where + there are empty directories. If path is not contained in root, then + nothing is removed. + """ + path = normpath(path) + root = normpath(root) + + ancestors = ancestry(path) + if root in ancestors: + # Only remove directories below the root. 
+ ancestors = ancestors[ancestors.index(root)+1:] + + # Traverse upward from path. + ancestors.append(path) + ancestors.reverse() + for directory in ancestors: + try: + os.rmdir(syspath(directory)) + except OSError: + break + +def components(path, pathmod=None): + """Return a list of the path components in path. For instance: + >>> components('/a/b/c') + ['a', 'b', 'c'] + """ + pathmod = pathmod or os.path + comps = [] + ances = ancestry(path, pathmod) + for anc in ances: + comp = pathmod.basename(anc) + if comp: + comps.append(comp) + else: # root + comps.append(anc) + + last = pathmod.basename(path) + if last: + comps.append(last) + + return comps + +def bytestring_path(path): + """Given a path, which is either a str or a unicode, returns a str + path (ensuring that we never deal with Unicode pathnames). + """ + # Pass through bytestrings. + if isinstance(path, str): + return path + + # Try to encode with default encodings, but fall back to UTF8. + encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() + try: + return path.encode(encoding) + except UnicodeError: + return path.encode('utf8') + +def syspath(path, pathmod=None): + """Convert a path for use by the operating system. In particular, + paths on Windows must receive a magic prefix and must be converted + to unicode before they are sent to the OS. + """ + pathmod = pathmod or os.path + windows = pathmod.__name__ == 'ntpath' + + # Don't do anything if we're not on windows + if not windows: + return path + + if not isinstance(path, unicode): + # Try to decode with default encodings, but fall back to UTF8. 
+ encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() + try: + path = path.decode(encoding, 'replace') + except UnicodeError: + path = path.decode('utf8', 'replace') + + # Add the magic prefix if it isn't already there + if not path.startswith(u'\\\\?\\'): + path = u'\\\\?\\' + path + + return path + +def soft_remove(path): + """Remove the file if it exists.""" + path = syspath(path) + if os.path.exists(path): + os.remove(path) + +# Note: POSIX actually supports \ and : -- I just think they're +# a pain. And ? has caused problems for some. +CHAR_REPLACE = [ + (re.compile(r'[\\/\?]|^\.'), '_'), + (re.compile(r':'), '-'), +] +CHAR_REPLACE_WINDOWS = re.compile('["\*<>\|]|^\.|\.$| +$'), '_' +def sanitize_path(path, pathmod=None): + """Takes a path and makes sure that it is legal. Returns a new path. + Only works with fragments; won't work reliably on Windows when a + path begins with a drive letter. Path separators (including altsep!) + should already be cleaned from the path components. + """ + pathmod = pathmod or os.path + windows = pathmod.__name__ == 'ntpath' + + comps = components(path, pathmod) + if not comps: + return '' + for i, comp in enumerate(comps): + # Replace special characters. + for regex, repl in CHAR_REPLACE: + comp = regex.sub(repl, comp) + if windows: + regex, repl = CHAR_REPLACE_WINDOWS + comp = regex.sub(repl, comp) + + # Truncate each component. + comp = comp[:MAX_FILENAME_LENGTH] + + comps[i] = comp + return pathmod.join(*comps) + +def sanitize_for_path(value, pathmod, key=None): + """Sanitize the value for inclusion in a path: replace separators + with _, etc. Doesn't guarantee that the whole path will be valid; + you should still call sanitize_path on the complete path. 
+ """ + if isinstance(value, basestring): + for sep in (pathmod.sep, pathmod.altsep): + if sep: + value = value.replace(sep, u'_') + elif key in ('track', 'tracktotal', 'disc', 'disctotal'): + # pad with zeros + value = u'%02i' % value + elif key == 'bitrate': + # Bitrate gets formatted as kbps. + value = u'%ikbps' % (value / 1000) + else: + value = unicode(value) + return value + +def str2bool(value): + """Returns a boolean reflecting a human-entered string.""" + if value.lower() in ('yes', '1', 'true', 't', 'y'): + return True + else: + return False + +def levenshtein(s1, s2): + """A nice DP edit distance implementation from Wikibooks: + http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/ + Levenshtein_distance#Python + """ + if len(s1) < len(s2): + return levenshtein(s2, s1) + if not s1: + return len(s2) + + previous_row = xrange(len(s2) + 1) + for i, c1 in enumerate(s1): + current_row = [i + 1] + for j, c2 in enumerate(s2): + insertions = previous_row[j + 1] + 1 + deletions = current_row[j] + 1 + substitutions = previous_row[j] + (c1 != c2) + current_row.append(min(insertions, deletions, substitutions)) + previous_row = current_row + + return previous_row[-1] diff --git a/lib/beets/util/enumeration.py b/lib/beets/util/enumeration.py new file mode 100644 index 00000000..794a0624 --- /dev/null +++ b/lib/beets/util/enumeration.py @@ -0,0 +1,178 @@ +# This file is part of beets. +# Copyright 2011, Adrian Sampson. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""A metaclass for enumerated types that really are types. + +You can create enumerations with `enum(values, [name])` and they work +how you would expect them to. + + >>> from enumeration import enum + >>> Direction = enum('north east south west', name='Direction') + >>> Direction.west + Direction.west + >>> Direction.west == Direction.west + True + >>> Direction.west == Direction.east + False + >>> isinstance(Direction.west, Direction) + True + >>> Direction[3] + Direction.west + >>> Direction['west'] + Direction.west + >>> Direction.west.name + 'west' + >>> Direction.north < Direction.west + True + +Enumerations are classes; their instances represent the possible values +of the enumeration. Because Python classes must have names, you may +provide a `name` parameter to `enum`; if you don't, a meaningless one +will be chosen for you. +""" +import random + +class Enumeration(type): + """A metaclass whose classes are enumerations. + + The `values` attribute of the class is used to populate the + enumeration. Values may either be a list of enumerated names or a + string containing a space-separated list of names. When the class + is created, it is instantiated for each name value in `values`. + Each such instance is the name of the enumerated item as the sole + argument. + + The `Enumerated` class is a good choice for a superclass. 
+ """ + + def __init__(cls, name, bases, dic): + super(Enumeration, cls).__init__(name, bases, dic) + + if 'values' not in dic: + # Do nothing if no values are provided (i.e., with + # Enumerated itself). + return + + # May be called with a single string, in which case we split on + # whitespace for convenience. + values = dic['values'] + if isinstance(values, basestring): + values = values.split() + + # Create the Enumerated instances for each value. We have to use + # super's __setattr__ here because we disallow setattr below. + super(Enumeration, cls).__setattr__('_items_dict', {}) + super(Enumeration, cls).__setattr__('_items_list', []) + for value in values: + item = cls(value, len(cls._items_list)) + cls._items_dict[value] = item + cls._items_list.append(item) + + def __getattr__(cls, key): + try: + return cls._items_dict[key] + except KeyError: + raise AttributeError("enumeration '" + cls.__name__ + + "' has no item '" + key + "'") + + def __setattr__(cls, key, val): + raise TypeError("enumerations do not support attribute assignment") + + def __getitem__(cls, key): + if isinstance(key, int): + return cls._items_list[key] + else: + return getattr(cls, key) + + def __len__(cls): + return len(cls._items_list) + + def __iter__(cls): + return iter(cls._items_list) + + def __nonzero__(cls): + # Ensures that __len__ doesn't get called before __init__ by + # pydoc. + return True + +class Enumerated(object): + """An item in an enumeration. + + Contains instance methods inherited by enumerated objects. The + metaclass is preset to `Enumeration` for your convenience. + + Instance attributes: + name -- The name of the item. + index -- The index of the item in its enumeration. + + >>> from enumeration import Enumerated + >>> class Garment(Enumerated): + ... values = 'hat glove belt poncho lederhosen suspenders' + ... def wear(self): + ... print 'now wearing a ' + self.name + ... 
+ >>> Garment.poncho.wear() + now wearing a poncho + """ + + __metaclass__ = Enumeration + + def __init__(self, name, index): + self.name = name + self.index = index + + def __str__(self): + return type(self).__name__ + '.' + self.name + + def __repr__(self): + return str(self) + + def __cmp__(self, other): + if type(self) is type(other): + # Note that we're assuming that the items are direct + # instances of the same Enumeration (i.e., no fancy + # subclassing), which is probably okay. + return cmp(self.index, other.index) + else: + return NotImplemented + +def enum(*values, **kwargs): + """Shorthand for creating a new Enumeration class. + + Call with enumeration values as a list, a space-delimited string, or + just an argument list. To give the class a name, pass it as the + `name` keyword argument. Otherwise, a name will be chosen for you. + + The following are all equivalent: + + enum('pinkie ring middle index thumb') + enum('pinkie', 'ring', 'middle', 'index', 'thumb') + enum(['pinkie', 'ring', 'middle', 'index', 'thumb']) + """ + + if ('name' not in kwargs) or kwargs['name'] is None: + # Create a probably-unique name. It doesn't really have to be + # unique, but getting distinct names each time helps with + # identification in debugging. + name = 'Enumeration' + hex(random.randint(0,0xfffffff))[2:].upper() + else: + name = kwargs['name'] + + if len(values) == 1: + # If there's only one value, we have a couple of alternate calling + # styles. + if isinstance(values[0], basestring) or hasattr(values[0], '__iter__'): + values = values[0] + + return type(name, (Enumerated,), {'values': values}) diff --git a/lib/beets/util/pipeline.py b/lib/beets/util/pipeline.py new file mode 100644 index 00000000..c0dd7108 --- /dev/null +++ b/lib/beets/util/pipeline.py @@ -0,0 +1,442 @@ +# This file is part of beets. +# Copyright 2011, Adrian Sampson. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Simple but robust implementation of generator/coroutine-based +pipelines in Python. The pipelines may be run either sequentially +(single-threaded) or in parallel (one thread per pipeline stage). + +This implementation supports pipeline bubbles (indications that the +processing for a certain item should abort). To use them, yield the +BUBBLE constant from any stage coroutine except the last. + +In the parallel case, the implementation transparently handles thread +shutdown when the processing is complete and when a stage raises an +exception. KeyboardInterrupts (^C) are also handled. + +When running a parallel pipeline, it is also possible to use +multiple coroutines for the same pipeline stage; this lets you speed +up a bottleneck stage by dividing its work among multiple threads. +To do so, pass an iterable of coroutines to the Pipeline constructor +in place of any single coroutine. +""" +from __future__ import with_statement # for Python 2.5 +import Queue +from threading import Thread, Lock +import sys +import types + +BUBBLE = '__PIPELINE_BUBBLE__' +POISON = '__PIPELINE_POISON__' + +DEFAULT_QUEUE_SIZE = 16 + +def _invalidate_queue(q, val=None, sync=True): + """Breaks a Queue such that it never blocks, always has size 1, + and has no maximum size. get()ing from the queue returns `val`, + which defaults to None. 
`sync` controls whether a lock is + required (because it's not reentrant!). + """ + def _qsize(len=len): + return 1 + def _put(item): + pass + def _get(): + return val + + if sync: + q.mutex.acquire() + + try: + q.maxsize = 0 + q._qsize = _qsize + q._put = _put + q._get = _get + q.not_empty.notifyAll() + q.not_full.notifyAll() + + finally: + if sync: + q.mutex.release() + +class CountedQueue(Queue.Queue): + """A queue that keeps track of the number of threads that are + still feeding into it. The queue is poisoned when all threads are + finished with the queue. + """ + def __init__(self, maxsize=0): + Queue.Queue.__init__(self, maxsize) + self.nthreads = 0 + self.poisoned = False + + def acquire(self): + """Indicate that a thread will start putting into this queue. + Should not be called after the queue is already poisoned. + """ + with self.mutex: + assert not self.poisoned + assert self.nthreads >= 0 + self.nthreads += 1 + + def release(self): + """Indicate that a thread that was putting into this queue has + exited. If this is the last thread using the queue, the queue + is poisoned. + """ + with self.mutex: + self.nthreads -= 1 + assert self.nthreads >= 0 + if self.nthreads == 0: + # All threads are done adding to this queue. Poison it + # when it becomes empty. + self.poisoned = True + + # Replacement _get invalidates when no items remain. + _old_get = self._get + def _get(): + out = _old_get() + if not self.queue: + _invalidate_queue(self, POISON, False) + return out + + if self.queue: + # Items remain. + self._get = _get + else: + # No items. Invalidate immediately. + _invalidate_queue(self, POISON, False) + +class MultiMessage(object): + """A message yielded by a pipeline stage encapsulating multiple + values to be sent to the next stage. + """ + def __init__(self, messages): + self.messages = messages +def multiple(messages): + """Yield multiple([message, ..]) from a pipeline stage to send + multiple values to the next pipeline stage. 
+ """ + return MultiMessage(messages) + +def _allmsgs(obj): + """Returns a list of all the messages encapsulated in obj. If obj + is a MultiMessage, returns its enclosed messages. If obj is BUBBLE, + returns an empty list. Otherwise, returns a list containing obj. + """ + if isinstance(obj, MultiMessage): + return obj.messages + elif obj == BUBBLE: + return [] + else: + return [obj] + +class PipelineThread(Thread): + """Abstract base class for pipeline-stage threads.""" + def __init__(self, all_threads): + super(PipelineThread, self).__init__() + self.abort_lock = Lock() + self.abort_flag = False + self.all_threads = all_threads + self.exc_info = None + + def abort(self): + """Shut down the thread at the next chance possible. + """ + with self.abort_lock: + self.abort_flag = True + + # Ensure that we are not blocking on a queue read or write. + if hasattr(self, 'in_queue'): + _invalidate_queue(self.in_queue) + if hasattr(self, 'out_queue'): + _invalidate_queue(self.out_queue) + + def abort_all(self, exc_info): + """Abort all other threads in the system for an exception. + """ + self.exc_info = exc_info + for thread in self.all_threads: + thread.abort() + +class FirstPipelineThread(PipelineThread): + """The thread running the first stage in a parallel pipeline setup. + The coroutine should just be a generator. + """ + def __init__(self, coro, out_queue, all_threads): + super(FirstPipelineThread, self).__init__(all_threads) + self.coro = coro + self.out_queue = out_queue + self.out_queue.acquire() + + self.abort_lock = Lock() + self.abort_flag = False + + def run(self): + try: + while True: + with self.abort_lock: + if self.abort_flag: + return + + # Get the value from the generator. + try: + msg = self.coro.next() + except StopIteration: + break + + # Send messages to the next stage. 
+ for msg in _allmsgs(msg): + with self.abort_lock: + if self.abort_flag: + return + self.out_queue.put(msg) + + except: + self.abort_all(sys.exc_info()) + return + + # Generator finished; shut down the pipeline. + self.out_queue.release() + +class MiddlePipelineThread(PipelineThread): + """A thread running any stage in the pipeline except the first or + last. + """ + def __init__(self, coro, in_queue, out_queue, all_threads): + super(MiddlePipelineThread, self).__init__(all_threads) + self.coro = coro + self.in_queue = in_queue + self.out_queue = out_queue + self.out_queue.acquire() + + def run(self): + try: + # Prime the coroutine. + self.coro.next() + + while True: + with self.abort_lock: + if self.abort_flag: + return + + # Get the message from the previous stage. + msg = self.in_queue.get() + if msg is POISON: + break + + with self.abort_lock: + if self.abort_flag: + return + + # Invoke the current stage. + out = self.coro.send(msg) + + # Send messages to next stage. + for msg in _allmsgs(out): + with self.abort_lock: + if self.abort_flag: + return + self.out_queue.put(msg) + + except: + self.abort_all(sys.exc_info()) + return + + # Pipeline is shutting down normally. + self.out_queue.release() + +class LastPipelineThread(PipelineThread): + """A thread running the last stage in a pipeline. The coroutine + should yield nothing. + """ + def __init__(self, coro, in_queue, all_threads): + super(LastPipelineThread, self).__init__(all_threads) + self.coro = coro + self.in_queue = in_queue + + def run(self): + # Prime the coroutine. + self.coro.next() + + try: + while True: + with self.abort_lock: + if self.abort_flag: + return + + # Get the message from the previous stage. + msg = self.in_queue.get() + if msg is POISON: + break + + with self.abort_lock: + if self.abort_flag: + return + + # Send to consumer. + self.coro.send(msg) + + except: + self.abort_all(sys.exc_info()) + return + +class Pipeline(object): + """Represents a staged pattern of work. 
Each stage in the pipeline + is a coroutine that receives messages from the previous stage and + yields messages to be sent to the next stage. + """ + def __init__(self, stages): + """Makes a new pipeline from a list of coroutines. There must + be at least two stages. + """ + if len(stages) < 2: + raise ValueError('pipeline must have at least two stages') + self.stages = [] + for stage in stages: + if isinstance(stage, types.GeneratorType): + # Default to one thread per stage. + self.stages.append((stage,)) + else: + self.stages.append(stage) + + def run_sequential(self): + """Run the pipeline sequentially in the current thread. The + stages are run one after the other. Only the first coroutine + in each stage is used. + """ + coros = [stage[0] for stage in self.stages] + + # "Prime" the coroutines. + for coro in coros[1:]: + coro.next() + + # Begin the pipeline. + for out in coros[0]: + msgs = _allmsgs(out) + for coro in coros[1:]: + next_msgs = [] + for msg in msgs: + out = coro.send(msg) + next_msgs.extend(_allmsgs(out)) + msgs = next_msgs + + def run_parallel(self, queue_size=DEFAULT_QUEUE_SIZE): + """Run the pipeline in parallel using one thread per stage. The + messages between the stages are stored in queues of the given + size. + """ + queues = [CountedQueue(queue_size) for i in range(len(self.stages)-1)] + threads = [] + + # Set up first stage. + for coro in self.stages[0]: + threads.append(FirstPipelineThread(coro, queues[0], threads)) + + # Middle stages. + for i in range(1, len(self.stages)-1): + for coro in self.stages[i]: + threads.append(MiddlePipelineThread( + coro, queues[i-1], queues[i], threads + )) + + # Last stage. + for coro in self.stages[-1]: + threads.append( + LastPipelineThread(coro, queues[-1], threads) + ) + + # Start threads. + for thread in threads: + thread.start() + + # Wait for termination. The final thread lasts the longest. + try: + # Using a timeout allows us to receive KeyboardInterrupt + # exceptions during the join(). 
+ while threads[-1].isAlive(): + threads[-1].join(1) + + except: + # Stop all the threads immediately. + for thread in threads: + thread.abort() + raise + + finally: + # Make completely sure that all the threads have finished + # before we return. They should already be either finished, + # in normal operation, or aborted, in case of an exception. + for thread in threads[:-1]: + thread.join() + + for thread in threads: + exc_info = thread.exc_info + if exc_info: + # Make the exception appear as it was raised originally. + raise exc_info[0], exc_info[1], exc_info[2] + +# Smoke test. +if __name__ == '__main__': + import time + + # Test a normally-terminating pipeline both in sequence and + # in parallel. + def produce(): + for i in range(5): + print 'generating %i' % i + time.sleep(1) + yield i + def work(): + num = yield + while True: + print 'processing %i' % num + time.sleep(2) + num = yield num*2 + def consume(): + while True: + num = yield + time.sleep(1) + print 'received %i' % num + ts_start = time.time() + Pipeline([produce(), work(), consume()]).run_sequential() + ts_seq = time.time() + Pipeline([produce(), work(), consume()]).run_parallel() + ts_par = time.time() + Pipeline([produce(), (work(), work()), consume()]).run_parallel() + ts_end = time.time() + print 'Sequential time:', ts_seq - ts_start + print 'Parallel time:', ts_par - ts_seq + print 'Multiply-parallel time:', ts_end - ts_par + print + + # Test a pipeline that raises an exception. 
+ def exc_produce(): + for i in range(10): + print 'generating %i' % i + time.sleep(1) + yield i + def exc_work(): + num = yield + while True: + print 'processing %i' % num + time.sleep(3) + if num == 3: + raise Exception() + num = yield num * 2 + def exc_consume(): + while True: + num = yield + #if num == 4: + # raise Exception() + print 'received %i' % num + Pipeline([exc_produce(), exc_work(), exc_consume()]).run_parallel(1) diff --git a/lib/beets/vfs.py b/lib/beets/vfs.py new file mode 100644 index 00000000..614bc8f5 --- /dev/null +++ b/lib/beets/vfs.py @@ -0,0 +1,48 @@ +# This file is part of beets. +# Copyright 2011, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""A simple utility for constructing filesystem-like trees from beets +libraries. +""" +from collections import namedtuple +from lib.beets import util + +Node = namedtuple('Node', ['files', 'dirs']) + +def _insert(node, path, itemid): + """Insert an item into a virtual filesystem node.""" + if len(path) == 1: + # Last component. Insert file. + node.files[path[0]] = itemid + else: + # In a directory. + dirname = path[0] + rest = path[1:] + if dirname not in node.dirs: + node.dirs[dirname] = Node({}, {}) + _insert(node.dirs[dirname], rest, itemid) + +def libtree(lib): + """Generates a filesystem-like directory tree for the files + contained in `lib`. Filesystem nodes are (files, dirs) named + tuples in which both components are dictionaries. 
The first + maps filenames to Item ids. The second maps directory names to + child node tuples. + """ + root = Node({}, {}) + for item in lib.items(): + dest = lib.destination(item, fragment=True) + parts = util.components(dest) + _insert(root, parts, item.id) + return root diff --git a/lib/munkres.py b/lib/munkres.py new file mode 100755 index 00000000..a79ceef3 --- /dev/null +++ b/lib/munkres.py @@ -0,0 +1,791 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 -*- + +# Documentation is intended to be processed by Epydoc. + +""" +Introduction +============ + +The Munkres module provides an implementation of the Munkres algorithm +(also called the Hungarian algorithm or the Kuhn-Munkres algorithm), +useful for solving the Assignment Problem. + +Assignment Problem +================== + +Let *C* be an *n*\ x\ *n* matrix representing the costs of each of *n* workers +to perform any of *n* jobs. The assignment problem is to assign jobs to +workers in a way that minimizes the total cost. Since each worker can perform +only one job and each job can be assigned to only one worker the assignments +represent an independent set of the matrix *C*. + +One way to generate the optimal set is to create all permutations of +the indexes necessary to traverse the matrix so that no row and column +are used more than once. 
For instance, given this matrix (expressed in +Python):: + + matrix = [[5, 9, 1], + [10, 3, 2], + [8, 7, 4]] + +You could use this code to generate the traversal indexes:: + + def permute(a, results): + if len(a) == 1: + results.insert(len(results), a) + + else: + for i in range(0, len(a)): + element = a[i] + a_copy = [a[j] for j in range(0, len(a)) if j != i] + subresults = [] + permute(a_copy, subresults) + for subresult in subresults: + result = [element] + subresult + results.insert(len(results), result) + + results = [] + permute(range(len(matrix)), results) # [0, 1, 2] for a 3x3 matrix + +After the call to permute(), the results matrix would look like this:: + + [[0, 1, 2], + [0, 2, 1], + [1, 0, 2], + [1, 2, 0], + [2, 0, 1], + [2, 1, 0]] + +You could then use that index matrix to loop over the original cost matrix +and calculate the smallest cost of the combinations:: + + n = len(matrix) + minval = sys.maxint + for row in range(n): + cost = 0 + for col in range(n): + cost += matrix[row][col] + minval = min(cost, minval) + + print minval + +While this approach works fine for small matrices, it does not scale. It +executes in O(*n*!) time: Calculating the permutations for an *n*\ x\ *n* +matrix requires *n*! operations. For a 12x12 matrix, that's 479,001,600 +traversals. Even if you could manage to perform each traversal in just one +millisecond, it would still take more than 133 hours to perform the entire +traversal. A 20x20 matrix would take 2,432,902,008,176,640,000 operations. At +an optimistic millisecond per operation, that's more than 77 million years. + +The Munkres algorithm runs in O(*n*\ ^3) time, rather than O(*n*!). This +package provides an implementation of that algorithm. + +This version is based on +http://www.public.iastate.edu/~ddoty/HungarianAlgorithm.html. + +This version was written for Python by Brian Clapper from the (Ada) algorithm +at the above web site. 
(The ``Algorithm::Munkres`` Perl version, in CPAN, was +clearly adapted from the same web site.) + +Usage +===== + +Construct a Munkres object:: + + from munkres import Munkres + + m = Munkres() + +Then use it to compute the lowest cost assignment from a cost matrix. Here's +a sample program:: + + from munkres import Munkres, print_matrix + + matrix = [[5, 9, 1], + [10, 3, 2], + [8, 7, 4]] + m = Munkres() + indexes = m.compute(matrix) + print_matrix(matrix, msg='Lowest cost through this matrix:') + total = 0 + for row, column in indexes: + value = matrix[row][column] + total += value + print '(%d, %d) -> %d' % (row, column, value) + print 'total cost: %d' % total + +Running that program produces:: + + Lowest cost through this matrix: + [5, 9, 1] + [10, 3, 2] + [8, 7, 4] + (0, 0) -> 5 + (1, 1) -> 3 + (2, 2) -> 4 + total cost=12 + +The instantiated Munkres object can be used multiple times on different +matrices. + +Non-square Cost Matrices +======================== + +The Munkres algorithm assumes that the cost matrix is square. However, it's +possible to use a rectangular matrix if you first pad it with 0 values to make +it square. This module automatically pads rectangular cost matrices to make +them square. + +Notes: + +- The module operates on a *copy* of the caller's matrix, so any padding will + not be seen by the caller. +- The cost matrix must be rectangular or square. An irregular matrix will + *not* work. + +Calculating Profit, Rather than Cost +==================================== + +The cost matrix is just that: A cost matrix. The Munkres algorithm finds +the combination of elements (one from each row and column) that results in +the smallest cost. It's also possible to use the algorithm to maximize +profit. To do that, however, you have to convert your profit matrix to a +cost matrix. The simplest way to do that is to subtract all elements from a +large value. 
For example:: + + from munkres import Munkres, print_matrix + + matrix = [[5, 9, 1], + [10, 3, 2], + [8, 7, 4]] + cost_matrix = [] + for row in matrix: + cost_row = [] + for col in row: + cost_row += [sys.maxint - col] + cost_matrix += [cost_row] + + m = Munkres() + indexes = m.compute(cost_matrix) + print_matrix(matrix, msg='Highest profit through this matrix:') + total = 0 + for row, column in indexes: + value = matrix[row][column] + total += value + print '(%d, %d) -> %d' % (row, column, value) + + print 'total profit=%d' % total + +Running that program produces:: + + Highest profit through this matrix: + [5, 9, 1] + [10, 3, 2] + [8, 7, 4] + (0, 1) -> 9 + (1, 0) -> 10 + (2, 2) -> 4 + total profit=23 + +The ``munkres`` module provides a convenience method for creating a cost +matrix from a profit matrix. Since it doesn't know whether the matrix contains +floating point numbers, decimals, or integers, you have to provide the +conversion function; but the convenience method takes care of the actual +creation of the cost matrix:: + + import munkres + + cost_matrix = munkres.make_cost_matrix(matrix, + lambda cost: sys.maxint - cost) + +So, the above profit-calculation program can be recast as:: + + from munkres import Munkres, print_matrix, make_cost_matrix + + matrix = [[5, 9, 1], + [10, 3, 2], + [8, 7, 4]] + cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxint - cost) + m = Munkres() + indexes = m.compute(cost_matrix) + print_matrix(matrix, msg='Lowest cost through this matrix:') + total = 0 + for row, column in indexes: + value = matrix[row][column] + total += value + print '(%d, %d) -> %d' % (row, column, value) + print 'total profit=%d' % total + +References +========== + +1. http://www.public.iastate.edu/~ddoty/HungarianAlgorithm.html + +2. Harold W. Kuhn. The Hungarian Method for the assignment problem. + *Naval Research Logistics Quarterly*, 2:83-97, 1955. + +3. Harold W. Kuhn. Variants of the Hungarian method for assignment + problems. 
*Naval Research Logistics Quarterly*, 3: 253-258, 1956. + +4. Munkres, J. Algorithms for the Assignment and Transportation Problems. + *Journal of the Society of Industrial and Applied Mathematics*, + 5(1):32-38, March, 1957. + +5. http://en.wikipedia.org/wiki/Hungarian_algorithm + +Copyright and License +===================== + +This software is released under a BSD license, adapted from + + +Copyright (c) 2008 Brian M. Clapper +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name "clapper.org" nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+""" + +__docformat__ = 'restructuredtext' + +# --------------------------------------------------------------------------- +# Imports +# --------------------------------------------------------------------------- + +import sys + +# --------------------------------------------------------------------------- +# Exports +# --------------------------------------------------------------------------- + +__all__ = ['Munkres', 'make_cost_matrix'] + +# --------------------------------------------------------------------------- +# Globals +# --------------------------------------------------------------------------- + +# Info about the module +__version__ = "1.0.5.4" +__author__ = "Brian Clapper, bmc@clapper.org" +__url__ = "http://software.clapper.org/munkres/" +__copyright__ = "(c) 2008 Brian M. Clapper" +__license__ = "BSD-style license" + +# --------------------------------------------------------------------------- +# Classes +# --------------------------------------------------------------------------- + +class Munkres: + """ + Calculate the Munkres solution to the classical assignment problem. + See the module documentation for usage. + """ + + def __init__(self): + """Create a new instance""" + self.C = None + self.row_covered = [] + self.col_covered = [] + self.n = 0 + self.Z0_r = 0 + self.Z0_c = 0 + self.marked = None + self.path = None + + def make_cost_matrix(profit_matrix, inversion_function): + """ + **DEPRECATED** + + Please use the module function ``make_cost_matrix()``. + """ + import munkres + return munkres.make_cost_matrix(profit_matrix, inversion_function) + + make_cost_matrix = staticmethod(make_cost_matrix) + + def pad_matrix(self, matrix, pad_value=0): + """ + Pad a possibly non-square matrix to make it square. 
+ + :Parameters: + matrix : list of lists + matrix to pad + + pad_value : int + value to use to pad the matrix + + :rtype: list of lists + :return: a new, possibly padded, matrix + """ + max_columns = 0 + total_rows = len(matrix) + + for row in matrix: + max_columns = max(max_columns, len(row)) + + total_rows = max(max_columns, total_rows) + + new_matrix = [] + for row in matrix: + row_len = len(row) + new_row = row[:] + if total_rows > row_len: + # Row too short. Pad it. + new_row += [0] * (total_rows - row_len) + new_matrix += [new_row] + + while len(new_matrix) < total_rows: + new_matrix += [[0] * total_rows] + + return new_matrix + + def compute(self, cost_matrix): + """ + Compute the indexes for the lowest-cost pairings between rows and + columns in the database. Returns a list of (row, column) tuples + that can be used to traverse the matrix. + + :Parameters: + cost_matrix : list of lists + The cost matrix. If this cost matrix is not square, it + will be padded with zeros, via a call to ``pad_matrix()``. + (This method does *not* modify the caller's matrix. It + operates on a copy of the matrix.) + + **WARNING**: This code handles square and rectangular + matrices. It does *not* handle irregular matrices. 
+ + :rtype: list + :return: A list of ``(row, column)`` tuples that describe the lowest + cost path through the matrix + + """ + self.C = self.pad_matrix(cost_matrix) + self.n = len(self.C) + self.original_length = len(cost_matrix) + self.original_width = len(cost_matrix[0]) + self.row_covered = [False for i in range(self.n)] + self.col_covered = [False for i in range(self.n)] + self.Z0_r = 0 + self.Z0_c = 0 + self.path = self.__make_matrix(self.n * 2, 0) + self.marked = self.__make_matrix(self.n, 0) + + done = False + step = 1 + + steps = { 1 : self.__step1, + 2 : self.__step2, + 3 : self.__step3, + 4 : self.__step4, + 5 : self.__step5, + 6 : self.__step6 } + + while not done: + try: + func = steps[step] + step = func() + except KeyError: + done = True + + # Look for the starred columns + results = [] + for i in range(self.original_length): + for j in range(self.original_width): + if self.marked[i][j] == 1: + results += [(i, j)] + + return results + + def __copy_matrix(self, matrix): + """Return an exact copy of the supplied matrix""" + return copy.deepcopy(matrix) + + def __make_matrix(self, n, val): + """Create an *n*x*n* matrix, populating it with the specific value.""" + matrix = [] + for i in range(n): + matrix += [[val for j in range(n)]] + return matrix + + def __step1(self): + """ + For each row of the matrix, find the smallest element and + subtract it from every element in its row. Go to Step 2. + """ + C = self.C + n = self.n + for i in range(n): + minval = min(self.C[i]) + # Find the minimum value for this row and subtract that minimum + # from every element in the row. + for j in range(n): + self.C[i][j] -= minval + + return 2 + + def __step2(self): + """ + Find a zero (Z) in the resulting matrix. If there is no starred + zero in its row or column, star Z. Repeat for each element in the + matrix. Go to Step 3. 
+ """ + n = self.n + for i in range(n): + for j in range(n): + if (self.C[i][j] == 0) and \ + (not self.col_covered[j]) and \ + (not self.row_covered[i]): + self.marked[i][j] = 1 + self.col_covered[j] = True + self.row_covered[i] = True + + self.__clear_covers() + return 3 + + def __step3(self): + """ + Cover each column containing a starred zero. If K columns are + covered, the starred zeros describe a complete set of unique + assignments. In this case, Go to DONE, otherwise, Go to Step 4. + """ + n = self.n + count = 0 + for i in range(n): + for j in range(n): + if self.marked[i][j] == 1: + self.col_covered[j] = True + count += 1 + + if count >= n: + step = 7 # done + else: + step = 4 + + return step + + def __step4(self): + """ + Find a noncovered zero and prime it. If there is no starred zero + in the row containing this primed zero, Go to Step 5. Otherwise, + cover this row and uncover the column containing the starred + zero. Continue in this manner until there are no uncovered zeros + left. Save the smallest uncovered value and Go to Step 6. + """ + step = 0 + done = False + row = -1 + col = -1 + star_col = -1 + while not done: + (row, col) = self.__find_a_zero() + if row < 0: + done = True + step = 6 + else: + self.marked[row][col] = 2 + star_col = self.__find_star_in_row(row) + if star_col >= 0: + col = star_col + self.row_covered[row] = True + self.col_covered[col] = False + else: + done = True + self.Z0_r = row + self.Z0_c = col + step = 5 + + return step + + def __step5(self): + """ + Construct a series of alternating primed and starred zeros as + follows. Let Z0 represent the uncovered primed zero found in Step 4. + Let Z1 denote the starred zero in the column of Z0 (if any). + Let Z2 denote the primed zero in the row of Z1 (there will always + be one). Continue until the series terminates at a primed zero + that has no starred zero in its column. 
Unstar each starred zero + of the series, star each primed zero of the series, erase all + primes and uncover every line in the matrix. Return to Step 3 + """ + count = 0 + path = self.path + path[count][0] = self.Z0_r + path[count][1] = self.Z0_c + done = False + while not done: + row = self.__find_star_in_col(path[count][1]) + if row >= 0: + count += 1 + path[count][0] = row + path[count][1] = path[count-1][1] + else: + done = True + + if not done: + col = self.__find_prime_in_row(path[count][0]) + count += 1 + path[count][0] = path[count-1][0] + path[count][1] = col + + self.__convert_path(path, count) + self.__clear_covers() + self.__erase_primes() + return 3 + + def __step6(self): + """ + Add the value found in Step 4 to every element of each covered + row, and subtract it from every element of each uncovered column. + Return to Step 4 without altering any stars, primes, or covered + lines. + """ + minval = self.__find_smallest() + for i in range(self.n): + for j in range(self.n): + if self.row_covered[i]: + self.C[i][j] += minval + if not self.col_covered[j]: + self.C[i][j] -= minval + return 4 + + def __find_smallest(self): + """Find the smallest uncovered value in the matrix.""" + minval = sys.maxint + for i in range(self.n): + for j in range(self.n): + if (not self.row_covered[i]) and (not self.col_covered[j]): + if minval > self.C[i][j]: + minval = self.C[i][j] + return minval + + def __find_a_zero(self): + """Find the first uncovered element with value 0""" + row = -1 + col = -1 + i = 0 + n = self.n + done = False + + while not done: + j = 0 + while True: + if (self.C[i][j] == 0) and \ + (not self.row_covered[i]) and \ + (not self.col_covered[j]): + row = i + col = j + done = True + j += 1 + if j >= n: + break + i += 1 + if i >= n: + done = True + + return (row, col) + + def __find_star_in_row(self, row): + """ + Find the first starred element in the specified row. Returns + the column index, or -1 if no starred element was found. 
+ """ + col = -1 + for j in range(self.n): + if self.marked[row][j] == 1: + col = j + break + + return col + + def __find_star_in_col(self, col): + """ + Find the first starred element in the specified row. Returns + the row index, or -1 if no starred element was found. + """ + row = -1 + for i in range(self.n): + if self.marked[i][col] == 1: + row = i + break + + return row + + def __find_prime_in_row(self, row): + """ + Find the first prime element in the specified row. Returns + the column index, or -1 if no starred element was found. + """ + col = -1 + for j in range(self.n): + if self.marked[row][j] == 2: + col = j + break + + return col + + def __convert_path(self, path, count): + for i in range(count+1): + if self.marked[path[i][0]][path[i][1]] == 1: + self.marked[path[i][0]][path[i][1]] = 0 + else: + self.marked[path[i][0]][path[i][1]] = 1 + + def __clear_covers(self): + """Clear all covered matrix cells""" + for i in range(self.n): + self.row_covered[i] = False + self.col_covered[i] = False + + def __erase_primes(self): + """Erase all prime markings""" + for i in range(self.n): + for j in range(self.n): + if self.marked[i][j] == 2: + self.marked[i][j] = 0 + +# --------------------------------------------------------------------------- +# Functions +# --------------------------------------------------------------------------- + +def make_cost_matrix(profit_matrix, inversion_function): + """ + Create a cost matrix from a profit matrix by calling + 'inversion_function' to invert each value. The inversion + function must take one numeric argument (of any type) and return + another numeric argument which is presumed to be the cost inverse + of the original profit. + + This is a static method. Call it like this: + + .. python:: + + cost_matrix = Munkres.make_cost_matrix(matrix, inversion_func) + + For example: + + .. 
python:: + + cost_matrix = Munkres.make_cost_matrix(matrix, lambda x : sys.maxint - x) + + :Parameters: + profit_matrix : list of lists + The matrix to convert from a profit to a cost matrix + + inversion_function : function + The function to use to invert each entry in the profit matrix + + :rtype: list of lists + :return: The converted matrix + """ + cost_matrix = [] + for row in profit_matrix: + cost_matrix.append([inversion_function(value) for value in row]) + return cost_matrix + +def print_matrix(matrix, msg=None): + """ + Convenience function: Displays the contents of a matrix of integers. + + :Parameters: + matrix : list of lists + Matrix to print + + msg : str + Optional message to print before displaying the matrix + """ + import math + + if msg is not None: + print msg + + # Calculate the appropriate format width. + width = 0 + for row in matrix: + for val in row: + width = max(width, int(math.log10(val)) + 1) + + # Make the format string + format = '%%%dd' % width + + # Print the matrix + for row in matrix: + sep = '[' + for val in row: + sys.stdout.write(sep + format % val) + sep = ', ' + sys.stdout.write(']\n') + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +if __name__ == '__main__': + + + matrices = [ + # Square + ([[400, 150, 400], + [400, 450, 600], + [300, 225, 300]], + 850 # expected cost + ), + + # Rectangular variant + ([[400, 150, 400, 1], + [400, 450, 600, 2], + [300, 225, 300, 3]], + 452 # expected cost + ), + + # Square + ([[10, 10, 8], + [ 9, 8, 1], + [ 9, 7, 4]], + 18 + ), + + # Rectangular variant + ([[10, 10, 8, 11], + [ 9, 8, 1, 1], + [ 9, 7, 4, 10]], + 15 + ), + ] + + m = Munkres() + for cost_matrix, expected_total in matrices: + print_matrix(cost_matrix, msg='cost matrix') + indexes = m.compute(cost_matrix) + total_cost = 0 + for r, c in indexes: + x = cost_matrix[r][c] + total_cost += x + print '(%d, %d) -> %d' 
% (r, c, x) + print 'lowest cost=%d' % total_cost + assert expected_total == total_cost + diff --git a/lib/mutagen/__init__.py b/lib/mutagen/__init__.py new file mode 100644 index 00000000..8eedf2d8 --- /dev/null +++ b/lib/mutagen/__init__.py @@ -0,0 +1,217 @@ +#! /usr/bin/env python +# +# mutagen aims to be an all purpose media tagging library +# Copyright (C) 2005 Michael Urman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# $Id: __init__.py 4348 2008-12-02 02:41:15Z piman $ +# + +"""Mutagen aims to be an all purpose tagging library. + + import mutagen.[format] + metadata = mutagen.[format].Open(filename) + +metadata acts like a dictionary of tags in the file. Tags are generally a +list of string-like values, but may have additional methods available +depending on tag or format. They may also be entirely different objects +for certain keys, again depending on format. +""" + +version = (1, 20) +version_string = ".".join(map(str, version)) + +import warnings + +from lib.mutagen import _util + +class Metadata(object): + """An abstract dict-like object. + + Metadata is the base class for many of the tag objects in Mutagen. + """ + + def __init__(self, *args, **kwargs): + if args or kwargs: + self.load(*args, **kwargs) + + def load(self, *args, **kwargs): + raise NotImplementedError + + def save(self, filename=None): + raise NotImplementedError + + def delete(self, filename=None): + raise NotImplementedError + +class FileType(_util.DictMixin): + """An abstract object wrapping tags and audio stream information. + + Attributes: + info -- stream information (length, bitrate, sample rate) + tags -- metadata tags, if any + + Each file format has different potential tags and stream + information. 
+ + FileTypes implement an interface very similar to Metadata; the + dict interface, save, load, and delete calls on a FileType call + the appropriate methods on its tag data. + """ + + info = None + tags = None + filename = None + _mimes = ["application/octet-stream"] + + def __init__(self, filename=None, *args, **kwargs): + if filename is None: + warnings.warn("FileType constructor requires a filename", + DeprecationWarning) + else: + self.load(filename, *args, **kwargs) + + def load(self, filename, *args, **kwargs): + raise NotImplementedError + + def __getitem__(self, key): + """Look up a metadata tag key. + + If the file has no tags at all, a KeyError is raised. + """ + if self.tags is None: raise KeyError, key + else: return self.tags[key] + + def __setitem__(self, key, value): + """Set a metadata tag. + + If the file has no tags, an appropriate format is added (but + not written until save is called). + """ + if self.tags is None: + self.add_tags() + self.tags[key] = value + + def __delitem__(self, key): + """Delete a metadata tag key. + + If the file has no tags at all, a KeyError is raised. + """ + if self.tags is None: raise KeyError, key + else: del(self.tags[key]) + + def keys(self): + """Return a list of keys in the metadata tag. + + If the file has no tags at all, an empty list is returned. + """ + if self.tags is None: return [] + else: return self.tags.keys() + + def delete(self, filename=None): + """Remove tags from a file.""" + if self.tags is not None: + if filename is None: + filename = self.filename + else: + warnings.warn( + "delete(filename=...) is deprecated, reload the file", + DeprecationWarning) + return self.tags.delete(filename) + + def save(self, filename=None, **kwargs): + """Save metadata tags.""" + if filename is None: + filename = self.filename + else: + warnings.warn( + "save(filename=...) 
is deprecated, reload the file", + DeprecationWarning) + if self.tags is not None: + return self.tags.save(filename, **kwargs) + else: raise ValueError("no tags in file") + + def pprint(self): + """Print stream information and comment key=value pairs.""" + stream = "%s (%s)" % (self.info.pprint(), self.mime[0]) + try: tags = self.tags.pprint() + except AttributeError: + return stream + else: return stream + ((tags and "\n" + tags) or "") + + def add_tags(self): + raise NotImplementedError + + def __get_mime(self): + mimes = [] + for Kind in type(self).__mro__: + for mime in getattr(Kind, '_mimes', []): + if mime not in mimes: + mimes.append(mime) + return mimes + + mime = property(__get_mime) + +def File(filename, options=None, easy=False): + """Guess the type of the file and try to open it. + + The file type is decided by several things, such as the first 128 + bytes (which usually contains a file type identifier), the + filename extension, and the presence of existing tags. + + If no appropriate type could be found, None is returned. 
+ """ + + if options is None: + from lib.mutagen.asf import ASF + from lib.mutagen.apev2 import APEv2File + from lib.mutagen.flac import FLAC + if easy: + from lib.mutagen.easyid3 import EasyID3FileType as ID3FileType + else: + from lib.mutagen.id3 import ID3FileType + if easy: + from lib.mutagen.mp3 import EasyMP3 as MP3 + else: + from lib.mutagen.mp3 import MP3 + from lib.mutagen.oggflac import OggFLAC + from lib.mutagen.oggspeex import OggSpeex + from lib.mutagen.oggtheora import OggTheora + from lib.mutagen.oggvorbis import OggVorbis + if easy: + from lib.mutagen.trueaudio import EasyTrueAudio as TrueAudio + else: + from lib.mutagen.trueaudio import TrueAudio + from lib.mutagen.wavpack import WavPack + if easy: + from lib.mutagen.easymp4 import EasyMP4 as MP4 + else: + from lib.mutagen.mp4 import MP4 + from lib.mutagen.musepack import Musepack + from lib.mutagen.monkeysaudio import MonkeysAudio + from lib.mutagen.optimfrog import OptimFROG + options = [MP3, TrueAudio, OggTheora, OggSpeex, OggVorbis, OggFLAC, + FLAC, APEv2File, MP4, ID3FileType, WavPack, Musepack, + MonkeysAudio, OptimFROG, ASF] + + if not options: + return None + + fileobj = file(filename, "rb") + try: + header = fileobj.read(128) + # Sort by name after score. Otherwise import order affects + # Kind sort order, which affects treatment of things with + # equals scores. 
+ results = [(Kind.score(filename, fileobj, header), Kind.__name__) + for Kind in options] + finally: + fileobj.close() + results = zip(results, options) + results.sort() + (score, name), Kind = results[-1] + if score > 0: return Kind(filename) + else: return None diff --git a/lib/mutagen/_constants.py b/lib/mutagen/_constants.py new file mode 100644 index 00000000..2381e979 --- /dev/null +++ b/lib/mutagen/_constants.py @@ -0,0 +1,153 @@ +"""Constants used by Mutagen.""" + +GENRES = [ + u"Blues", + u"Classic Rock", + u"Country", + u"Dance", + u"Disco", + u"Funk", + u"Grunge", + u"Hip-Hop", + u"Jazz", + u"Metal", + u"New Age", + u"Oldies", + u"Other", + u"Pop", + u"R&B", + u"Rap", + u"Reggae", + u"Rock", + u"Techno", + u"Industrial", + u"Alternative", + u"Ska", + u"Death Metal", + u"Pranks", + u"Soundtrack", + u"Euro-Techno", + u"Ambient", + u"Trip-Hop", + u"Vocal", + u"Jazz+Funk", + u"Fusion", + u"Trance", + u"Classical", + u"Instrumental", + u"Acid", + u"House", + u"Game", + u"Sound Clip", + u"Gospel", + u"Noise", + u"Alt. 
Rock", + u"Bass", + u"Soul", + u"Punk", + u"Space", + u"Meditative", + u"Instrumental Pop", + u"Instrumental Rock", + u"Ethnic", + u"Gothic", + u"Darkwave", + u"Techno-Industrial", + u"Electronic", + u"Pop-Folk", + u"Eurodance", + u"Dream", + u"Southern Rock", + u"Comedy", + u"Cult", + u"Gangsta", + u"Top 40", + u"Christian Rap", + u"Pop/Funk", + u"Jungle", + u"Native American", + u"Cabaret", + u"New Wave", + u"Psychadelic", + u"Rave", + u"Showtunes", + u"Trailer", + u"Lo-Fi", + u"Tribal", + u"Acid Punk", + u"Acid Jazz", + u"Polka", + u"Retro", + u"Musical", + u"Rock & Roll", + u"Hard Rock", + u"Folk", + u"Folk/Rock", + u"National Folk", + u"Swing", + u"Fusion", + u"Bebob", + u"Latin", + u"Revival", + u"Celtic", + u"Bluegrass", + u"Avantgarde", + u"Gothic Rock", + u"Progressive Rock", + u"Psychadelic Rock", + u"Symphonic Rock", + u"Slow Rock", + u"Big Band", + u"Chorus", + u"Easy Listening", + u"Acoustic", + u"Humour", + u"Speech", + u"Chanson", + u"Opera", + u"Chamber Music", + u"Sonata", + u"Symphony", + u"Booty Bass", + u"Primus", + u"Porn Groove", + u"Satire", + u"Slow Jam", + u"Club", + u"Tango", + u"Samba", + u"Folklore", + u"Ballad", + u"Power Ballad", + u"Rhythmic Soul", + u"Freestyle", + u"Duet", + u"Punk Rock", + u"Drum Solo", + u"A Capella", + u"Euro-House", + u"Dance Hall", + u"Goa", + u"Drum & Bass", + u"Club-House", + u"Hardcore", + u"Terror", + u"Indie", + u"BritPop", + u"Negerpunk", + u"Polsk Punk", + u"Beat", + u"Christian Gangsta Rap", + u"Heavy Metal", + u"Black Metal", + u"Crossover", + u"Contemporary Christian", + u"Christian Rock", + u"Merengue", + u"Salsa", + u"Thrash Metal", + u"Anime", + u"Jpop", + u"Synthpop" + ] +"""The ID3v1 genre list.""" diff --git a/lib/mutagen/_util.py b/lib/mutagen/_util.py new file mode 100644 index 00000000..79f8f846 --- /dev/null +++ b/lib/mutagen/_util.py @@ -0,0 +1,314 @@ +# Copyright 2006 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU 
General Public License version 2 as +# published by the Free Software Foundation. +# +# $Id: _util.py 4218 2007-12-02 06:11:20Z piman $ + +"""Utility classes for Mutagen. + +You should not rely on the interfaces here being stable. They are +intended for internal use in Mutagen only. +""" + +import struct + +from fnmatch import fnmatchcase + +class DictMixin(object): + """Implement the dict API using keys() and __*item__ methods. + + Similar to UserDict.DictMixin, this takes a class that defines + __getitem__, __setitem__, __delitem__, and keys(), and turns it + into a full dict-like object. + + UserDict.DictMixin is not suitable for this purpose because it's + an old-style class. + + This class is not optimized for very large dictionaries; many + functions have linear memory requirements. I recommend you + override some of these functions if speed is required. + """ + + def __iter__(self): + return iter(self.keys()) + + def has_key(self, key): + try: self[key] + except KeyError: return False + else: return True + __contains__ = has_key + + iterkeys = lambda self: iter(self.keys()) + + def values(self): + return map(self.__getitem__, self.keys()) + itervalues = lambda self: iter(self.values()) + + def items(self): + return zip(self.keys(), self.values()) + iteritems = lambda s: iter(s.items()) + + def clear(self): + map(self.__delitem__, self.keys()) + + def pop(self, key, *args): + if len(args) > 1: + raise TypeError("pop takes at most two arguments") + try: value = self[key] + except KeyError: + if args: return args[0] + else: raise + del(self[key]) + return value + + def popitem(self): + try: + key = self.keys()[0] + return key, self.pop(key) + except IndexError: raise KeyError("dictionary is empty") + + def update(self, other=None, **kwargs): + if other is None: + self.update(kwargs) + other = {} + + try: map(self.__setitem__, other.keys(), other.values()) + except AttributeError: + for key, value in other: + self[key] = value + + def setdefault(self, key, 
default=None): + try: return self[key] + except KeyError: + self[key] = default + return default + + def get(self, key, default=None): + try: return self[key] + except KeyError: return default + + def __repr__(self): + return repr(dict(self.items())) + + def __cmp__(self, other): + if other is None: return 1 + else: return cmp(dict(self.items()), other) + + def __len__(self): + return len(self.keys()) + +class DictProxy(DictMixin): + def __init__(self, *args, **kwargs): + self.__dict = {} + super(DictProxy, self).__init__(*args, **kwargs) + + def __getitem__(self, key): + return self.__dict[key] + + def __setitem__(self, key, value): + self.__dict[key] = value + + def __delitem__(self, key): + del(self.__dict[key]) + + def keys(self): + return self.__dict.keys() + +class cdata(object): + """C character buffer to Python numeric type conversions.""" + + from struct import error + + short_le = staticmethod(lambda data: struct.unpack('h', data)[0]) + ushort_be = staticmethod(lambda data: struct.unpack('>H', data)[0]) + + int_le = staticmethod(lambda data: struct.unpack('i', data)[0]) + uint_be = staticmethod(lambda data: struct.unpack('>I', data)[0]) + + longlong_le = staticmethod(lambda data: struct.unpack('q', data)[0]) + ulonglong_be = staticmethod(lambda data: struct.unpack('>Q', data)[0]) + + to_short_le = staticmethod(lambda data: struct.pack('h', data)) + to_ushort_be = staticmethod(lambda data: struct.pack('>H', data)) + + to_int_le = staticmethod(lambda data: struct.pack('i', data)) + to_uint_be = staticmethod(lambda data: struct.pack('>I', data)) + + to_longlong_le = staticmethod(lambda data: struct.pack('q', data)) + to_ulonglong_be = staticmethod(lambda data: struct.pack('>Q', data)) + + bitswap = ''.join([chr(sum([((val >> i) & 1) << (7-i) for i in range(8)])) + for val in range(256)]) + del(i) + del(val) + + test_bit = staticmethod(lambda value, n: bool((value >> n) & 1)) + +def lock(fileobj): + """Lock a file object 'safely'. 
+ + That means a failure to lock because the platform doesn't + support fcntl or filesystem locks is not considered a + failure. This call does block. + + Returns whether or not the lock was successful, or + raises an exception in more extreme circumstances (full + lock table, invalid file). + """ + try: import fcntl + except ImportError: + return False + else: + try: fcntl.lockf(fileobj, fcntl.LOCK_EX) + except IOError: + # FIXME: There's possibly a lot of complicated + # logic that needs to go here in case the IOError + # is EACCES or EAGAIN. + return False + else: + return True + +def unlock(fileobj): + """Unlock a file object. + + Don't call this on a file object unless a call to lock() + returned true. + """ + # If this fails there's a mismatched lock/unlock pair, + # so we definitely don't want to ignore errors. + import fcntl + fcntl.lockf(fileobj, fcntl.LOCK_UN) + +def insert_bytes(fobj, size, offset, BUFFER_SIZE=2**16): + """Insert size bytes of empty space starting at offset. + + fobj must be an open file object, open rb+ or + equivalent. Mutagen tries to use mmap to resize the file, but + falls back to a significantly slower method if mmap fails. + """ + assert 0 < size + assert 0 <= offset + locked = False + fobj.seek(0, 2) + filesize = fobj.tell() + movesize = filesize - offset + fobj.write('\x00' * size) + fobj.flush() + try: + try: + import mmap + map = mmap.mmap(fobj.fileno(), filesize + size) + try: map.move(offset + size, offset, movesize) + finally: map.close() + except (ValueError, EnvironmentError, ImportError): + # handle broken mmap scenarios + locked = lock(fobj) + fobj.truncate(filesize) + + fobj.seek(0, 2) + padsize = size + # Don't generate an enormous string if we need to pad + # the file out several megs. 
+ while padsize: + addsize = min(BUFFER_SIZE, padsize) + fobj.write("\x00" * addsize) + padsize -= addsize + + fobj.seek(filesize, 0) + while movesize: + # At the start of this loop, fobj is pointing at the end + # of the data we need to move, which is of movesize length. + thismove = min(BUFFER_SIZE, movesize) + # Seek back however much we're going to read this frame. + fobj.seek(-thismove, 1) + nextpos = fobj.tell() + # Read it, so we're back at the end. + data = fobj.read(thismove) + # Seek back to where we need to write it. + fobj.seek(-thismove + size, 1) + # Write it. + fobj.write(data) + # And seek back to the end of the unmoved data. + fobj.seek(nextpos) + movesize -= thismove + + fobj.flush() + finally: + if locked: + unlock(fobj) + +def delete_bytes(fobj, size, offset, BUFFER_SIZE=2**16): + """Delete size bytes of empty space starting at offset. + + fobj must be an open file object, open rb+ or + equivalent. Mutagen tries to use mmap to resize the file, but + falls back to a significantly slower method if mmap fails. 
+ """ + locked = False + assert 0 < size + assert 0 <= offset + fobj.seek(0, 2) + filesize = fobj.tell() + movesize = filesize - offset - size + assert 0 <= movesize + try: + if movesize > 0: + fobj.flush() + try: + import mmap + map = mmap.mmap(fobj.fileno(), filesize) + try: map.move(offset, offset + size, movesize) + finally: map.close() + except (ValueError, EnvironmentError, ImportError): + # handle broken mmap scenarios + locked = lock(fobj) + fobj.seek(offset + size) + buf = fobj.read(BUFFER_SIZE) + while buf: + fobj.seek(offset) + fobj.write(buf) + offset += len(buf) + fobj.seek(offset + size) + buf = fobj.read(BUFFER_SIZE) + fobj.truncate(filesize - size) + fobj.flush() + finally: + if locked: + unlock(fobj) + +def utf8(data): + """Convert a basestring to a valid UTF-8 str.""" + if isinstance(data, str): + return data.decode("utf-8", "replace").encode("utf-8") + elif isinstance(data, unicode): + return data.encode("utf-8") + else: raise TypeError("only unicode/str types can be converted to UTF-8") + +def dict_match(d, key, default=None): + try: + return d[key] + except KeyError: + for pattern, value in d.iteritems(): + if fnmatchcase(key, pattern): + return value + return default diff --git a/lib/mutagen/_vorbis.py b/lib/mutagen/_vorbis.py new file mode 100644 index 00000000..1d88860c --- /dev/null +++ b/lib/mutagen/_vorbis.py @@ -0,0 +1,223 @@ +# Vorbis comment support for Mutagen +# Copyright 2005-2006 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. + +"""Read and write Vorbis comment data. + +Vorbis comments are freeform key/value pairs; keys are +case-insensitive ASCII and values are Unicode strings. A key may have +multiple values. + +The specification is at http://www.xiph.org/vorbis/doc/v-comment.html. 
+""" + +import sys + +from cStringIO import StringIO + +import lib.mutagen +from lib.mutagen._util import DictMixin, cdata + +try: set +except NameError: + from sets import Set as set + +def is_valid_key(key): + """Return true if a string is a valid Vorbis comment key. + + Valid Vorbis comment keys are printable ASCII between 0x20 (space) + and 0x7D ('}'), excluding '='. + """ + for c in key: + if c < " " or c > "}" or c == "=": return False + else: return bool(key) +istag = is_valid_key + +class error(IOError): pass +class VorbisUnsetFrameError(error): pass +class VorbisEncodingError(error): pass + +class VComment(lib.mutagen.Metadata, list): + """A Vorbis comment parser, accessor, and renderer. + + All comment ordering is preserved. A VComment is a list of + key/value pairs, and so any Python list method can be used on it. + + Vorbis comments are always wrapped in something like an Ogg Vorbis + bitstream or a FLAC metadata block, so this loads string data or a + file-like object, not a filename. + + Attributes: + vendor -- the stream 'vendor' (i.e. writer); default 'Mutagen' + """ + + vendor = u"Mutagen " + lib.mutagen.version_string + + def __init__(self, data=None, *args, **kwargs): + # Collect the args to pass to load, this lets child classes + # override just load and get equivalent magic for the + # constructor. + if data is not None: + if isinstance(data, str): + data = StringIO(data) + elif not hasattr(data, 'read'): + raise TypeError("VComment requires string data or a file-like") + self.load(data, *args, **kwargs) + + def load(self, fileobj, errors='replace', framing=True): + """Parse a Vorbis comment from a file-like object. + + Keyword arguments: + errors: + 'strict', 'replace', or 'ignore'. This affects Unicode decoding + and how other malformed content is interpreted. + framing -- if true, fail if a framing bit is not present + + Framing bits are required by the Vorbis comment specification, + but are not used in FLAC Vorbis comment blocks. 
+ + """ + try: + vendor_length = cdata.uint_le(fileobj.read(4)) + self.vendor = fileobj.read(vendor_length).decode('utf-8', errors) + count = cdata.uint_le(fileobj.read(4)) + for i in range(count): + length = cdata.uint_le(fileobj.read(4)) + try: string = fileobj.read(length).decode('utf-8', errors) + except (OverflowError, MemoryError): + raise error("cannot read %d bytes, too large" % length) + try: tag, value = string.split('=', 1) + except ValueError, err: + if errors == "ignore": + continue + elif errors == "replace": + tag, value = u"unknown%d" % i, string + else: + raise VorbisEncodingError, str(err), sys.exc_info()[2] + try: tag = tag.encode('ascii', errors) + except UnicodeEncodeError: + raise VorbisEncodingError, "invalid tag name %r" % tag + else: + if is_valid_key(tag): self.append((tag, value)) + if framing and not ord(fileobj.read(1)) & 0x01: + raise VorbisUnsetFrameError("framing bit was unset") + except (cdata.error, TypeError): + raise error("file is not a valid Vorbis comment") + + def validate(self): + """Validate keys and values. + + Check to make sure every key used is a valid Vorbis key, and + that every value used is a valid Unicode or UTF-8 string. If + any invalid keys or values are found, a ValueError is raised. + """ + + if not isinstance(self.vendor, unicode): + try: self.vendor.decode('utf-8') + except UnicodeDecodeError: raise ValueError + + for key, value in self: + try: + if not is_valid_key(key): raise ValueError + except: raise ValueError("%r is not a valid key" % key) + if not isinstance(value, unicode): + try: value.encode("utf-8") + except: raise ValueError("%r is not a valid value" % value) + else: return True + + def clear(self): + """Clear all keys from the comment.""" + del(self[:]) + + def write(self, framing=True): + """Return a string representation of the data. + + Validation is always performed, so calling this function on + invalid data may raise a ValueError. 
+ + Keyword arguments: + framing -- if true, append a framing bit (see load) + """ + + self.validate() + + f = StringIO() + f.write(cdata.to_uint_le(len(self.vendor.encode('utf-8')))) + f.write(self.vendor.encode('utf-8')) + f.write(cdata.to_uint_le(len(self))) + for tag, value in self: + comment = "%s=%s" % (tag, value.encode('utf-8')) + f.write(cdata.to_uint_le(len(comment))) + f.write(comment) + if framing: f.write("\x01") + return f.getvalue() + + def pprint(self): + return "\n".join(["%s=%s" % (k.lower(), v) for k, v in self]) + +class VCommentDict(VComment, DictMixin): + """A VComment that looks like a dictionary. + + This object differs from a dictionary in two ways. First, + len(comment) will still return the number of values, not the + number of keys. Secondly, iterating through the object will + iterate over (key, value) pairs, not keys. Since a key may have + multiple values, the same value may appear multiple times while + iterating. + + Since Vorbis comment keys are case-insensitive, all keys are + normalized to lowercase ASCII. + """ + + def __getitem__(self, key): + """A list of values for the key. + + This is a copy, so comment['title'].append('a title') will not + work. + + """ + key = key.lower().encode('ascii') + values = [value for (k, value) in self if k.lower() == key] + if not values: raise KeyError, key + else: return values + + def __delitem__(self, key): + """Delete all values associated with the key.""" + key = key.lower().encode('ascii') + to_delete = filter(lambda x: x[0].lower() == key, self) + if not to_delete:raise KeyError, key + else: map(self.remove, to_delete) + + def __contains__(self, key): + """Return true if the key has any values.""" + key = key.lower().encode('ascii') + for k, value in self: + if k.lower() == key: return True + else: return False + + def __setitem__(self, key, values): + """Set a key's value or values. + + Setting a value overwrites all old ones. 
The value may be a + list of Unicode or UTF-8 strings, or a single Unicode or UTF-8 + string. + + """ + key = key.lower().encode('ascii') + if not isinstance(values, list): + values = [values] + try: del(self[key]) + except KeyError: pass + for value in values: + self.append((key, value)) + + def keys(self): + """Return all keys in the comment.""" + return self and list(set([k.lower() for k, v in self])) + + def as_dict(self): + """Return a copy of the comment data in a real dict.""" + return dict((key, self[key]) for key in self.keys()) diff --git a/lib/mutagen/apev2.py b/lib/mutagen/apev2.py new file mode 100644 index 00000000..d7e67010 --- /dev/null +++ b/lib/mutagen/apev2.py @@ -0,0 +1,465 @@ +# An APEv2 tag reader +# +# Copyright 2005 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# +# $Id: apev2.py 4008 2007-04-21 04:02:07Z piman $ + +"""APEv2 reading and writing. + +The APEv2 format is most commonly used with Musepack files, but is +also the format of choice for WavPack and other formats. Some MP3s +also have APEv2 tags, but this can cause problems with many MP3 +decoders and taggers. + +APEv2 tags, like Vorbis comments, are freeform key=value pairs. APEv2 +keys can be any ASCII string with characters from 0x20 to 0x7E, +between 2 and 255 characters long. Keys are case-sensitive, but +readers are recommended to be case insensitive, and it is forbidden to +multiple keys which differ only in case. Keys are usually stored +title-cased (e.g. 'Artist' rather than 'artist'). + +APEv2 values are slightly more structured than Vorbis comments; values +are flagged as one of text, binary, or an external reference (usually +a URI). + +Based off the format specification found at +http://wiki.hydrogenaudio.org/index.php?title=APEv2_specification. 
+""" + +__all__ = ["APEv2", "APEv2File", "Open", "delete"] + +import struct +from cStringIO import StringIO + +def is_valid_apev2_key(key): + return (2 <= len(key) <= 255 and min(key) >= ' ' and max(key) <= '~' and + key not in ["OggS", "TAG", "ID3", "MP+"]) + +# There are three different kinds of APE tag values. +# "0: Item contains text information coded in UTF-8 +# 1: Item contains binary information +# 2: Item is a locator of external stored information [e.g. URL] +# 3: reserved" +TEXT, BINARY, EXTERNAL = range(3) + +HAS_HEADER = 1L << 31 +HAS_NO_FOOTER = 1L << 30 +IS_HEADER = 1L << 29 + +class error(IOError): pass +class APENoHeaderError(error, ValueError): pass +class APEUnsupportedVersionError(error, ValueError): pass +class APEBadItemError(error, ValueError): pass + +from lib.mutagen import Metadata, FileType +from lib.mutagen._util import DictMixin, cdata, utf8, delete_bytes + +class _APEv2Data(object): + # Store offsets of the important parts of the file. + start = header = data = footer = end = None + # Footer or header; seek here and read 32 to get version/size/items/flags + metadata = None + # Actual tag data + tag = None + + version = None + size = None + items = None + flags = 0 + + # The tag is at the start rather than the end. A tag at both + # the start and end of the file (i.e. the tag is the whole file) + # is not considered to be at the start. + is_at_start = False + + def __init__(self, fileobj): + self.__find_metadata(fileobj) + self.metadata = max(self.header, self.footer) + if self.metadata is None: return + self.__fill_missing(fileobj) + self.__fix_brokenness(fileobj) + if self.data is not None: + fileobj.seek(self.data) + self.tag = fileobj.read(self.size) + + def __find_metadata(self, fileobj): + # Try to find a header or footer. + + # Check for a simple footer. 
+ try: fileobj.seek(-32, 2) + except IOError: + fileobj.seek(0, 2) + return + if fileobj.read(8) == "APETAGEX": + fileobj.seek(-8, 1) + self.footer = self.metadata = fileobj.tell() + return + + # Check for an APEv2 tag followed by an ID3v1 tag at the end. + try: + fileobj.seek(-128, 2) + if fileobj.read(3) == "TAG": + + fileobj.seek(-35, 1) # "TAG" + header length + if fileobj.read(8) == "APETAGEX": + fileobj.seek(-8, 1) + self.footer = fileobj.tell() + return + + # ID3v1 tag at the end, maybe preceded by Lyrics3v2. + # (http://www.id3.org/lyrics3200.html) + # (header length - "APETAGEX") - "LYRICS200" + fileobj.seek(15, 1) + if fileobj.read(9) == 'LYRICS200': + fileobj.seek(-15, 1) # "LYRICS200" + size tag + try: offset = int(fileobj.read(6)) + except ValueError: + raise IOError + + fileobj.seek(-32 - offset - 6, 1) + if fileobj.read(8) == "APETAGEX": + fileobj.seek(-8, 1) + self.footer = fileobj.tell() + return + + except IOError: + pass + + # Check for a tag at the start. + fileobj.seek(0, 0) + if fileobj.read(8) == "APETAGEX": + self.is_at_start = True + self.header = 0 + + def __fill_missing(self, fileobj): + fileobj.seek(self.metadata + 8) + self.version = fileobj.read(4) + self.size = cdata.uint_le(fileobj.read(4)) + self.items = cdata.uint_le(fileobj.read(4)) + self.flags = cdata.uint_le(fileobj.read(4)) + + if self.header is not None: + self.data = self.header + 32 + # If we're reading the header, the size is the header + # offset + the size, which includes the footer. + self.end = self.data + self.size + fileobj.seek(self.end - 32, 0) + if fileobj.read(8) == "APETAGEX": + self.footer = self.end - 32 + elif self.footer is not None: + self.end = self.footer + 32 + self.data = self.end - self.size + if self.flags & HAS_HEADER: + self.header = self.data - 32 + else: + self.header = self.data + else: raise APENoHeaderError("No APE tag found") + + def __fix_brokenness(self, fileobj): + # Fix broken tags written with PyMusepack. 
+ if self.header is not None: start = self.header + else: start = self.data + fileobj.seek(start) + + while start > 0: + # Clean up broken writing from pre-Mutagen PyMusepack. + # It didn't remove the first 24 bytes of header. + try: fileobj.seek(-24, 1) + except IOError: + break + else: + if fileobj.read(8) == "APETAGEX": + fileobj.seek(-8, 1) + start = fileobj.tell() + else: break + self.start = start + +class APEv2(DictMixin, Metadata): + """A file with an APEv2 tag. + + ID3v1 tags are silently ignored and overwritten. + """ + + filename = None + + def __init__(self, *args, **kwargs): + self.__casemap = {} + self.__dict = {} + super(APEv2, self).__init__(*args, **kwargs) + # Internally all names are stored as lowercase, but the case + # they were set with is remembered and used when saving. This + # is roughly in line with the standard, which says that keys + # are case-sensitive but two keys differing only in case are + # not allowed, and recommends case-insensitive + # implementations. 
+ + def pprint(self): + """Return tag key=value pairs in a human-readable format.""" + items = self.items() + items.sort() + return "\n".join(["%s=%s" % (k, v.pprint()) for k, v in items]) + + def load(self, filename): + """Load tags from a filename.""" + self.filename = filename + fileobj = file(filename, "rb") + try: + data = _APEv2Data(fileobj) + finally: + fileobj.close() + if data.tag: + self.clear() + self.__casemap.clear() + self.__parse_tag(data.tag, data.items) + else: + raise APENoHeaderError("No APE tag found") + + def __parse_tag(self, tag, count): + fileobj = StringIO(tag) + + for i in range(count): + size = cdata.uint_le(fileobj.read(4)) + flags = cdata.uint_le(fileobj.read(4)) + + # Bits 1 and 2 bits are flags, 0-3 + # Bit 0 is read/write flag, ignored + kind = (flags & 6) >> 1 + if kind == 3: + raise APEBadItemError("value type must be 0, 1, or 2") + key = value = fileobj.read(1) + while key[-1:] != '\x00' and value: + value = fileobj.read(1) + key += value + if key[-1:] == "\x00": + key = key[:-1] + value = fileobj.read(size) + self[key] = APEValue(value, kind) + + def __getitem__(self, key): + if not is_valid_apev2_key(key): + raise KeyError("%r is not a valid APEv2 key" % key) + return self.__dict[key.lower()] + + def __delitem__(self, key): + if not is_valid_apev2_key(key): + raise KeyError("%r is not a valid APEv2 key" % key) + del(self.__dict[key.lower()]) + + def __setitem__(self, key, value): + """'Magic' value setter. + + This function tries to guess at what kind of value you want to + store. If you pass in a valid UTF-8 or Unicode string, it + treats it as a text value. If you pass in a list, it treats it + as a list of string/Unicode values. If you pass in a string + that is not valid UTF-8, it assumes it is a binary value. + + If you need to force a specific type of value (e.g. 
binary + data that also happens to be valid UTF-8, or an external + reference), use the APEValue factory and set the value to the + result of that: + from lib.mutagen.apev2 import APEValue, EXTERNAL + tag['Website'] = APEValue('http://example.org', EXTERNAL) + """ + + if not is_valid_apev2_key(key): + raise KeyError("%r is not a valid APEv2 key" % key) + + if not isinstance(value, _APEValue): + # let's guess at the content if we're not already a value... + if isinstance(value, unicode): + # unicode? we've got to be text. + value = APEValue(utf8(value), TEXT) + elif isinstance(value, list): + # list? text. + value = APEValue("\0".join(map(utf8, value)), TEXT) + else: + try: dummy = value.decode("utf-8") + except UnicodeError: + # invalid UTF8 text, probably binary + value = APEValue(value, BINARY) + else: + # valid UTF8, probably text + value = APEValue(value, TEXT) + self.__casemap[key.lower()] = key + self.__dict[key.lower()] = value + + def keys(self): + return [self.__casemap.get(key, key) for key in self.__dict.keys()] + + def save(self, filename=None): + """Save changes to a file. + + If no filename is given, the one most recently loaded is used. + + Tags are always written at the end of the file, and include + a header and a footer. + """ + + filename = filename or self.filename + try: + fileobj = file(filename, "r+b") + except IOError: + fileobj = file(filename, "w+b") + data = _APEv2Data(fileobj) + + if data.is_at_start: + delete_bytes(fileobj, data.end - data.start, data.start) + elif data.start is not None: + fileobj.seek(data.start) + # Delete an ID3v1 tag if present, too. + fileobj.truncate() + fileobj.seek(0, 2) + + # "APE tags items should be sorted ascending by size... This is + # not a MUST, but STRONGLY recommended. Actually the items should + # be sorted by importance/byte, but this is not feasible." 
+ tags = [v._internal(k) for k, v in self.items()] + tags.sort(lambda a, b: cmp(len(a), len(b))) + num_tags = len(tags) + tags = "".join(tags) + + header = "APETAGEX%s%s" %( + # version, tag size, item count, flags + struct.pack("<4I", 2000, len(tags) + 32, num_tags, + HAS_HEADER | IS_HEADER), + "\0" * 8) + fileobj.write(header) + + fileobj.write(tags) + + footer = "APETAGEX%s%s" %( + # version, tag size, item count, flags + struct.pack("<4I", 2000, len(tags) + 32, num_tags, + HAS_HEADER), + "\0" * 8) + fileobj.write(footer) + fileobj.close() + + def delete(self, filename=None): + """Remove tags from a file.""" + filename = filename or self.filename + fileobj = file(filename, "r+b") + try: + data = _APEv2Data(fileobj) + if data.start is not None and data.size is not None: + delete_bytes(fileobj, data.end - data.start, data.start) + finally: + fileobj.close() + self.clear() + +Open = APEv2 + +def delete(filename): + """Remove tags from a file.""" + try: APEv2(filename).delete() + except APENoHeaderError: pass + +def APEValue(value, kind): + """APEv2 tag value factory. + + Use this if you need to specify the value's type manually. Binary + and text data are automatically detected by APEv2.__setitem__. 
+ """ + if kind == TEXT: return APETextValue(value, kind) + elif kind == BINARY: return APEBinaryValue(value, kind) + elif kind == EXTERNAL: return APEExtValue(value, kind) + else: raise ValueError("kind must be TEXT, BINARY, or EXTERNAL") + +class _APEValue(object): + def __init__(self, value, kind): + self.kind = kind + self.value = value + + def __len__(self): + return len(self.value) + def __str__(self): + return self.value + + # Packed format for an item: + # 4B: Value length + # 4B: Value type + # Key name + # 1B: Null + # Key value + def _internal(self, key): + return "%s%s\0%s" %( + struct.pack("<2I", len(self.value), self.kind << 1), + key, self.value) + + def __repr__(self): + return "%s(%r, %d)" % (type(self).__name__, self.value, self.kind) + +class APETextValue(_APEValue): + """An APEv2 text value. + + Text values are Unicode/UTF-8 strings. They can be accessed like + strings (with a null seperating the values), or arrays of strings.""" + + def __unicode__(self): + return unicode(str(self), "utf-8") + + def __iter__(self): + """Iterate over the strings of the value (not the characters)""" + return iter(unicode(self).split("\0")) + + def __getitem__(self, index): + return unicode(self).split("\0")[index] + + def __len__(self): + return self.value.count("\0") + 1 + + def __cmp__(self, other): + return cmp(unicode(self), other) + + def __setitem__(self, index, value): + values = list(self) + values[index] = value.encode("utf-8") + self.value = "\0".join(values).encode("utf-8") + + def pprint(self): + return " / ".join(self) + +class APEBinaryValue(_APEValue): + """An APEv2 binary value.""" + + def pprint(self): return "[%d bytes]" % len(self) + +class APEExtValue(_APEValue): + """An APEv2 external value. + + External values are usually URI or IRI strings. 
+ """ + def pprint(self): return "[External] %s" % unicode(self) + +class APEv2File(FileType): + class _Info(object): + length = 0 + bitrate = 0 + def __init__(self, fileobj): pass + pprint = staticmethod(lambda: "Unknown format with APEv2 tag.") + + def load(self, filename): + self.filename = filename + self.info = self._Info(file(filename, "rb")) + try: self.tags = APEv2(filename) + except error: self.tags = None + + def add_tags(self): + if self.tags is None: + self.tags = APEv2() + else: + raise ValueError("%r already has tags: %r" % (self, self.tags)) + + def score(filename, fileobj, header): + try: fileobj.seek(-160, 2) + except IOError: + fileobj.seek(0) + footer = fileobj.read() + filename = filename.lower() + return (("APETAGEX" in footer) - header.startswith("ID3")) + score = staticmethod(score) diff --git a/lib/mutagen/asf.py b/lib/mutagen/asf.py new file mode 100644 index 00000000..d7adfeb2 --- /dev/null +++ b/lib/mutagen/asf.py @@ -0,0 +1,670 @@ +# Copyright 2006-2007 Lukas Lalinsky +# Copyright 2005-2006 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 
+# +# $Id: asf.py 4224 2007-12-03 09:01:49Z luks $ + +"""Read and write ASF (Window Media Audio) files.""" + +__all__ = ["ASF", "Open"] + +import struct +from lib.mutagen import FileType, Metadata +from lib.mutagen._util import insert_bytes, delete_bytes, DictMixin + +class error(IOError): pass +class ASFError(error): pass +class ASFHeaderError(error): pass + + +class ASFInfo(object): + """ASF stream information.""" + + def __init__(self): + self.length = 0.0 + self.sample_rate = 0 + self.bitrate = 0 + self.channels = 0 + + def pprint(self): + s = "Windows Media Audio %d bps, %s Hz, %d channels, %.2f seconds" % ( + self.bitrate, self.sample_rate, self.channels, self.length) + return s + + +class ASFTags(list, DictMixin, Metadata): + """Dictionary containing ASF attributes.""" + + def pprint(self): + return "\n".join(["%s=%s" % (k, v) for k, v in self]) + + def __getitem__(self, key): + """A list of values for the key. + + This is a copy, so comment['title'].append('a title') will not + work. + + """ + values = [value for (k, value) in self if k == key] + if not values: raise KeyError, key + else: return values + + def __delitem__(self, key): + """Delete all values associated with the key.""" + to_delete = filter(lambda x: x[0] == key, self) + if not to_delete: raise KeyError, key + else: map(self.remove, to_delete) + + def __contains__(self, key): + """Return true if the key has any values.""" + for k, value in self: + if k == key: return True + else: return False + + def __setitem__(self, key, values): + """Set a key's value or values. + + Setting a value overwrites all old ones. The value may be a + list of Unicode or UTF-8 strings, or a single Unicode or UTF-8 + string. 
+ + """ + if not isinstance(values, list): + values = [values] + try: del(self[key]) + except KeyError: pass + for value in values: + if key in _standard_attribute_names: + value = unicode(value) + elif not isinstance(value, ASFBaseAttribute): + if isinstance(value, basestring): + value = ASFUnicodeAttribute(value) + elif isinstance(value, bool): + value = ASFBoolAttribute(value) + elif isinstance(value, int): + value = ASFDWordAttribute(value) + elif isinstance(value, long): + value = ASFQWordAttribute(value) + self.append((key, value)) + + def keys(self): + """Return all keys in the comment.""" + return self and set(zip(*self)[0]) + + def as_dict(self): + """Return a copy of the comment data in a real dict.""" + d = {} + for key, value in self: + d.setdefault(key, []).append(value) + return d + + +class ASFBaseAttribute(object): + """Generic attribute.""" + TYPE = None + + def __init__(self, value=None, data=None, language=None, + stream=None, **kwargs): + self.language = language + self.stream = stream + if data: + self.value = self.parse(data, **kwargs) + else: + self.value = value + + def data_size(self): + raise NotImplementedError + + def __repr__(self): + name = "%s(%r" % (type(self).__name__, self.value) + if self.language: + name += ", language=%d" % self.language + if self.stream: + name += ", stream=%d" % self.stream + name += ")" + return name + + def render(self, name): + name = name.encode("utf-16-le") + "\x00\x00" + data = self._render() + return (struct.pack(" 0: + texts.append(data[pos:end].decode("utf-16-le").strip("\x00")) + else: + texts.append(None) + pos = end + title, author, copyright, desc, rating = texts + for key, value in dict( + Title=title, + Author=author, + Copyright=copyright, + Description=desc, + Rating=rating).items(): + if value is not None: + asf.tags[key] = value + + def render(self, asf): + def render_text(name): + value = asf.tags.get(name, []) + if value: + return value[0].encode("utf-16-le") + "\x00\x00" + else: + return 
"" + texts = map(render_text, _standard_attribute_names) + data = struct.pack(" 0xFFFF + if (value.language is None and value.stream is None and + name not in self.to_extended_content_description and + not large_value): + self.to_extended_content_description[name] = value + elif (value.language is None and value.stream is not None and + name not in self.to_metadata and not large_value): + self.to_metadata[name] = value + else: + self.to_metadata_library.append((name, value)) + + # Add missing objects + if not self.content_description_obj: + self.content_description_obj = \ + ContentDescriptionObject() + self.objects.append(self.content_description_obj) + if not self.extended_content_description_obj: + self.extended_content_description_obj = \ + ExtendedContentDescriptionObject() + self.objects.append(self.extended_content_description_obj) + if not self.header_extension_obj: + self.header_extension_obj = \ + HeaderExtensionObject() + self.objects.append(self.header_extension_obj) + if not self.metadata_obj: + self.metadata_obj = \ + MetadataObject() + self.header_extension_obj.objects.append(self.metadata_obj) + if not self.metadata_library_obj: + self.metadata_library_obj = \ + MetadataLibraryObject() + self.header_extension_obj.objects.append(self.metadata_library_obj) + + # Render the header + data = "".join([obj.render(self) for obj in self.objects]) + data = (HeaderObject.GUID + + struct.pack(" self.size: + insert_bytes(fileobj, size - self.size, self.size) + if size < self.size: + delete_bytes(fileobj, self.size - size, 0) + fileobj.seek(0) + fileobj.write(data) + finally: + fileobj.close() + + def __read_file(self, fileobj): + header = fileobj.read(30) + if len(header) != 30 or header[:16] != HeaderObject.GUID: + raise ASFHeaderError, "Not an ASF file." 
+ + self.extended_content_description_obj = None + self.content_description_obj = None + self.header_extension_obj = None + self.metadata_obj = None + self.metadata_library_obj = None + + self.size, self.num_objects = struct.unpack(" u'\x7f': + enc = 3 + id3.add(lib.mutagen.id3.TXXX(encoding=enc, text=value, desc=desc)) + else: + frame.text = value + + def deleter(id3, key): + del(id3[frameid]) + + cls.RegisterKey(key, getter, setter, deleter) + RegisterTXXXKey = classmethod(RegisterTXXXKey) + + def __init__(self, filename=None): + self.__id3 = ID3() + self.load = self.__id3.load + self.save = self.__id3.save + self.delete = self.__id3.delete + if filename is not None: + self.load(filename) + + filename = property(lambda s: s.__id3.filename, + lambda s, fn: setattr(s.__id3, 'filename', fn)) + + _size = property(lambda s: s._id3.size, + lambda s, fn: setattr(s.__id3, '_size', fn)) + + def __getitem__(self, key): + key = key.lower() + func = dict_match(self.Get, key, self.GetFallback) + if func is not None: + return func(self.__id3, key) + else: + raise EasyID3KeyError("%r is not a valid key" % key) + + def __setitem__(self, key, value): + key = key.lower() + if isinstance(value, basestring): + value = [value] + func = dict_match(self.Set, key, self.SetFallback) + if func is not None: + return func(self.__id3, key, value) + else: + raise EasyID3KeyError("%r is not a valid key" % key) + + def __delitem__(self, key): + key = key.lower() + func = dict_match(self.Delete, key, self.DeleteFallback) + if func is not None: + return func(self.__id3, key) + else: + raise EasyID3KeyError("%r is not a valid key" % key) + + def keys(self): + keys = [] + for key in self.Get.keys(): + if key in self.List: + keys.extend(self.List[key](self.__id3, key)) + elif key in self: + keys.append(key) + if self.ListFallback is not None: + keys.extend(self.ListFallback(self.__id3, "")) + return keys + + def pprint(self): + """Print tag key=value pairs.""" + strings = [] + for key in 
sorted(self.keys()): + values = self[key] + for value in values: + strings.append("%s=%s" % (key, value)) + return "\n".join(strings) + +Open = EasyID3 + +def genre_get(id3, key): + return id3["TCON"].genres + +def genre_set(id3, key, value): + try: + frame = id3["TCON"] + except KeyError: + id3.add(lib.mutagen.id3.TCON(encoding=3, text=value)) + else: + frame.encoding = 3 + frame.genres = value + +def genre_delete(id3, key): + del(id3["TCON"]) + +def date_get(id3, key): + return [stamp.text for stamp in id3["TDRC"].text] + +def date_set(id3, key, value): + id3.add(lib.mutagen.id3.TDRC(encoding=3, text=value)) + +def date_delete(id3, key): + del(id3["TDRC"]) + +def performer_get(id3, key): + people = [] + wanted_role = key.split(":", 1)[1] + try: + mcl = id3["TMCL"] + except KeyError: + raise KeyError(key) + for role, person in mcl.people: + if role == wanted_role: + people.append(person) + if people: + return people + else: + raise KeyError(key) + +def performer_set(id3, key, value): + wanted_role = key.split(":", 1)[1] + try: + mcl = id3["TMCL"] + except KeyError: + mcl = lib.mutagen.id3.TMCL(encoding=3, people=[]) + id3.add(mcl) + mcl.encoding = 3 + people = [p for p in mcl.people if p[0] != wanted_role] + for v in value: + people.append((wanted_role, v)) + mcl.people = people + +def performer_delete(id3, key): + wanted_role = key.split(":", 1)[1] + try: + mcl = id3["TMCL"] + except KeyError: + raise KeyError(key) + people = [p for p in mcl.people if p[0] != wanted_role] + if people == mcl.people: + raise KeyError(key) + elif people: + mcl.people = people + else: + del(id3["TMCL"]) + +def performer_list(id3, key): + try: mcl = id3["TMCL"] + except KeyError: + return [] + else: + return list(set("performer:" + p[0] for p in mcl.people)) + +def musicbrainz_trackid_get(id3, key): + return [id3["UFID:http://musicbrainz.org"].data.decode('ascii')] + +def musicbrainz_trackid_set(id3, key, value): + if len(value) != 1: + raise ValueError("only one track ID may be set 
per song") + value = value[0].encode('ascii') + try: + frame = id3["UFID:http://musicbrainz.org"] + except KeyError: + frame = lib.mutagen.id3.UFID(owner="http://musicbrainz.org", data=value) + id3.add(frame) + else: + frame.data = value + +def musicbrainz_trackid_delete(id3, key): + del(id3["UFID:http://musicbrainz.org"]) + +def website_get(id3, key): + urls = [frame.url for frame in id3.getall("WOAR")] + if urls: + return urls + else: + raise EasyID3KeyError(key) + +def website_set(id3, key, value): + id3.delall("WOAR") + for v in value: + id3.add(lib.mutagen.id3.WOAR(url=v)) + +def website_delete(id3, key): + id3.delall("WOAR") + +def gain_get(id3, key): + try: + frame = id3["RVA2:" + key[11:-5]] + except KeyError: + raise EasyID3KeyError(key) + else: + return [u"%+f dB" % frame.gain] + +def gain_set(id3, key, value): + if len(value) != 1: + raise ValueError("there must be exactly one gain value, not %r.", value) + gain = float(value[0].split()[0]) + try: + frame = id3["RVA2:" + key[11:-5]] + except KeyError: + frame = lib.mutagen.id3.RVA2(desc=key[11:-5], gain=0, peak=0, channel=1) + id3.add(frame) + frame.gain = gain + +def gain_delete(id3, key): + try: + frame = id3["RVA2:" + key[11:-5]] + except KeyError: + pass + else: + if frame.peak: + frame.gain = 0.0 + else: + del(id3["RVA2:" + key[11:-5]]) + +def peak_get(id3, key): + try: + frame = id3["RVA2:" + key[11:-5]] + except KeyError: + raise EasyID3KeyError(key) + else: + return [u"%f" % frame.peak] + +def peak_set(id3, key, value): + if len(value) != 1: + raise ValueError("there must be exactly one peak value, not %r.", value) + peak = float(value[0]) + if peak >= 2 or peak < 0: + raise ValueError("peak must be => 0 and < 2.") + try: + frame = id3["RVA2:" + key[11:-5]] + except KeyError: + frame = lib.mutagen.id3.RVA2(desc=key[11:-5], gain=0, peak=0, channel=1) + id3.add(frame) + frame.peak = peak + +def peak_delete(id3, key): + try: + frame = id3["RVA2:" + key[11:-5]] + except KeyError: + pass + else: + if 
frame.gain: + frame.peak = 0.0 + else: + del(id3["RVA2:" + key[11:-5]]) + +def peakgain_list(id3, key): + keys = [] + for frame in id3.getall("RVA2"): + keys.append("replaygain_%s_gain" % frame.desc) + keys.append("replaygain_%s_peak" % frame.desc) + return keys + +for frameid, key in { + "TALB": "album", + "TBPM": "bpm", + "TCMP": "compilation", # iTunes extension + "TCOM": "composer", + "TCOP": "copyright", + "TENC": "encodedby", + "TEXT": "lyricist", + "TLEN": "length", + "TMED": "media", + "TMOO": "mood", + "TIT2": "title", + "TIT3": "version", + "TPE1": "artist", + "TPE2": "performer", + "TPE3": "conductor", + "TPE4": "arranger", + "TPOS": "discnumber", + "TPUB": "organization", + "TRCK": "tracknumber", + "TOLY": "author", + "TSO2": "albumartistsort", # iTunes extension + "TSOA": "albumsort", + "TSOC": "composersort", # iTunes extension + "TSOP": "artistsort", + "TSOT": "titlesort", + "TSRC": "isrc", + "TSST": "discsubtitle", + }.iteritems(): + EasyID3.RegisterTextKey(key, frameid) + +EasyID3.RegisterKey("genre", genre_get, genre_set, genre_delete) +EasyID3.RegisterKey("date", date_get, date_set, date_delete) +EasyID3.RegisterKey( + "performer:*", performer_get, performer_set, performer_delete, + performer_list) +EasyID3.RegisterKey("musicbrainz_trackid", musicbrainz_trackid_get, + musicbrainz_trackid_set, musicbrainz_trackid_delete) +EasyID3.RegisterKey("website", website_get, website_set, website_delete) +EasyID3.RegisterKey("website", website_get, website_set, website_delete) +EasyID3.RegisterKey( + "replaygain_*_gain", gain_get, gain_set, gain_delete, peakgain_list) +EasyID3.RegisterKey("replaygain_*_peak", peak_get, peak_set, peak_delete) + +# At various times, information for this came from +# http://musicbrainz.org/docs/specs/metadata_tags.html +# http://bugs.musicbrainz.org/ticket/1383 +# http://musicbrainz.org/doc/MusicBrainzTag +for desc, key in { + u"MusicBrainz Artist Id": "musicbrainz_artistid", + u"MusicBrainz Album Id": "musicbrainz_albumid", + 
u"MusicBrainz Album Artist Id": "musicbrainz_albumartistid", + u"MusicBrainz TRM Id": "musicbrainz_trmid", + u"MusicIP PUID": "musicip_puid", + u"MusicMagic Fingerprint": "musicip_fingerprint", + u"MusicBrainz Album Status": "musicbrainz_albumstatus", + u"MusicBrainz Album Type": "musicbrainz_albumtype", + u"MusicBrainz Album Release Country": "releasecountry", + u"MusicBrainz Disc Id": "musicbrainz_discid", + u"ASIN": "asin", + u"ALBUMARTISTSORT": "albumartistsort", + u"BARCODE": "barcode", + }.iteritems(): + EasyID3.RegisterTXXXKey(key, desc) + +class EasyID3FileType(ID3FileType): + """Like ID3FileType, but uses EasyID3 for tags.""" + ID3 = EasyID3 diff --git a/lib/mutagen/easymp4.py b/lib/mutagen/easymp4.py new file mode 100644 index 00000000..bcec19a9 --- /dev/null +++ b/lib/mutagen/easymp4.py @@ -0,0 +1,249 @@ +import lib.mutagen.mp4 + +from lib.mutagen import Metadata +from lib.mutagen._util import DictMixin, dict_match, utf8 +from libmutagen.mp4 import MP4, MP4Tags, error, delete + +__all__ = ["EasyMP4Tags", "EasyMP4", "delete", "error"] + +class EasyMP4KeyError(error, KeyError, ValueError): + pass + +class EasyMP4Tags(DictMixin, Metadata): + """A file with MPEG-4 iTunes metadata. + + Like Vorbis comments, EasyMP4Tags keys are case-insensitive ASCII + strings, and values are a list of Unicode strings (and these lists + are always of length 0 or 1). If you need access to the full MP4 + metadata feature set, you should use MP4, not EasyMP4. + """ + + Set = {} + Get = {} + Delete = {} + List = {} + + def __init__(self, *args, **kwargs): + self.__mp4 = MP4Tags(*args, **kwargs) + self.load = self.__mp4.load + self.save = self.__mp4.save + self.delete = self.__mp4.delete + + filename = property(lambda s: s.__mp4.filename, + lambda s, fn: setattr(s.__mp4, 'filename', fn)) + + def RegisterKey(cls, key, + getter=None, setter=None, deleter=None, lister=None): + """Register a new key mapping. + + A key mapping is four functions, a getter, setter, deleter, + and lister. 
The key may be either a string or a glob pattern. + + The getter, deleted, and lister receive an MP4Tags instance + and the requested key name. The setter also receives the + desired value, which will be a list of strings. + + The getter, setter, and deleter are used to implement __getitem__, + __setitem__, and __delitem__. + + The lister is used to implement keys(). It should return a + list of keys that are actually in the MP4 instance, provided + by its associated getter. + """ + key = key.lower() + if getter is not None: + cls.Get[key] = getter + if setter is not None: + cls.Set[key] = setter + if deleter is not None: + cls.Delete[key] = deleter + if lister is not None: + cls.List[key] = lister + RegisterKey = classmethod(RegisterKey) + + def RegisterTextKey(cls, key, atomid): + """Register a text key. + + If the key you need to register is a simple one-to-one mapping + of MP4 atom name to EasyMP4Tags key, then you can use this + function: + EasyMP4Tags.RegisterTextKey("artist", "\xa9ART") + """ + def getter(tags, key): + return tags[atomid] + + def setter(tags, key, value): + tags[atomid] = value + + def deleter(tags, key): + del(tags[atomid]) + + cls.RegisterKey(key, getter, setter, deleter) + RegisterTextKey = classmethod(RegisterTextKey) + + def RegisterIntKey(cls, key, atomid, min_value=0, max_value=2**16-1): + """Register a scalar integer key. 
+ """ + + def getter(tags, key): + return map(unicode, tags[atomid]) + + def setter(tags, key, value): + clamp = lambda x: int(min(max(min_value, x), max_value)) + tags[atomid] = map(clamp, map(int, value)) + + def deleter(tags, key): + del(tags[atomid]) + + cls.RegisterKey(key, getter, setter, deleter) + RegisterIntKey = classmethod(RegisterIntKey) + + def RegisterIntPairKey(cls, key, atomid, min_value=0, max_value=2**16-1): + def getter(tags, key): + ret = [] + for (track, total) in tags[atomid]: + if total: + ret.append(u"%d/%d" % (track, total)) + else: + ret.append(unicode(track)) + return ret + + def setter(tags, key, value): + clamp = lambda x: int(min(max(min_value, x), max_value)) + data = [] + for v in value: + try: + tracks, total = v.split("/") + tracks = clamp(int(tracks)) + total = clamp(int(total)) + except (ValueError, TypeError): + tracks = clamp(int(v)) + total = min_value + data.append((tracks, total)) + tags[atomid] = data + + def deleter(tags, key): + del(tags[atomid]) + + cls.RegisterKey(key, getter, setter, deleter) + RegisterIntPairKey = classmethod(RegisterIntPairKey) + + def RegisterFreeformKey(cls, key, name, mean="com.apple.iTunes"): + """Register a text key. 
+ + If the key you need to register is a simple one-to-one mapping + of MP4 freeform atom (----) and name to EasyMP4Tags key, then + you can use this function: + EasyMP4Tags.RegisterFreeformKey( + "musicbrainz_artistid", "MusicBrainz Artist Id") + """ + atomid = "----:%s:%s" % (mean, name) + + def getter(tags, key): + return [s.decode("utf-8", "replace") for s in tags[atomid]] + + def setter(tags, key, value): + tags[atomid] = map(utf8, value) + + def deleter(tags, key): + del(tags[atomid]) + + cls.RegisterKey(key, getter, setter, deleter) + RegisterFreeformKey = classmethod(RegisterFreeformKey) + + def __getitem__(self, key): + key = key.lower() + func = dict_match(self.Get, key) + if func is not None: + return func(self.__mp4, key) + else: + raise EasyMP4KeyError("%r is not a valid key" % key) + + def __setitem__(self, key, value): + key = key.lower() + if isinstance(value, basestring): + value = [value] + func = dict_match(self.Set, key) + if func is not None: + return func(self.__mp4, key, value) + else: + raise EasyMP4KeyError("%r is not a valid key" % key) + + def __delitem__(self, key): + key = key.lower() + func = dict_match(self.Delete, key) + if func is not None: + return func(self.__mp4, key) + else: + raise EasyMP4KeyError("%r is not a valid key" % key) + + def keys(self): + keys = [] + for key in self.Get.keys(): + if key in self.List: + keys.extend(self.List[key](self.__mp4, key)) + elif key in self: + keys.append(key) + return keys + + def pprint(self): + """Print tag key=value pairs.""" + strings = [] + for key in sorted(self.keys()): + values = self[key] + for value in values: + strings.append("%s=%s" % (key, value)) + return "\n".join(strings) + +for atomid, key in { + '\xa9nam': 'title', + '\xa9alb': 'album', + '\xa9ART': 'artist', + 'aART': 'albumartist', + '\xa9day': 'date', + '\xa9cmt': 'comment', + 'desc': 'description', + '\xa9grp': 'grouping', + '\xa9gen': 'genre', + 'cprt': 'copyright', + 'soal': 'albumsort', + 'soaa': 'albumartistsort', + 
'soar': 'artistsort', + 'sonm': 'titlesort', + 'soco': 'composersort', + }.items(): + EasyMP4Tags.RegisterTextKey(key, atomid) + +for name, key in { + 'MusicBrainz Artist Id': 'musicbrainz_artistid', + 'MusicBrainz Track Id': 'musicbrainz_trackid', + 'MusicBrainz Album Id': 'musicbrainz_albumid', + 'MusicBrainz Album Artist Id': 'musicbrainz_albumartistid', + 'MusicIP PUID': 'musicip_puid', + 'MusicBrainz Album Status': 'musicbrainz_albumstatus', + 'MusicBrainz Album Type': 'musicbrainz_albumtype', + 'MusicBrainz Release Country': 'releasecountry', + }.items(): + EasyMP4Tags.RegisterFreeformKey(key, name) + +for name, key in { + "tmpo": "bpm", + }.items(): + EasyMP4Tags.RegisterIntKey(key, name) + +for name, key in { + "trkn": "tracknumber", + "disk": "discnumber", + }.items(): + EasyMP4Tags.RegisterIntPairKey(key, name) + +class EasyMP4(MP4): + """Like MP4, but uses EasyMP4Tags for tags.""" + MP4Tags = EasyMP4Tags + + Get = EasyMP4Tags.Get + Set = EasyMP4Tags.Set + Delete = EasyMP4Tags.Delete + List = EasyMP4Tags.List + RegisterTextKey = EasyMP4Tags.RegisterTextKey + RegisterKey = EasyMP4Tags.RegisterKey diff --git a/lib/mutagen/flac.py b/lib/mutagen/flac.py new file mode 100644 index 00000000..00bce539 --- /dev/null +++ b/lib/mutagen/flac.py @@ -0,0 +1,692 @@ +# FLAC comment support for Mutagen +# Copyright 2005 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. + +"""Read and write FLAC Vorbis comments and stream information. + +Read more about FLAC at http://flac.sourceforge.net. + +FLAC supports arbitrary metadata blocks. The two most interesting ones +are the FLAC stream information block, and the Vorbis comment block; +these are also the only ones Mutagen can currently read. + +This module does not handle Ogg FLAC files. 
+ +Based off documentation available at +http://flac.sourceforge.net/format.html +""" + +__all__ = ["FLAC", "Open", "delete"] + +import struct +from cStringIO import StringIO +from _vorbis import VCommentDict +from lib.mutagen import FileType +from lib.mutagen._util import insert_bytes +from lib.mutagen.id3 import BitPaddedInt + +class error(IOError): pass +class FLACNoHeaderError(error): pass +class FLACVorbisError(ValueError, error): pass + +def to_int_be(string): + """Convert an arbitrarily-long string to a long using big-endian + byte order.""" + return reduce(lambda a, b: (a << 8) + ord(b), string, 0L) + +class MetadataBlock(object): + """A generic block of FLAC metadata. + + This class is extended by specific used as an ancestor for more specific + blocks, and also as a container for data blobs of unknown blocks. + + Attributes: + data -- raw binary data for this block + """ + + def __init__(self, data): + """Parse the given data string or file-like as a metadata block. + The metadata header should not be included.""" + if data is not None: + if isinstance(data, str): data = StringIO(data) + elif not hasattr(data, 'read'): + raise TypeError( + "StreamInfo requires string data or a file-like") + self.load(data) + + def load(self, data): self.data = data.read() + def write(self): return self.data + + def writeblocks(blocks): + """Render metadata block as a byte string.""" + data = [] + codes = [[block.code, block.write()] for block in blocks] + codes[-1][0] |= 128 + for code, datum in codes: + byte = chr(code) + if len(datum) > 2**24: + raise error("block is too long to write") + length = struct.pack(">I", len(datum))[-3:] + data.append(byte + length + datum) + return "".join(data) + writeblocks = staticmethod(writeblocks) + + def group_padding(blocks): + """Consolidate FLAC padding metadata blocks. 
+ + The overall size of the rendered blocks does not change, so + this adds several bytes of padding for each merged block.""" + paddings = filter(lambda x: isinstance(x, Padding), blocks) + map(blocks.remove, paddings) + padding = Padding() + # total padding size is the sum of padding sizes plus 4 bytes + # per removed header. + size = sum([padding.length for padding in paddings]) + padding.length = size + 4 * (len(paddings) - 1) + blocks.append(padding) + group_padding = staticmethod(group_padding) + +class StreamInfo(MetadataBlock): + """FLAC stream information. + + This contains information about the audio data in the FLAC file. + Unlike most stream information objects in Mutagen, changes to this + one will rewritten to the file when it is saved. Unless you are + actually changing the audio stream itself, don't change any + attributes of this block. + + Attributes: + min_blocksize -- minimum audio block size + max_blocksize -- maximum audio block size + sample_rate -- audio sample rate in Hz + channels -- audio channels (1 for mono, 2 for stereo) + bits_per_sample -- bits per sample + total_samples -- total samples in file + length -- audio length in seconds + """ + + code = 0 + + def __eq__(self, other): + try: return (self.min_blocksize == other.min_blocksize and + self.max_blocksize == other.max_blocksize and + self.sample_rate == other.sample_rate and + self.channels == other.channels and + self.bits_per_sample == other.bits_per_sample and + self.total_samples == other.total_samples) + except: return False + + def load(self, data): + self.min_blocksize = int(to_int_be(data.read(2))) + self.max_blocksize = int(to_int_be(data.read(2))) + self.min_framesize = int(to_int_be(data.read(3))) + self.max_framesize = int(to_int_be(data.read(3))) + # first 16 bits of sample rate + sample_first = to_int_be(data.read(2)) + # last 4 bits of sample rate, 3 of channels, first 1 of bits/sample + sample_channels_bps = to_int_be(data.read(1)) + # last 4 of bits/sample, 36 of 
total samples + bps_total = to_int_be(data.read(5)) + + sample_tail = sample_channels_bps >> 4 + self.sample_rate = int((sample_first << 4) + sample_tail) + self.channels = int(((sample_channels_bps >> 1) & 7) + 1) + bps_tail = bps_total >> 36 + bps_head = (sample_channels_bps & 1) << 4 + self.bits_per_sample = int(bps_head + bps_tail + 1) + self.total_samples = bps_total & 0xFFFFFFFFFL + self.length = self.total_samples / float(self.sample_rate) + + self.md5_signature = to_int_be(data.read(16)) + + def write(self): + f = StringIO() + f.write(struct.pack(">I", self.min_blocksize)[-2:]) + f.write(struct.pack(">I", self.max_blocksize)[-2:]) + f.write(struct.pack(">I", self.min_framesize)[-3:]) + f.write(struct.pack(">I", self.max_framesize)[-3:]) + + # first 16 bits of sample rate + f.write(struct.pack(">I", self.sample_rate >> 4)[-2:]) + # 4 bits sample, 3 channel, 1 bps + byte = (self.sample_rate & 0xF) << 4 + byte += ((self.channels - 1) & 7) << 1 + byte += ((self.bits_per_sample - 1) >> 4) & 1 + f.write(chr(byte)) + # 4 bits of bps, 4 of sample count + byte = ((self.bits_per_sample - 1) & 0xF) << 4 + byte += (self.total_samples >> 32) & 0xF + f.write(chr(byte)) + # last 32 of sample count + f.write(struct.pack(">I", self.total_samples & 0xFFFFFFFFL)) + # MD5 signature + sig = self.md5_signature + f.write(struct.pack( + ">4I", (sig >> 96) & 0xFFFFFFFFL, (sig >> 64) & 0xFFFFFFFFL, + (sig >> 32) & 0xFFFFFFFFL, sig & 0xFFFFFFFFL)) + return f.getvalue() + + def pprint(self): + return "FLAC, %.2f seconds, %d Hz" % (self.length, self.sample_rate) + +class SeekPoint(tuple): + """A single seek point in a FLAC file. + + Placeholder seek points have first_sample of 0xFFFFFFFFFFFFFFFFL, + and byte_offset and num_samples undefined. Seek points must be + sorted in ascending order by first_sample number. Seek points must + be unique by first_sample number, except for placeholder + points. Placeholder points must occur last in the table and there + may be any number of them. 
+ + Attributes: + first_sample -- sample number of first sample in the target frame + byte_offset -- offset from first frame to target frame + num_samples -- number of samples in target frame + """ + + def __new__(cls, first_sample, byte_offset, num_samples): + return super(cls, SeekPoint).__new__(cls, (first_sample, + byte_offset, num_samples)) + first_sample = property(lambda self: self[0]) + byte_offset = property(lambda self: self[1]) + num_samples = property(lambda self: self[2]) + +class SeekTable(MetadataBlock): + """Read and write FLAC seek tables. + + Attributes: + seekpoints -- list of SeekPoint objects + """ + + __SEEKPOINT_FORMAT = '>QQH' + __SEEKPOINT_SIZE = struct.calcsize(__SEEKPOINT_FORMAT) + + code = 3 + + def __init__(self, data): + self.seekpoints = [] + super(SeekTable, self).__init__(data) + + def __eq__(self, other): + try: return (self.seekpoints == other.seekpoints) + except (AttributeError, TypeError): return False + + def load(self, data): + self.seekpoints = [] + sp = data.read(self.__SEEKPOINT_SIZE) + while len(sp) == self.__SEEKPOINT_SIZE: + self.seekpoints.append(SeekPoint( + *struct.unpack(self.__SEEKPOINT_FORMAT, sp))) + sp = data.read(self.__SEEKPOINT_SIZE) + + def write(self): + f = StringIO() + for seekpoint in self.seekpoints: + packed = struct.pack(self.__SEEKPOINT_FORMAT, + seekpoint.first_sample, seekpoint.byte_offset, + seekpoint.num_samples) + f.write(packed) + return f.getvalue() + + def __repr__(self): + return "<%s seekpoints=%r>" % (type(self).__name__, self.seekpoints) + +class VCFLACDict(VCommentDict): + """Read and write FLAC Vorbis comments. + + FLACs don't use the framing bit at the end of the comment block. + So this extends VCommentDict to not use the framing bit. 
+ """ + + code = 4 + + def load(self, data, errors='replace', framing=False): + super(VCFLACDict, self).load(data, errors=errors, framing=framing) + + def write(self, framing=False): + return super(VCFLACDict, self).write(framing=framing) + +class CueSheetTrackIndex(tuple): + """Index for a track in a cuesheet. + + For CD-DA, an index_number of 0 corresponds to the track + pre-gap. The first index in a track must have a number of 0 or 1, + and subsequently, index_numbers must increase by 1. Index_numbers + must be unique within a track. And index_offset must be evenly + divisible by 588 samples. + + Attributes: + index_number -- index point number + index_offset -- offset in samples from track start + """ + + def __new__(cls, index_number, index_offset): + return super(cls, CueSheetTrackIndex).__new__(cls, + (index_number, index_offset)) + index_number = property(lambda self: self[0]) + index_offset = property(lambda self: self[1]) + +class CueSheetTrack(object): + """A track in a cuesheet. + + For CD-DA, track_numbers must be 1-99, or 170 for the + lead-out. Track_numbers must be unique within a cue sheet. There + must be atleast one index in every track except the lead-out track + which must have none. 
+ + Attributes: + track_number -- track number + start_offset -- track offset in samples from start of FLAC stream + isrc -- ISRC code + type -- 0 for audio, 1 for digital data + pre_emphasis -- true if the track is recorded with pre-emphasis + indexes -- list of CueSheetTrackIndex objects + """ + + def __init__(self, track_number, start_offset, isrc='', type_=0, + pre_emphasis=False): + self.track_number = track_number + self.start_offset = start_offset + self.isrc = isrc + self.type = type_ + self.pre_emphasis = pre_emphasis + self.indexes = [] + + def __eq__(self, other): + try: return (self.track_number == other.track_number and + self.start_offset == other.start_offset and + self.isrc == other.isrc and + self.type == other.type and + self.pre_emphasis == other.pre_emphasis and + self.indexes == other.indexes) + except (AttributeError, TypeError): return False + + def __repr__(self): + return ("<%s number=%r, offset=%d, isrc=%r, type=%r, " + "pre_emphasis=%r, indexes=%r)>") % ( + type(self).__name__, self.track_number, self.start_offset, + self.isrc, self.type, self.pre_emphasis, self.indexes) + +class CueSheet(MetadataBlock): + """Read and write FLAC embedded cue sheets. + + Number of tracks should be from 1 to 100. There should always be + exactly one lead-out track and that track must be the last track + in the cue sheet. 
+ + Attributes: + media_catalog_number -- media catalog number in ASCII + lead_in_samples -- number of lead-in samples + compact_disc -- true if the cuesheet corresponds to a compact disc + tracks -- list of CueSheetTrack objects + lead_out -- lead-out as CueSheetTrack or None if lead-out was not found + """ + + __CUESHEET_FORMAT = '>128sQB258xB' + __CUESHEET_SIZE = struct.calcsize(__CUESHEET_FORMAT) + __CUESHEET_TRACK_FORMAT = '>QB12sB13xB' + __CUESHEET_TRACK_SIZE = struct.calcsize(__CUESHEET_TRACK_FORMAT) + __CUESHEET_TRACKINDEX_FORMAT = '>QB3x' + __CUESHEET_TRACKINDEX_SIZE = struct.calcsize(__CUESHEET_TRACKINDEX_FORMAT) + + code = 5 + + media_catalog_number = '' + lead_in_samples = 88200 + compact_disc = True + + def __init__(self, data): + self.tracks = [] + super(CueSheet, self).__init__(data) + + def __eq__(self, other): + try: + return (self.media_catalog_number == other.media_catalog_number and + self.lead_in_samples == other.lead_in_samples and + self.compact_disc == other.compact_disc and + self.tracks == other.tracks) + except (AttributeError, TypeError): return False + + def load(self, data): + header = data.read(self.__CUESHEET_SIZE) + media_catalog_number, lead_in_samples, flags, num_tracks = \ + struct.unpack(self.__CUESHEET_FORMAT, header) + self.media_catalog_number = media_catalog_number.rstrip('\0') + self.lead_in_samples = lead_in_samples + self.compact_disc = bool(flags & 0x80) + self.tracks = [] + for i in range(num_tracks): + track = data.read(self.__CUESHEET_TRACK_SIZE) + start_offset, track_number, isrc_padded, flags, num_indexes = \ + struct.unpack(self.__CUESHEET_TRACK_FORMAT, track) + isrc = isrc_padded.rstrip('\0') + type_ = (flags & 0x80) >> 7 + pre_emphasis = bool(flags & 0x40) + val = CueSheetTrack( + track_number, start_offset, isrc, type_, pre_emphasis) + for j in range(num_indexes): + index = data.read(self.__CUESHEET_TRACKINDEX_SIZE) + index_offset, index_number = struct.unpack( + self.__CUESHEET_TRACKINDEX_FORMAT, index) + 
val.indexes.append( + CueSheetTrackIndex(index_number, index_offset)) + self.tracks.append(val) + + def write(self): + f = StringIO() + flags = 0 + if self.compact_disc: flags |= 0x80 + packed = struct.pack( + self.__CUESHEET_FORMAT, self.media_catalog_number, + self.lead_in_samples, flags, len(self.tracks)) + f.write(packed) + for track in self.tracks: + track_flags = 0 + track_flags |= (track.type & 1) << 7 + if track.pre_emphasis: track_flags |= 0x40 + track_packed = struct.pack( + self.__CUESHEET_TRACK_FORMAT, track.start_offset, + track.track_number, track.isrc, track_flags, + len(track.indexes)) + f.write(track_packed) + for index in track.indexes: + index_packed = struct.pack( + self.__CUESHEET_TRACKINDEX_FORMAT, + index.index_offset, index.index_number) + f.write(index_packed) + return f.getvalue() + + def __repr__(self): + return ("<%s media_catalog_number=%r, lead_in=%r, compact_disc=%r, " + "tracks=%r>") % ( + type(self).__name__, self.media_catalog_number, + self.lead_in_samples, self.compact_disc, self.tracks) + +class Picture(MetadataBlock): + """Read and write FLAC embed pictures. 
+ + Attributes: + type -- picture type (same as types for ID3 APIC frames) + mime -- MIME type of the picture + desc -- picture's description + width -- width in pixels + height -- height in pixels + depth -- color depth in bits-per-pixel + colors -- number of colors for indexed palettes (like GIF), + 0 for non-indexed + data -- picture data + """ + + code = 6 + + def __init__(self, data=None): + self.type = 0 + self.mime = u'' + self.desc = u'' + self.width = 0 + self.height = 0 + self.depth = 0 + self.colors = 0 + self.data = '' + super(Picture, self).__init__(data) + + def __eq__(self, other): + try: return (self.type == other.type and + self.mime == other.mime and + self.desc == other.desc and + self.width == other.width and + self.height == other.height and + self.depth == other.depth and + self.colors == other.colors and + self.data == other.data) + except (AttributeError, TypeError): return False + + def load(self, data): + self.type, length = struct.unpack('>2I', data.read(8)) + self.mime = data.read(length).decode('UTF-8', 'replace') + length, = struct.unpack('>I', data.read(4)) + self.desc = data.read(length).decode('UTF-8', 'replace') + (self.width, self.height, self.depth, + self.colors, length) = struct.unpack('>5I', data.read(20)) + self.data = data.read(length) + + def write(self): + f = StringIO() + mime = self.mime.encode('UTF-8') + f.write(struct.pack('>2I', self.type, len(mime))) + f.write(mime) + desc = self.desc.encode('UTF-8') + f.write(struct.pack('>I', len(desc))) + f.write(desc) + f.write(struct.pack('>5I', self.width, self.height, self.depth, + self.colors, len(self.data))) + f.write(self.data) + return f.getvalue() + + def __repr__(self): + return "<%s '%s' (%d bytes)>" % (type(self).__name__, self.mime, + len(self.data)) + +class Padding(MetadataBlock): + """Empty padding space for metadata blocks. + + To avoid rewriting the entire FLAC file when editing comments, + metadata is often padded. 
Padding should occur at the end, and no + more than one padding block should be in any FLAC file. Mutagen + handles this with MetadataBlock.group_padding. + """ + + code = 1 + + def __init__(self, data=""): super(Padding, self).__init__(data) + def load(self, data): self.length = len(data.read()) + def write(self): + try: return "\x00" * self.length + # On some 64 bit platforms this won't generate a MemoryError + # or OverflowError since you might have enough RAM, but it + # still generates a ValueError. On other 64 bit platforms, + # this will still succeed for extremely large values. + # Those should never happen in the real world, and if they + # do, writeblocks will catch it. + except (OverflowError, ValueError, MemoryError): + raise error("cannot write %d bytes" % self.length) + def __eq__(self, other): + return isinstance(other, Padding) and self.length == other.length + def __repr__(self): + return "<%s (%d bytes)>" % (type(self).__name__, self.length) + +class FLAC(FileType): + """A FLAC audio file. 
+ + Attributes: + info -- stream information (length, bitrate, sample rate) + tags -- metadata tags, if any + cuesheet -- CueSheet object, if any + seektable -- SeekTable object, if any + pictures -- list of embedded pictures + """ + + _mimes = ["audio/x-flac", "application/x-flac"] + + METADATA_BLOCKS = [StreamInfo, Padding, None, SeekTable, VCFLACDict, + CueSheet, Picture] + """Known metadata block types, indexed by ID.""" + + def score(filename, fileobj, header): + return (header.startswith("fLaC") + + filename.lower().endswith(".flac") * 3) + score = staticmethod(score) + + def __read_metadata_block(self, file): + byte = ord(file.read(1)) + size = to_int_be(file.read(3)) + try: + data = file.read(size) + if len(data) != size: + raise error( + "file said %d bytes, read %d bytes" % (size, len(data))) + block = self.METADATA_BLOCKS[byte & 0x7F](data) + except (IndexError, TypeError): + block = MetadataBlock(data) + block.code = byte & 0x7F + self.metadata_blocks.append(block) + else: + self.metadata_blocks.append(block) + if block.code == VCFLACDict.code: + if self.tags is None: self.tags = block + else: raise FLACVorbisError("> 1 Vorbis comment block found") + elif block.code == CueSheet.code: + if self.cuesheet is None: self.cuesheet = block + else: raise error("> 1 CueSheet block found") + elif block.code == SeekTable.code: + if self.seektable is None: self.seektable = block + else: raise error("> 1 SeekTable block found") + return (byte >> 7) ^ 1 + + def add_tags(self): + """Add a Vorbis comment block to the file.""" + if self.tags is None: + self.tags = VCFLACDict() + self.metadata_blocks.append(self.tags) + else: raise FLACVorbisError("a Vorbis comment already exists") + add_vorbiscomment = add_tags + + def delete(self, filename=None): + """Remove Vorbis comments from a file. + + If no filename is given, the one most recently loaded is used. 
+ """ + if filename is None: filename = self.filename + for s in list(self.metadata_blocks): + if isinstance(s, VCFLACDict): + self.metadata_blocks.remove(s) + self.tags = None + self.save() + break + + vc = property(lambda s: s.tags, doc="Alias for tags; don't use this.") + + def load(self, filename): + """Load file information from a filename.""" + + self.metadata_blocks = [] + self.tags = None + self.cuesheet = None + self.seektable = None + self.filename = filename + fileobj = file(filename, "rb") + try: + self.__check_header(fileobj) + while self.__read_metadata_block(fileobj): + pass + if fileobj.read(2) not in ["\xff\xf8", "\xff\xf9"]: + raise FLACNoHeaderError("End of metadata did not start audio") + finally: + fileobj.close() + + try: + self.metadata_blocks[0].length + except (AttributeError, IndexError): + raise FLACNoHeaderError("Stream info block not found") + + info = property(lambda s: s.metadata_blocks[0]) + + def add_picture(self, picture): + """Add a new picture to the file.""" + self.metadata_blocks.append(picture) + + def clear_pictures(self): + """Delete all pictures from the file.""" + self.metadata_blocks = filter(lambda b: b.code != Picture.code, + self.metadata_blocks) + + def __get_pictures(self): + return filter(lambda b: b.code == Picture.code, self.metadata_blocks) + pictures = property(__get_pictures, doc="List of embedded pictures") + + def save(self, filename=None, deleteid3=False): + """Save metadata blocks to a file. + + If no filename is given, the one most recently loaded is used. + """ + + if filename is None: filename = self.filename + f = open(filename, 'rb+') + + # Ensure we've got padding at the end, and only at the end. + # If adding makes it too large, we'll scale it down later. 
+ self.metadata_blocks.append(Padding('\x00' * 1020)) + MetadataBlock.group_padding(self.metadata_blocks) + + header = self.__check_header(f) + available = self.__find_audio_offset(f) - header # "fLaC" and maybe ID3 + data = MetadataBlock.writeblocks(self.metadata_blocks) + + # Delete ID3v2 + if deleteid3 and header > 4: + available += header - 4 + header = 4 + + if len(data) > available: + # If we have too much data, see if we can reduce padding. + padding = self.metadata_blocks[-1] + newlength = padding.length - (len(data) - available) + if newlength > 0: + padding.length = newlength + data = MetadataBlock.writeblocks(self.metadata_blocks) + assert len(data) == available + + elif len(data) < available: + # If we have too little data, increase padding. + self.metadata_blocks[-1].length += (available - len(data)) + data = MetadataBlock.writeblocks(self.metadata_blocks) + assert len(data) == available + + if len(data) != available: + # We couldn't reduce the padding enough. + diff = (len(data) - available) + insert_bytes(f, diff, header) + + f.seek(header - 4) + f.write("fLaC" + data) + + # Delete ID3v1 + if deleteid3: + try: f.seek(-128, 2) + except IOError: pass + else: + if f.read(3) == "TAG": + f.seek(-128, 2) + f.truncate() + + def __find_audio_offset(self, fileobj): + byte = 0x00 + while not (byte >> 7) & 1: + byte = ord(fileobj.read(1)) + size = to_int_be(fileobj.read(3)) + fileobj.read(size) + return fileobj.tell() + + def __check_header(self, fileobj): + size = 4 + header = fileobj.read(4) + if header != "fLaC": + size = None + if header[:3] == "ID3": + size = 14 + BitPaddedInt(fileobj.read(6)[2:]) + fileobj.seek(size - 4) + if fileobj.read(4) != "fLaC": size = None + if size is None: + raise FLACNoHeaderError( + "%r is not a valid FLAC file" % fileobj.name) + return size + +Open = FLAC + +def delete(filename): + """Remove tags from a file.""" + FLAC(filename).delete() diff --git a/lib/mutagen/id3.py b/lib/mutagen/id3.py new file mode 100644 index 
00000000..989c2cfc --- /dev/null +++ b/lib/mutagen/id3.py @@ -0,0 +1,2005 @@ +# id3 support for mutagen +# Copyright (C) 2005 Michael Urman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# $Id: id3.py 4285 2008-09-06 08:01:31Z piman $ + +"""ID3v2 reading and writing. + +This is based off of the following references: + http://www.id3.org/id3v2.4.0-structure.txt + http://www.id3.org/id3v2.4.0-frames.txt + http://www.id3.org/id3v2.3.0.html + http://www.id3.org/id3v2-00.txt + http://www.id3.org/id3v1.html + +Its largest deviation from the above (versions 2.3 and 2.2) is that it +will not interpret the / characters as a separator, and will almost +always accept null separators to generate multi-valued text frames. + +Because ID3 frame structure differs between frame types, each frame is +implemented as a different class (e.g. TIT2 as mutagen.id3.TIT2). Each +frame's documentation contains a list of its attributes. + +Since this file's documentation is a little unwieldy, you are probably +interested in the 'ID3' class to start with. 
+""" + +__all__ = ['ID3', 'ID3FileType', 'Frames', 'Open', 'delete'] + +import struct; from struct import unpack, pack +from zlib import error as zlibError +from warnings import warn + +import lib.mutagen +from lib.mutagen._util import insert_bytes, delete_bytes, DictProxy + +class error(Exception): pass +class ID3NoHeaderError(error, ValueError): pass +class ID3BadUnsynchData(error, ValueError): pass +class ID3BadCompressedData(error, ValueError): pass +class ID3TagError(error, ValueError): pass +class ID3UnsupportedVersionError(error, NotImplementedError): pass +class ID3EncryptionUnsupportedError(error, NotImplementedError): pass +class ID3JunkFrameError(error, ValueError): pass + +class ID3Warning(error, UserWarning): pass + +def is_valid_frame_id(frame_id): + return frame_id.isalnum() and frame_id.isupper() + +class ID3(DictProxy, lib.mutagen.Metadata): + """A file with an ID3v2 tag. + + Attributes: + version -- ID3 tag version as a tuple + unknown_frames -- raw frame data of any unknown frames found + size -- the total size of the ID3 tag, including the header + """ + + PEDANTIC = True + version = (2, 4, 0) + + filename = None + size = 0 + __flags = 0 + __readbytes = 0 + __crc = None + + def __init__(self, *args, **kwargs): + self.unknown_frames = [] + super(ID3, self).__init__(*args, **kwargs) + + def __fullread(self, size): + try: + if size < 0: + raise ValueError('Requested bytes (%s) less than zero' % size) + if size > self.__filesize: + raise EOFError('Requested %#x of %#x (%s)' % + (long(size), long(self.__filesize), self.filename)) + except AttributeError: pass + data = self.__fileobj.read(size) + if len(data) != size: raise EOFError + self.__readbytes += size + return data + + def load(self, filename, known_frames=None, translate=True): + """Load tags from a filename. + + Keyword arguments: + filename -- filename to load tag data from + known_frames -- dict mapping frame IDs to Frame objects + translate -- Update all tags to ID3v2.4 internally. 
Mutagen is + only capable of writing ID3v2.4 tags, so if you + intend to save, this must be true. + + Example of loading a custom frame: + my_frames = dict(mutagen.id3.Frames) + class XMYF(Frame): ... + my_frames["XMYF"] = XMYF + mutagen.id3.ID3(filename, known_frames=my_frames) + """ + + from os.path import getsize + self.filename = filename + self.__known_frames = known_frames + self.__fileobj = file(filename, 'rb') + self.__filesize = getsize(filename) + try: + try: + self.__load_header() + except EOFError: + self.size = 0 + raise ID3NoHeaderError("%s: too small (%d bytes)" %( + filename, self.__filesize)) + except (ID3NoHeaderError, ID3UnsupportedVersionError), err: + self.size = 0 + import sys + stack = sys.exc_info()[2] + try: self.__fileobj.seek(-128, 2) + except EnvironmentError: raise err, None, stack + else: + frames = ParseID3v1(self.__fileobj.read(128)) + if frames is not None: + self.version = (1, 1) + map(self.add, frames.values()) + else: raise err, None, stack + else: + frames = self.__known_frames + if frames is None: + if (2,3,0) <= self.version: frames = Frames + elif (2,2,0) <= self.version: frames = Frames_2_2 + data = self.__fullread(self.size - 10) + for frame in self.__read_frames(data, frames=frames): + if isinstance(frame, Frame): self.add(frame) + else: self.unknown_frames.append(frame) + finally: + self.__fileobj.close() + del self.__fileobj + del self.__filesize + if translate: + self.update_to_v24() + + def getall(self, key): + """Return all frames with a given name (the list may be empty). + + This is best explained by examples: + id3.getall('TIT2') == [id3['TIT2']] + id3.getall('TTTT') == [] + id3.getall('TXXX') == [TXXX(desc='woo', text='bar'), + TXXX(desc='baz', text='quuuux'), ...] + + Since this is based on the frame's HashKey, which is + colon-separated, you can use it to do things like + getall('COMM:MusicMatch') or getall('TXXX:QuodLibet:'). 
+ """ + if key in self: return [self[key]] + else: + key = key + ":" + return [v for s,v in self.items() if s.startswith(key)] + + def delall(self, key): + """Delete all tags of a given kind; see getall.""" + if key in self: del(self[key]) + else: + key = key + ":" + for k in filter(lambda s: s.startswith(key), self.keys()): + del(self[k]) + + def setall(self, key, values): + """Delete frames of the given type and add frames in 'values'.""" + self.delall(key) + for tag in values: + self[tag.HashKey] = tag + + def pprint(self): + """Return tags in a human-readable format. + + "Human-readable" is used loosely here. The format is intended + to mirror that used for Vorbis or APEv2 output, e.g. + TIT2=My Title + However, ID3 frames can have multiple keys: + POPM=user@example.org=3 128/255 + """ + return "\n".join(map(Frame.pprint, self.values())) + + def loaded_frame(self, tag): + """Deprecated; use the add method.""" + # turn 2.2 into 2.3/2.4 tags + if len(type(tag).__name__) == 3: tag = type(tag).__base__(tag) + self[tag.HashKey] = tag + + # add = loaded_frame (and vice versa) break applications that + # expect to be able to override loaded_frame (e.g. Quod Libet), + # as does making loaded_frame call add. 
+ def add(self, frame): + """Add a frame to the tag.""" + return self.loaded_frame(frame) + + def __load_header(self): + fn = self.filename + data = self.__fullread(10) + id3, vmaj, vrev, flags, size = unpack('>3sBBB4s', data) + self.__flags = flags + self.size = BitPaddedInt(size) + 10 + self.version = (2, vmaj, vrev) + + if id3 != 'ID3': + raise ID3NoHeaderError("'%s' doesn't start with an ID3 tag" % fn) + if vmaj not in [2, 3, 4]: + raise ID3UnsupportedVersionError("'%s' ID3v2.%d not supported" + % (fn, vmaj)) + + if self.PEDANTIC: + if (2,4,0) <= self.version and (flags & 0x0f): + raise ValueError("'%s' has invalid flags %#02x" % (fn, flags)) + elif (2,3,0) <= self.version < (2,4,0) and (flags & 0x1f): + raise ValueError("'%s' has invalid flags %#02x" % (fn, flags)) + + if self.f_extended: + extsize = self.__fullread(4) + if extsize in Frames: + # Some tagger sets the extended header flag but + # doesn't write an extended header; in this case, the + # ID3 data follows immediately. Since no extended + # header is going to be long enough to actually match + # a frame, and if it's *not* a frame we're going to be + # completely lost anyway, this seems to be the most + # correct check. + # http://code.google.com/p/quodlibet/issues/detail?id=126 + self.__flags ^= 0x40 + self.__extsize = 0 + self.__fileobj.seek(-4, 1) + self.__readbytes -= 4 + elif self.version >= (2,4,0): + # "Where the 'Extended header size' is the size of the whole + # extended header, stored as a 32 bit synchsafe integer." + self.__extsize = BitPaddedInt(extsize) - 4 + else: + # "Where the 'Extended header size', currently 6 or 10 bytes, + # excludes itself." 
+ self.__extsize = unpack('>L', extsize)[0] + if self.__extsize: + self.__extdata = self.__fullread(self.__extsize) + else: + self.__extdata = "" + + def __determine_bpi(self, data, frames, EMPTY="\x00" * 10): + if self.version < (2, 4, 0): + return int + # have to special case whether to use bitpaddedints here + # spec says to use them, but iTunes has it wrong + + # count number of tags found as BitPaddedInt and how far past + o = 0 + asbpi = 0 + while o < len(data) - 10: + part = data[o:o + 10] + if part == EMPTY: + bpioff = -((len(data) - o) % 10) + break + name, size, flags = unpack('>4sLH', part) + size = BitPaddedInt(size) + o += 10 + size + if name in frames: + asbpi += 1 + else: + bpioff = o - len(data) + + # count number of tags found as int and how far past + o = 0 + asint = 0 + while o < len(data) - 10: + part = data[o:o + 10] + if part == EMPTY: + intoff = -((len(data) - o) % 10) + break + name, size, flags = unpack('>4sLH', part) + o += 10 + size + if name in frames: + asint += 1 + else: + intoff = o - len(data) + + # if more tags as int, or equal and bpi is past and int is not + if asint > asbpi or (asint == asbpi and (bpioff >= 1 and intoff <= 1)): + return int + return BitPaddedInt + + def __read_frames(self, data, frames): + if self.version < (2,4,0) and self.f_unsynch: + try: data = unsynch.decode(data) + except ValueError: pass + + if (2,3,0) <= self.version: + bpi = self.__determine_bpi(data, frames) + while data: + header = data[:10] + try: name, size, flags = unpack('>4sLH', header) + except struct.error: return # not enough header + if name.strip('\x00') == '': return + size = bpi(size) + framedata = data[10:10+size] + data = data[10+size:] + if size == 0: continue # drop empty frames + try: tag = frames[name] + except KeyError: + if is_valid_frame_id(name): yield header + framedata + else: + try: yield self.__load_framedata(tag, flags, framedata) + except NotImplementedError: yield header + framedata + except ID3JunkFrameError: pass + + elif 
(2,2,0) <= self.version: + while data: + header = data[0:6] + try: name, size = unpack('>3s3s', header) + except struct.error: return # not enough header + size, = struct.unpack('>L', '\x00'+size) + if name.strip('\x00') == '': return + framedata = data[6:6+size] + data = data[6+size:] + if size == 0: continue # drop empty frames + try: tag = frames[name] + except KeyError: + if is_valid_frame_id(name): yield header + framedata + else: + try: yield self.__load_framedata(tag, 0, framedata) + except NotImplementedError: yield header + framedata + except ID3JunkFrameError: pass + + def __load_framedata(self, tag, flags, framedata): + return tag.fromData(self, flags, framedata) + + f_unsynch = property(lambda s: bool(s.__flags & 0x80)) + f_extended = property(lambda s: bool(s.__flags & 0x40)) + f_experimental = property(lambda s: bool(s.__flags & 0x20)) + f_footer = property(lambda s: bool(s.__flags & 0x10)) + + #f_crc = property(lambda s: bool(s.__extflags & 0x8000)) + + def save(self, filename=None, v1=1): + """Save changes to a file. + + If no filename is given, the one most recently loaded is used. + + Keyword arguments: + v1 -- if 0, ID3v1 tags will be removed + if 1, ID3v1 tags will be updated but not added + if 2, ID3v1 tags will be created and/or updated + + The lack of a way to update only an ID3v1 tag is intentional. 
+ """ + + # Sort frames by 'importance' + order = ["TIT2", "TPE1", "TRCK", "TALB", "TPOS", "TDRC", "TCON"] + order = dict(zip(order, range(len(order)))) + last = len(order) + frames = self.items() + frames.sort(lambda a, b: cmp(order.get(a[0][:4], last), + order.get(b[0][:4], last))) + + framedata = [self.__save_frame(frame) for (key, frame) in frames] + framedata.extend([data for data in self.unknown_frames + if len(data) > 10]) + if not framedata: + try: + self.delete(filename) + except EnvironmentError, err: + from errno import ENOENT + if err.errno != ENOENT: raise + return + + framedata = ''.join(framedata) + framesize = len(framedata) + + if filename is None: filename = self.filename + try: f = open(filename, 'rb+') + except IOError, err: + from errno import ENOENT + if err.errno != ENOENT: raise + f = open(filename, 'ab') # create, then reopen + f = open(filename, 'rb+') + try: + idata = f.read(10) + try: id3, vmaj, vrev, flags, insize = unpack('>3sBBB4s', idata) + except struct.error: id3, insize = '', 0 + insize = BitPaddedInt(insize) + if id3 != 'ID3': insize = -10 + + if insize >= framesize: outsize = insize + else: outsize = (framesize + 1023) & ~0x3FF + framedata += '\x00' * (outsize - framesize) + + framesize = BitPaddedInt.to_str(outsize, width=4) + flags = 0 + header = pack('>3sBBB4s', 'ID3', 4, 0, flags, framesize) + data = header + framedata + + if (insize < outsize): + insert_bytes(f, outsize-insize, insize+10) + f.seek(0) + f.write(data) + + try: + f.seek(-128, 2) + except IOError, err: + from errno import EINVAL + if err.errno != EINVAL: raise + f.seek(0, 2) # ensure read won't get "TAG" + + if f.read(3) == "TAG": + f.seek(-128, 2) + if v1 > 0: f.write(MakeID3v1(self)) + else: f.truncate() + elif v1 == 2: + f.seek(0, 2) + f.write(MakeID3v1(self)) + + finally: + f.close() + + def delete(self, filename=None, delete_v1=True, delete_v2=True): + """Remove tags from a file. + + If no filename is given, the one most recently loaded is used. 
+ + Keyword arguments: + delete_v1 -- delete any ID3v1 tag + delete_v2 -- delete any ID3v2 tag + """ + if filename is None: + filename = self.filename + delete(filename, delete_v1, delete_v2) + self.clear() + + def __save_frame(self, frame): + flags = 0 + if self.PEDANTIC and isinstance(frame, TextFrame): + if len(str(frame)) == 0: return '' + framedata = frame._writeData() + usize = len(framedata) + if usize > 2048: + # Disabled as this causes iTunes and other programs + # to fail to find these frames, which usually includes + # e.g. APIC. + #framedata = BitPaddedInt.to_str(usize) + framedata.encode('zlib') + #flags |= Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN + pass + datasize = BitPaddedInt.to_str(len(framedata), width=4) + header = pack('>4s4sH', type(frame).__name__, datasize, flags) + return header + framedata + + def update_to_v24(self): + """Convert older tags into an ID3v2.4 tag. + + This updates old ID3v2 frames to ID3v2.4 ones (e.g. TYER to + TDRC). If you intend to save tags, you must call this function + at some point; it is called by default when loading the tag. + """ + + if self.version < (2,3,0): del self.unknown_frames[:] + # unsafe to write + + # TDAT, TYER, and TIME have been turned into TDRC. + try: + if str(self.get("TYER", "")).strip("\x00"): + date = str(self.pop("TYER")) + if str(self.get("TDAT", "")).strip("\x00"): + dat = str(self.pop("TDAT")) + date = "%s-%s-%s" % (date, dat[2:], dat[:2]) + if str(self.get("TIME", "")).strip("\x00"): + time = str(self.pop("TIME")) + date += "T%s:%s:00" % (time[:2], time[2:]) + if "TDRC" not in self: + self.add(TDRC(encoding=0, text=date)) + except UnicodeDecodeError: + # Old ID3 tags have *lots* of Unicode problems, so if TYER + # is bad, just chuck the frames. + pass + + # TORY can be the first part of a TDOR. + if "TORY" in self: + f = self.pop("TORY") + if "TDOR" not in self: + try: + self.add(TDOR(encoding=0, text=str(f))) + except UnicodeDecodeError: + pass + + # IPLS is now TIPL. 
+ if "IPLS" in self: + f = self.pop("IPLS") + if "TIPL" not in self: + self.add(TIPL(encoding=f.encoding, people=f.people)) + + if "TCON" in self: + # Get rid of "(xx)Foobr" format. + self["TCON"].genres = self["TCON"].genres + + if self.version < (2, 3): + # ID3v2.2 PIC frames are slightly different. + pics = self.getall("APIC") + mimes = { "PNG": "image/png", "JPG": "image/jpeg" } + self.delall("APIC") + for pic in pics: + newpic = APIC( + encoding=pic.encoding, mime=mimes.get(pic.mime, pic.mime), + type=pic.type, desc=pic.desc, data=pic.data) + self.add(newpic) + + # ID3v2.2 LNK frames are just way too different to upgrade. + self.delall("LINK") + + # These can't be trivially translated to any ID3v2.4 tags, or + # should have been removed already. + for key in ["RVAD", "EQUA", "TRDA", "TSIZ", "TDAT", "TIME", "CRM"]: + if key in self: del(self[key]) + +def delete(filename, delete_v1=True, delete_v2=True): + """Remove tags from a file. + + Keyword arguments: + delete_v1 -- delete any ID3v1 tag + delete_v2 -- delete any ID3v2 tag + """ + + f = open(filename, 'rb+') + + if delete_v1: + try: + f.seek(-128, 2) + except IOError: pass + else: + if f.read(3) == "TAG": + f.seek(-128, 2) + f.truncate() + + # technically an insize=0 tag is invalid, but we delete it anyway + # (primarily because we used to write it) + if delete_v2: + f.seek(0, 0) + idata = f.read(10) + try: id3, vmaj, vrev, flags, insize = unpack('>3sBBB4s', idata) + except struct.error: id3, insize = '', -1 + insize = BitPaddedInt(insize) + if id3 == 'ID3' and insize >= 0: + delete_bytes(f, insize + 10, 0) + +class BitPaddedInt(int): + def __new__(cls, value, bits=7, bigendian=True): + "Strips 8-bits bits out of every byte" + mask = (1<<(bits))-1 + if isinstance(value, (int, long)): + bytes = [] + while value: + bytes.append(value & ((1<> 8 + if isinstance(value, str): + bytes = [ord(byte) & mask for byte in value] + if bigendian: bytes.reverse() + numeric_value = 0 + for shift, byte in zip(range(0, 
len(bytes)*bits, bits), bytes): + numeric_value += byte << shift + if isinstance(numeric_value, long): + self = long.__new__(BitPaddedLong, numeric_value) + else: + self = int.__new__(BitPaddedInt, numeric_value) + self.bits = bits + self.bigendian = bigendian + return self + + def as_str(value, bits=7, bigendian=True, width=4): + bits = getattr(value, 'bits', bits) + bigendian = getattr(value, 'bigendian', bigendian) + value = int(value) + mask = (1<> bits + # PCNT and POPM use growing integers of at least 4 bytes as counters. + if width == -1: width = max(4, len(bytes)) + if len(bytes) > width: + raise ValueError, 'Value too wide (%d bytes)' % len(bytes) + else: bytes.extend([0] * (width-len(bytes))) + if bigendian: bytes.reverse() + return ''.join(map(chr, bytes)) + to_str = staticmethod(as_str) + +class BitPaddedLong(long): + def as_str(value, bits=7, bigendian=True, width=4): + return BitPaddedInt.to_str(value, bits, bigendian, width) + to_str = staticmethod(as_str) + +class unsynch(object): + def decode(value): + output = [] + safe = True + append = output.append + for val in value: + if safe: + append(val) + safe = val != '\xFF' + else: + if val >= '\xE0': raise ValueError('invalid sync-safe string') + elif val != '\x00': append(val) + safe = True + if not safe: raise ValueError('string ended unsafe') + return ''.join(output) + decode = staticmethod(decode) + + def encode(value): + output = [] + safe = True + append = output.append + for val in value: + if safe: + append(val) + if val == '\xFF': safe = False + elif val == '\x00' or val >= '\xE0': + append('\x00') + append(val) + safe = val != '\xFF' + else: + append(val) + safe = True + if not safe: append('\x00') + return ''.join(output) + encode = staticmethod(encode) + +class Spec(object): + def __init__(self, name): self.name = name + def __hash__(self): raise TypeError("Spec objects are unhashable") + +class ByteSpec(Spec): + def read(self, frame, data): return ord(data[0]), data[1:] + def write(self, 
frame, value): return chr(value) + def validate(self, frame, value): return value + +class IntegerSpec(Spec): + def read(self, frame, data): + return int(BitPaddedInt(data, bits=8)), '' + def write(self, frame, value): + return BitPaddedInt.to_str(value, bits=8, width=-1) + def validate(self, frame, value): + return value + +class SizedIntegerSpec(Spec): + def __init__(self, name, size): + self.name, self.__sz = name, size + def read(self, frame, data): + return int(BitPaddedInt(data[:self.__sz], bits=8)), data[self.__sz:] + def write(self, frame, value): + return BitPaddedInt.to_str(value, bits=8, width=self.__sz) + def validate(self, frame, value): + return value + +class EncodingSpec(ByteSpec): + def read(self, frame, data): + enc, data = super(EncodingSpec, self).read(frame, data) + if enc < 16: return enc, data + else: return 0, chr(enc)+data + + def validate(self, frame, value): + if 0 <= value <= 3: return value + if value is None: return None + raise ValueError, 'Invalid Encoding: %r' % value + +class StringSpec(Spec): + def __init__(self, name, length): + super(StringSpec, self).__init__(name) + self.len = length + def read(s, frame, data): return data[:s.len], data[s.len:] + def write(s, frame, value): + if value is None: return '\x00' * s.len + else: return (str(value) + '\x00' * s.len)[:s.len] + def validate(s, frame, value): + if value is None: return None + if isinstance(value, basestring) and len(value) == s.len: return value + raise ValueError, 'Invalid StringSpec[%d] data: %r' % (s.len, value) + +class BinaryDataSpec(Spec): + def read(self, frame, data): return data, '' + def write(self, frame, value): return str(value) + def validate(self, frame, value): return str(value) + +class EncodedTextSpec(Spec): + # Okay, seriously. This is private and defined explicitly and + # completely by the ID3 specification. You can't just add + # encodings here however you want. 
+ _encodings = ( ('latin1', '\x00'), ('utf16', '\x00\x00'), + ('utf_16_be', '\x00\x00'), ('utf8', '\x00') ) + + def read(self, frame, data): + enc, term = self._encodings[frame.encoding] + ret = '' + if len(term) == 1: + if term in data: + data, ret = data.split(term, 1) + else: + offset = -1 + try: + while True: + offset = data.index(term, offset+1) + if offset & 1: continue + data, ret = data[0:offset], data[offset+2:]; break + except ValueError: pass + + if len(data) < len(term): return u'', ret + return data.decode(enc), ret + + def write(self, frame, value): + enc, term = self._encodings[frame.encoding] + return value.encode(enc) + term + + def validate(self, frame, value): return unicode(value) + +class MultiSpec(Spec): + def __init__(self, name, *specs, **kw): + super(MultiSpec, self).__init__(name) + self.specs = specs + self.sep = kw.get('sep') + + def read(self, frame, data): + values = [] + while data: + record = [] + for spec in self.specs: + value, data = spec.read(frame, data) + record.append(value) + if len(self.specs) != 1: values.append(record) + else: values.append(record[0]) + return values, data + + def write(self, frame, value): + data = [] + if len(self.specs) == 1: + for v in value: + data.append(self.specs[0].write(frame, v)) + else: + for record in value: + for v, s in zip(record, self.specs): + data.append(s.write(frame, v)) + return ''.join(data) + + def validate(self, frame, value): + if value is None: return [] + if self.sep and isinstance(value, basestring): + value = value.split(self.sep) + if isinstance(value, list): + if len(self.specs) == 1: + return [self.specs[0].validate(frame, v) for v in value] + else: + return [ + [s.validate(frame, v) for (v,s) in zip(val, self.specs)] + for val in value ] + raise ValueError, 'Invalid MultiSpec data: %r' % value + +class EncodedNumericTextSpec(EncodedTextSpec): pass +class EncodedNumericPartTextSpec(EncodedTextSpec): pass + +class Latin1TextSpec(EncodedTextSpec): + def read(self, frame, 
data): + if '\x00' in data: data, ret = data.split('\x00',1) + else: ret = '' + return data.decode('latin1'), ret + + def write(self, data, value): + return value.encode('latin1') + '\x00' + + def validate(self, frame, value): return unicode(value) + +class ID3TimeStamp(object): + """A time stamp in ID3v2 format. + + This is a restricted form of the ISO 8601 standard; time stamps + take the form of: + YYYY-MM-DD HH:MM:SS + Or some partial form (YYYY-MM-DD HH, YYYY, etc.). + + The 'text' attribute contains the raw text data of the time stamp. + """ + + import re + def __init__(self, text): + if isinstance(text, ID3TimeStamp): text = text.text + self.text = text + + __formats = ['%04d'] + ['%02d'] * 5 + __seps = ['-', '-', ' ', ':', ':', 'x'] + def get_text(self): + parts = [self.year, self.month, self.day, + self.hour, self.minute, self.second] + pieces = [] + for i, part in enumerate(iter(iter(parts).next, None)): + pieces.append(self.__formats[i]%part + self.__seps[i]) + return u''.join(pieces)[:-1] + + def set_text(self, text, splitre=re.compile('[-T:/.]|\s+')): + year, month, day, hour, minute, second = \ + splitre.split(text + ':::::')[:6] + for a in 'year month day hour minute second'.split(): + try: v = int(locals()[a]) + except ValueError: v = None + setattr(self, a, v) + + text = property(get_text, set_text, doc="ID3v2.4 date and time.") + + def __str__(self): return self.text + def __repr__(self): return repr(self.text) + def __cmp__(self, other): return cmp(self.text, other.text) + def encode(self, *args): return self.text.encode(*args) + +class TimeStampSpec(EncodedTextSpec): + def read(self, frame, data): + value, data = super(TimeStampSpec, self).read(frame, data) + return self.validate(frame, value), data + + def write(self, frame, data): + return super(TimeStampSpec, self).write(frame, + data.text.replace(' ', 'T')) + + def validate(self, frame, value): + try: return ID3TimeStamp(value) + except TypeError: raise ValueError, "Invalid ID3TimeStamp: %r" 
% value + +class ChannelSpec(ByteSpec): + (OTHER, MASTER, FRONTRIGHT, FRONTLEFT, BACKRIGHT, BACKLEFT, FRONTCENTRE, + BACKCENTRE, SUBWOOFER) = range(9) + +class VolumeAdjustmentSpec(Spec): + def read(self, frame, data): + value, = unpack('>h', data[0:2]) + return value/512.0, data[2:] + + def write(self, frame, value): + return pack('>h', int(round(value * 512))) + + def validate(self, frame, value): return value + +class VolumePeakSpec(Spec): + def read(self, frame, data): + # http://bugs.xmms.org/attachment.cgi?id=113&action=view + peak = 0 + bits = ord(data[0]) + bytes = min(4, (bits + 7) >> 3) + # not enough frame data + if bytes + 1 > len(data): raise ID3JunkFrameError + shift = ((8 - (bits & 7)) & 7) + (4 - bytes) * 8 + for i in range(1, bytes+1): + peak *= 256 + peak += ord(data[i]) + peak *= 2**shift + return (float(peak) / (2**31-1)), data[1+bytes:] + + def write(self, frame, value): + # always write as 16 bits for sanity. + return "\x10" + pack('>H', int(round(value * 32768))) + + def validate(self, frame, value): return value + +class SynchronizedTextSpec(EncodedTextSpec): + def read(self, frame, data): + texts = [] + encoding, term = self._encodings[frame.encoding] + while data: + l = len(term) + try: + value_idx = data.index(term) + except ValueError: + raise ID3JunkFrameError + value = data[:value_idx].decode(encoding) + time, = struct.unpack(">I", data[value_idx+l:value_idx+l+4]) + texts.append((value, time)) + data = data[value_idx+l+4:] + return texts, "" + + def write(self, frame, value): + data = [] + encoding, term = self._encodings[frame.encoding] + for text, time in frame.text: + text = text.encode(encoding) + term + data.append(text + struct.pack(">I", time)) + return "".join(data) + + def validate(self, frame, value): + return value + +class KeyEventSpec(Spec): + def read(self, frame, data): + events = [] + while len(data) >= 5: + events.append(struct.unpack(">bI", data[:5])) + data = data[5:] + return events, data + + def write(self, frame, 
value): + return "".join([struct.pack(">bI", *event) for event in value]) + + def validate(self, frame, value): + return value + +class VolumeAdjustmentsSpec(Spec): + # Not to be confused with VolumeAdjustmentSpec. + def read(self, frame, data): + adjustments = {} + while len(data) >= 4: + freq, adj = struct.unpack(">Hh", data[:4]) + data = data[4:] + freq /= 2.0 + adj /= 512.0 + adjustments[freq] = adj + adjustments = adjustments.items() + adjustments.sort() + return adjustments, data + + def write(self, frame, value): + value.sort() + return "".join([struct.pack(">Hh", int(freq * 2), int(adj * 512)) + for (freq, adj) in value]) + + def validate(self, frame, value): + return value + +class ASPIIndexSpec(Spec): + def read(self, frame, data): + if frame.b == 16: + format = "H" + size = 2 + elif frame.b == 8: + format = "B" + size = 1 + else: + warn("invalid bit count in ASPI (%d)" % frame.b, ID3Warning) + return [], data + + indexes = data[:frame.N * size] + data = data[frame.N * size:] + return list(struct.unpack(">" + format * frame.N, indexes)), data + + def write(self, frame, values): + if frame.b == 16: format = "H" + elif frame.b == 8: format = "B" + else: raise ValueError("frame.b must be 8 or 16") + return struct.pack(">" + format * frame.N, *values) + + def validate(self, frame, values): + return values + +class Frame(object): + """Fundamental unit of ID3 data. + + ID3 tags are split into frames. Each frame has a potentially + different structure, and so this base class is not very featureful. 
+ """ + + FLAG23_ALTERTAG = 0x8000 + FLAG23_ALTERFILE = 0x4000 + FLAG23_READONLY = 0x2000 + FLAG23_COMPRESS = 0x0080 + FLAG23_ENCRYPT = 0x0040 + FLAG23_GROUP = 0x0020 + + FLAG24_ALTERTAG = 0x4000 + FLAG24_ALTERFILE = 0x2000 + FLAG24_READONLY = 0x1000 + FLAG24_GROUPID = 0x0040 + FLAG24_COMPRESS = 0x0008 + FLAG24_ENCRYPT = 0x0004 + FLAG24_UNSYNCH = 0x0002 + FLAG24_DATALEN = 0x0001 + + _framespec = [] + def __init__(self, *args, **kwargs): + if len(args)==1 and len(kwargs)==0 and isinstance(args[0], type(self)): + other = args[0] + for checker in self._framespec: + val = checker.validate(self, getattr(other, checker.name)) + setattr(self, checker.name, val) + else: + for checker, val in zip(self._framespec, args): + setattr(self, checker.name, checker.validate(self, val)) + for checker in self._framespec[len(args):]: + validated = checker.validate( + self, kwargs.get(checker.name, None)) + setattr(self, checker.name, validated) + + HashKey = property( + lambda s: s.FrameID, + doc="an internal key used to ensure frame uniqueness in a tag") + FrameID = property( + lambda s: type(s).__name__, + doc="ID3v2 three or four character frame ID") + + def __repr__(self): + """Python representation of a frame. + + The string returned is a valid Python expression to construct + a copy of this frame. 
+ """ + kw = [] + for attr in self._framespec: + kw.append('%s=%r' % (attr.name, getattr(self, attr.name))) + return '%s(%s)' % (type(self).__name__, ', '.join(kw)) + + def _readData(self, data): + odata = data + for reader in self._framespec: + if len(data): + try: value, data = reader.read(self, data) + except UnicodeDecodeError: + raise ID3JunkFrameError + else: raise ID3JunkFrameError + setattr(self, reader.name, value) + if data.strip('\x00'): + warn('Leftover data: %s: %r (from %r)' % ( + type(self).__name__, data, odata), + ID3Warning) + + def _writeData(self): + data = [] + for writer in self._framespec: + data.append(writer.write(self, getattr(self, writer.name))) + return ''.join(data) + + def pprint(self): + """Return a human-readable representation of the frame.""" + return "%s=%s" % (type(self).__name__, self._pprint()) + + def _pprint(self): + return "[unrepresentable data]" + + def fromData(cls, id3, tflags, data): + """Construct this ID3 frame from raw string data.""" + + if (2,4,0) <= id3.version: + if tflags & (Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN): + # The data length int is syncsafe in 2.4 (but not 2.3). + # However, we don't actually need the data length int, + # except to work around a QL 0.12 bug, and in that case + # all we need are the raw bytes. + datalen_bytes = data[:4] + data = data[4:] + if tflags & Frame.FLAG24_UNSYNCH or id3.f_unsynch: + try: data = unsynch.decode(data) + except ValueError, err: + if id3.PEDANTIC: + raise ID3BadUnsynchData, '%s: %r' % (err, data) + if tflags & Frame.FLAG24_ENCRYPT: + raise ID3EncryptionUnsupportedError + if tflags & Frame.FLAG24_COMPRESS: + try: data = data.decode('zlib') + except zlibError, err: + # the initial mutagen that went out with QL 0.12 did not + # write the 4 bytes of uncompressed size. Compensate. 
+ data = datalen_bytes + data + try: data = data.decode('zlib') + except zlibError, err: + if id3.PEDANTIC: + raise ID3BadCompressedData, '%s: %r' % (err, data) + + elif (2,3,0) <= id3.version: + if tflags & Frame.FLAG23_COMPRESS: + usize, = unpack('>L', data[:4]) + data = data[4:] + if tflags & Frame.FLAG23_ENCRYPT: + raise ID3EncryptionUnsupportedError + if tflags & Frame.FLAG23_COMPRESS: + try: data = data.decode('zlib') + except zlibError, err: + if id3.PEDANTIC: + raise ID3BadCompressedData, '%s: %r' % (err, data) + + frame = cls() + frame._rawdata = data + frame._flags = tflags + frame._readData(data) + return frame + fromData = classmethod(fromData) + + def __hash__(self): + raise TypeError("Frame objects are unhashable") + +class FrameOpt(Frame): + """A frame with optional parts. + + Some ID3 frames have optional data; this class extends Frame to + provide support for those parts. + """ + _optionalspec = [] + + def __init__(self, *args, **kwargs): + super(FrameOpt, self).__init__(*args, **kwargs) + for spec in self._optionalspec: + if spec.name in kwargs: + validated = spec.validate(self, kwargs[spec.name]) + setattr(self, spec.name, validated) + else: break + + def _readData(self, data): + odata = data + for reader in self._framespec: + if len(data): value, data = reader.read(self, data) + else: raise ID3JunkFrameError + setattr(self, reader.name, value) + if data: + for reader in self._optionalspec: + if len(data): value, data = reader.read(self, data) + else: break + setattr(self, reader.name, value) + if data.strip('\x00'): + warn('Leftover data: %s: %r (from %r)' % ( + type(self).__name__, data, odata), + ID3Warning) + + def _writeData(self): + data = [] + for writer in self._framespec: + data.append(writer.write(self, getattr(self, writer.name))) + for writer in self._optionalspec: + try: data.append(writer.write(self, getattr(self, writer.name))) + except AttributeError: break + return ''.join(data) + + def __repr__(self): + kw = [] + for attr in 
self._framespec: + kw.append('%s=%r' % (attr.name, getattr(self, attr.name))) + for attr in self._optionalspec: + if hasattr(self, attr.name): + kw.append('%s=%r' % (attr.name, getattr(self, attr.name))) + return '%s(%s)' % (type(self).__name__, ', '.join(kw)) + + +class TextFrame(Frame): + """Text strings. + + Text frames support casts to unicode or str objects, as well as + list-like indexing, extend, and append. + + Iterating over a TextFrame iterates over its strings, not its + characters. + + Text frames have a 'text' attribute which is the list of strings, + and an 'encoding' attribute; 0 for ISO-8859 1, 1 UTF-16, 2 for + UTF-16BE, and 3 for UTF-8. If you don't want to worry about + encodings, just set it to 3. + """ + + _framespec = [ EncodingSpec('encoding'), + MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000') ] + def __str__(self): return self.__unicode__().encode('utf-8') + def __unicode__(self): return u'\u0000'.join(self.text) + def __eq__(self, other): + if isinstance(other, str): return str(self) == other + elif isinstance(other, unicode): return unicode(self) == other + return self.text == other + def __getitem__(self, item): return self.text[item] + def __iter__(self): return iter(self.text) + def append(self, value): return self.text.append(value) + def extend(self, value): return self.text.extend(value) + def _pprint(self): return " / ".join(self.text) + +class NumericTextFrame(TextFrame): + """Numerical text strings. + + The numeric value of these frames can be gotten with unary plus, e.g. + frame = TLEN('12345') + length = +frame + """ + + _framespec = [ EncodingSpec('encoding'), + MultiSpec('text', EncodedNumericTextSpec('text'), sep=u'\u0000') ] + + def __pos__(self): + """Return the numerical value of the string.""" + return int(self.text[0]) + +class NumericPartTextFrame(TextFrame): + """Multivalue numerical text strings. + + These strings indicate 'part (e.g. 
track) X of Y', and unary plus + returns the first value: + frame = TRCK('4/15') + track = +frame # track == 4 + """ + + _framespec = [ EncodingSpec('encoding'), + MultiSpec('text', EncodedNumericPartTextSpec('text'), sep=u'\u0000') ] + def __pos__(self): + return int(self.text[0].split("/")[0]) + +class TimeStampTextFrame(TextFrame): + """A list of time stamps. + + The 'text' attribute in this frame is a list of ID3TimeStamp + objects, not a list of strings. + """ + + _framespec = [ EncodingSpec('encoding'), + MultiSpec('text', TimeStampSpec('stamp'), sep=u',') ] + def __str__(self): return self.__unicode__().encode('utf-8') + def __unicode__(self): return ','.join([stamp.text for stamp in self.text]) + def _pprint(self): + return " / ".join([stamp.text for stamp in self.text]) + +class UrlFrame(Frame): + """A frame containing a URL string. + + The ID3 specification is silent about IRIs and normalized URL + forms. Mutagen assumes all URLs in files are encoded as Latin 1, + but string conversion of this frame returns a UTF-8 representation + for compatibility with other string conversions. + + The only sane way to handle URLs in MP3s is to restrict them to + ASCII. + """ + + _framespec = [ Latin1TextSpec('url') ] + def __str__(self): return self.url.encode('utf-8') + def __unicode__(self): return self.url + def __eq__(self, other): return self.url == other + def _pprint(self): return self.url + +class UrlFrameU(UrlFrame): + HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.url)) + +class TALB(TextFrame): "Album" +class TBPM(NumericTextFrame): "Beats per minute" +class TCOM(TextFrame): "Composer" + +class TCON(TextFrame): + """Content type (Genre) + + ID3 has several ways genres can be represented; for convenience, + use the 'genres' property rather than the 'text' attribute. 
+ """ + + from lib.mutagen._constants import GENRES + + def __get_genres(self): + genres = [] + import re + genre_re = re.compile(r"((?:\((?P[0-9]+|RX|CR)\))*)(?P.+)?") + for value in self.text: + if value.isdigit(): + try: genres.append(self.GENRES[int(value)]) + except IndexError: genres.append(u"Unknown") + elif value == "CR": genres.append(u"Cover") + elif value == "RX": genres.append(u"Remix") + elif value: + newgenres = [] + genreid, dummy, genrename = genre_re.match(value).groups() + + if genreid: + for gid in genreid[1:-1].split(")("): + if gid.isdigit() and int(gid) < len(self.GENRES): + gid = unicode(self.GENRES[int(gid)]) + newgenres.append(gid) + elif gid == "CR": newgenres.append(u"Cover") + elif gid == "RX": newgenres.append(u"Remix") + else: newgenres.append(u"Unknown") + + if genrename: + # "Unescaping" the first parenthesis + if genrename.startswith("(("): genrename = genrename[1:] + if genrename not in newgenres: newgenres.append(genrename) + + genres.extend(newgenres) + + return genres + + def __set_genres(self, genres): + if isinstance(genres, basestring): genres = [genres] + self.text = map(self.__decode, genres) + + def __decode(self, value): + if isinstance(value, str): + enc = EncodedTextSpec._encodings[self.encoding][0] + return value.decode(enc) + else: return value + + genres = property(__get_genres, __set_genres, None, + "A list of genres parsed from the raw text data.") + + def _pprint(self): + return " / ".join(self.genres) + +class TCOP(TextFrame): "Copyright (c)" +class TCMP(NumericTextFrame): "iTunes Compilation Flag" +class TDAT(TextFrame): "Date of recording (DDMM)" +class TDEN(TimeStampTextFrame): "Encoding Time" +class TDOR(TimeStampTextFrame): "Original Release Time" +class TDLY(NumericTextFrame): "Audio Delay (ms)" +class TDRC(TimeStampTextFrame): "Recording Time" +class TDRL(TimeStampTextFrame): "Release Time" +class TDTG(TimeStampTextFrame): "Tagging Time" +class TENC(TextFrame): "Encoder" +class TEXT(TextFrame): "Lyricist" 
class TFLT(TextFrame): "File type"
class TIME(TextFrame): "Time of recording (HHMM)"
class TIT1(TextFrame): "Content group description"
class TIT2(TextFrame): "Title"
class TIT3(TextFrame): "Subtitle/Description refinement"
class TKEY(TextFrame): "Starting Key"
class TLAN(TextFrame): "Audio Languages"
class TLEN(NumericTextFrame): "Audio Length (ms)"
class TMED(TextFrame): "Source Media Type"
class TMOO(TextFrame): "Mood"
class TOAL(TextFrame): "Original Album"
class TOFN(TextFrame): "Original Filename"
class TOLY(TextFrame): "Original Lyricist"
class TOPE(TextFrame): "Original Artist/Performer"
class TORY(NumericTextFrame): "Original Release Year"
class TOWN(TextFrame): "Owner/Licensee"
class TPE1(TextFrame): "Lead Artist/Performer/Soloist/Group"
class TPE2(TextFrame): "Band/Orchestra/Accompaniment"
class TPE3(TextFrame): "Conductor"
class TPE4(TextFrame): "Interpreter/Remixer/Modifier"
class TPOS(NumericPartTextFrame): "Part of set"
class TPRO(TextFrame): "Produced (P)"
class TPUB(TextFrame): "Publisher"
class TRCK(NumericPartTextFrame): "Track Number"
class TRDA(TextFrame): "Recording Dates"
class TRSN(TextFrame): "Internet Radio Station Name"
class TRSO(TextFrame): "Internet Radio Station Owner"
class TSIZ(NumericTextFrame): "Size of audio data (bytes)"
class TSO2(TextFrame): "iTunes Album Artist Sort"
class TSOA(TextFrame): "Album Sort Order key"
class TSOC(TextFrame): "iTunes Composer Sort"
class TSOP(TextFrame): "Performer Sort Order key"
class TSOT(TextFrame): "Title Sort Order key"
class TSRC(TextFrame): "International Standard Recording Code (ISRC)"
class TSSE(TextFrame): "Encoder settings"
class TSST(TextFrame): "Set Subtitle"
class TYER(NumericTextFrame): "Year of recording"

class TXXX(TextFrame):
    """User-defined text data.

    TXXX frames have a 'desc' attribute which is set to any Unicode
    value (though the encoding of the text and the description must be
    the same). Many taggers use this frame to store freeform keys.
    """
    _framespec = [ EncodingSpec('encoding'), EncodedTextSpec('desc'),
        MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000') ]
    HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.desc))
    def _pprint(self): return "%s=%s" % (self.desc, " / ".join(self.text))

class WCOM(UrlFrameU): "Commercial Information"
class WCOP(UrlFrame): "Copyright Information"
class WOAF(UrlFrame): "Official File Information"
class WOAR(UrlFrameU): "Official Artist/Performer Information"
class WOAS(UrlFrame): "Official Source Information"
class WORS(UrlFrame): "Official Internet Radio Information"
class WPAY(UrlFrame): "Payment Information"
class WPUB(UrlFrame): "Official Publisher Information"

class WXXX(UrlFrame):
    """User-defined URL data.

    Like TXXX, this has a freeform description associated with it.
    """
    _framespec = [ EncodingSpec('encoding'), EncodedTextSpec('desc'),
        Latin1TextSpec('url') ]
    HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.desc))

class PairedTextFrame(Frame):
    """Paired text strings.

    Some ID3 frames pair text strings, to associate names with a more
    specific involvement in the song. The 'people' attribute of these
    frames contains a list of pairs:
        [['trumpet', 'Miles Davis'], ['bass', 'Paul Chambers']]

    Like text frames, these frames also have an encoding attribute.
    """

    _framespec = [ EncodingSpec('encoding'), MultiSpec('people',
        EncodedTextSpec('involvement'), EncodedTextSpec('person')) ]
    def __eq__(self, other):
        return self.people == other

class TIPL(PairedTextFrame): "Involved People List"
class TMCL(PairedTextFrame): "Musicians Credits List"
class IPLS(TIPL): "Involved People List"

class MCDI(Frame):
    """Binary dump of CD's TOC.

    The 'data' attribute contains the raw byte string.
    """
    _framespec = [ BinaryDataSpec('data') ]
    def __eq__(self, other): return self.data == other

class ETCO(Frame):
    """Event timing codes."""
    _framespec = [ ByteSpec("format"), KeyEventSpec("events") ]
    def __eq__(self, other): return self.events == other

class MLLT(Frame):
    """MPEG location lookup table.

    This frame's attributes may be changed in the future based on
    feedback from real-world use.
    """
    _framespec = [ SizedIntegerSpec('frames', 2),
                   SizedIntegerSpec('bytes', 3),
                   SizedIntegerSpec('milliseconds', 3),
                   ByteSpec('bits_for_bytes'),
                   ByteSpec('bits_for_milliseconds'),
                   BinaryDataSpec('data') ]
    def __eq__(self, other): return self.data == other

class SYTC(Frame):
    """Synchronised tempo codes.

    This frame's attributes may be changed in the future based on
    feedback from real-world use.
    """
    _framespec = [ ByteSpec("format"), BinaryDataSpec("data") ]
    def __eq__(self, other): return self.data == other

class USLT(Frame):
    """Unsynchronised lyrics/text transcription.

    Lyrics have a three letter ISO language code ('lang'), a
    description ('desc'), and a block of plain text ('text').
    """

    _framespec = [ EncodingSpec('encoding'), StringSpec('lang', 3),
        EncodedTextSpec('desc'), EncodedTextSpec('text') ]
    HashKey = property(lambda s: '%s:%s:%r' % (s.FrameID, s.desc, s.lang))

    def __str__(self): return self.text.encode('utf-8')
    def __unicode__(self): return self.text
    def __eq__(self, other): return self.text == other

class SYLT(Frame):
    """Synchronised lyrics/text."""

    _framespec = [ EncodingSpec('encoding'), StringSpec('lang', 3),
        ByteSpec('format'), ByteSpec('type'), EncodedTextSpec('desc'),
        SynchronizedTextSpec('text') ]
    HashKey = property(lambda s: '%s:%s:%r' % (s.FrameID, s.desc, s.lang))

    def __eq__(self, other):
        return str(self) == other

    def __str__(self):
        # Concatenate the text parts; the time stamps are dropped.
        return "".join([text for (text, time) in self.text]).encode('utf-8')

class COMM(TextFrame):
    """User comment.

    User comment frames have a description, like TXXX, and also a three
    letter ISO language code in the 'lang' attribute.
    """
    _framespec = [ EncodingSpec('encoding'), StringSpec('lang', 3),
        EncodedTextSpec('desc'),
        MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000') ]
    HashKey = property(lambda s: '%s:%s:%r' % (s.FrameID, s.desc, s.lang))
    def _pprint(self): return "%s=%r=%s" % (
        self.desc, self.lang, " / ".join(self.text))

class RVA2(Frame):
    """Relative volume adjustment (2).

    This frame is used to implemented volume scaling, and in
    particular, normalization using ReplayGain.

    Attributes:
    desc -- description or context of this adjustment
    channel -- audio channel to adjust (master is 1)
    gain -- a + or - dB gain relative to some reference level
    peak -- peak of the audio as a floating point number, [0, 1]

    When storing ReplayGain tags, use descriptions of 'album' and
    'track' on channel 1.
    """

    _framespec = [ Latin1TextSpec('desc'), ChannelSpec('channel'),
        VolumeAdjustmentSpec('gain'), VolumePeakSpec('peak') ]
    _channels = ["Other", "Master volume", "Front right", "Front left",
                 "Back right", "Back left", "Front centre", "Back centre",
                 "Subwoofer"]
    HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.desc))

    def __eq__(self, other):
        if str(self) == other:
            return True
        # 'other' may be any object (e.g. a string that did not match
        # above); one lacking these attributes is unequal, not an error.
        try:
            return (self.desc == other.desc and
                    self.channel == other.channel and
                    self.gain == other.gain and
                    self.peak == other.peak)
        except AttributeError:
            return False

    def __str__(self):
        return "%s: %+0.4f dB/%0.4f" % (
            self._channels[self.channel], self.gain, self.peak)

class EQU2(Frame):
    """Equalisation (2).

    Attributes:
    method -- interpolation method (0 = band, 1 = linear)
    desc -- identifying description
    adjustments -- list of (frequency, vol_adjustment) pairs
    """
    _framespec = [ ByteSpec("method"), Latin1TextSpec("desc"),
                   VolumeAdjustmentsSpec("adjustments") ]
    def __eq__(self, other): return self.adjustments == other
    HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.desc))

# class RVAD: unsupported
# class EQUA: unsupported

class RVRB(Frame):
    """Reverb."""
    _framespec = [ SizedIntegerSpec('left', 2), SizedIntegerSpec('right', 2),
                   ByteSpec('bounce_left'), ByteSpec('bounce_right'),
                   ByteSpec('feedback_ltl'), ByteSpec('feedback_ltr'),
                   ByteSpec('feedback_rtr'), ByteSpec('feedback_rtl'),
                   ByteSpec('premix_ltr'), ByteSpec('premix_rtl') ]

    def __eq__(self, other): return (self.left, self.right) == other

class APIC(Frame):
    """Attached (or linked) Picture.

    Attributes:
    encoding -- text encoding for the description
    mime -- a MIME type (e.g. image/jpeg) or '-->' if the data is a URI
    type -- the source of the image (3 is the album front cover)
    desc -- a text description of the image
    data -- raw image data, as a byte string

    Mutagen will automatically compress large images when saving tags.
    """
    _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('mime'),
        ByteSpec('type'), EncodedTextSpec('desc'), BinaryDataSpec('data') ]
    def __eq__(self, other): return self.data == other
    HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.desc))
    def _pprint(self):
        return "%s (%s, %d bytes)" % (
            self.desc, self.mime, len(self.data))

class PCNT(Frame):
    """Play counter.

    The 'count' attribute contains the (recorded) number of times this
    file has been played.

    This frame is basically obsoleted by POPM.
    """
    _framespec = [ IntegerSpec('count') ]

    def __eq__(self, other): return self.count == other
    def __pos__(self): return self.count
    def _pprint(self): return unicode(self.count)

class POPM(FrameOpt):
    """Popularimeter.

    This frame keys a rating (out of 255) and a play count to an email
    address.

    Attributes:
    email -- email this POPM frame is for
    rating -- rating from 0 to 255
    count -- number of times the files has been played (optional)
    """
    _framespec = [ Latin1TextSpec('email'), ByteSpec('rating') ]
    _optionalspec = [ IntegerSpec('count') ]

    HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.email))

    def __eq__(self, other): return self.rating == other
    def __pos__(self): return self.rating
    def _pprint(self): return "%s=%r %r/255" % (
        self.email, getattr(self, 'count', None), self.rating)

class GEOB(Frame):
    """General Encapsulated Object.

    A blob of binary data, that is not a picture (those go in APIC).

    Attributes:
    encoding -- encoding of the description
    mime -- MIME type of the data or '-->' if the data is a URI
    filename -- suggested filename if extracted
    desc -- text description of the data
    data -- raw data, as a byte string
    """
    _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('mime'),
        EncodedTextSpec('filename'), EncodedTextSpec('desc'),
        BinaryDataSpec('data') ]
    HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.desc))

    def __eq__(self, other): return self.data == other

class RBUF(FrameOpt):
    """Recommended buffer size.

    Attributes:
    size -- recommended buffer size in bytes
    info -- if ID3 tags may be elsewhere in the file (optional)
    offset -- the location of the next ID3 tag, if any

    Mutagen will not find the next tag itself.
    """
    _framespec = [ SizedIntegerSpec('size', 3) ]
    _optionalspec = [ ByteSpec('info'), SizedIntegerSpec('offset', 4) ]

    def __eq__(self, other): return self.size == other
    def __pos__(self): return self.size

class AENC(FrameOpt):
    """Audio encryption.

    Attributes:
    owner -- key identifying this encryption type
    preview_start -- unencrypted data block offset
    preview_length -- number of unencrypted blocks
    data -- data required for decryption (optional)

    Mutagen cannot decrypt files.
    """
    _framespec = [ Latin1TextSpec('owner'),
                   SizedIntegerSpec('preview_start', 2),
                   SizedIntegerSpec('preview_length', 2) ]
    _optionalspec = [ BinaryDataSpec('data') ]
    HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.owner))

    def __str__(self): return self.owner.encode('utf-8')
    def __unicode__(self): return self.owner
    def __eq__(self, other): return self.owner == other

class LINK(FrameOpt):
    """Linked information.

    Attributes:
    frameid -- the ID of the linked frame
    url -- the location of the linked frame
    data -- further ID information for the frame
    """

    _framespec = [ StringSpec('frameid', 4), Latin1TextSpec('url') ]
    _optionalspec = [ BinaryDataSpec('data') ]
    def __HashKey(self):
        # 'data' is optional; fall back to a shorter key when it is unset.
        try:
            return "%s:%s:%s:%r" % (
                self.FrameID, self.frameid, self.url, self.data)
        except AttributeError:
            return "%s:%s:%s" % (self.FrameID, self.frameid, self.url)
    HashKey = property(__HashKey)
    def __eq__(self, other):
        try: return (self.frameid, self.url, self.data) == other
        except AttributeError: return (self.frameid, self.url) == other

class POSS(Frame):
    """Position synchronisation frame

    Attribute:
    format -- format of the position attribute (frames or milliseconds)
    position -- current position of the file
    """
    _framespec = [ ByteSpec('format'), IntegerSpec('position') ]

    def __pos__(self): return self.position
    def __eq__(self, other): return self.position == other

class UFID(Frame):
    """Unique file identifier.

    Attributes:
    owner -- format/type of identifier
    data -- identifier
    """

    _framespec = [ Latin1TextSpec('owner'), BinaryDataSpec('data') ]
    HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.owner))
    def __eq__(s, o):
        # Compare frame-to-frame by owner and data. UFID also covers its
        # ID3v2.2 subclass, so both frame versions take this branch.
        if isinstance(o, UFID): return s.owner == o.owner and s.data == o.data
        else: return s.data == o
    def _pprint(self):
        # Empty data counts as ASCII; max() of an empty string would raise.
        isascii = not self.data or ord(max(self.data)) < 128
        if isascii: return "%s=%s" % (self.owner, self.data)
        else: return "%s (%d bytes)" % (self.owner, len(self.data))

class USER(Frame):
    """Terms of use.

    Attributes:
    encoding -- text encoding
    lang -- ISO three letter language code
    text -- licensing terms for the audio
    """
    _framespec = [ EncodingSpec('encoding'), StringSpec('lang', 3),
        EncodedTextSpec('text') ]
    HashKey = property(lambda s: '%s:%r' % (s.FrameID, s.lang))

    def __str__(self): return self.text.encode('utf-8')
    def __unicode__(self): return self.text
    def __eq__(self, other): return self.text == other
    def _pprint(self): return "%r=%s" % (self.lang, self.text)

class OWNE(Frame):
    """Ownership frame."""
    _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('price'),
                   StringSpec('date', 8), EncodedTextSpec('seller') ]

    def __str__(self): return self.seller.encode('utf-8')
    def __unicode__(self): return self.seller
    def __eq__(self, other): return self.seller == other

class COMR(FrameOpt):
    """Commercial frame."""
    _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('price'),
                   StringSpec('valid_until', 8), Latin1TextSpec('contact'),
                   ByteSpec('format'), EncodedTextSpec('seller'),
                   EncodedTextSpec('desc')]
    _optionalspec = [ Latin1TextSpec('mime'), BinaryDataSpec('logo') ]
    # Keyed and compared by the full serialized body.
    HashKey = property(lambda s: '%s:%s' % (s.FrameID, s._writeData()))
    def __eq__(self, other): return self._writeData() == other._writeData()

class ENCR(Frame):
    """Encryption method registration.

    The standard does not allow multiple ENCR frames with the same owner
    or the same method. Mutagen only verifies that the owner is unique.
    """
    _framespec = [ Latin1TextSpec('owner'), ByteSpec('method'),
                   BinaryDataSpec('data') ]
    HashKey = property(lambda s: "%s:%s" % (s.FrameID, s.owner))
    def __str__(self): return self.data
    def __eq__(self, other): return self.data == other

class GRID(FrameOpt):
    """Group identification registration."""
    _framespec = [ Latin1TextSpec('owner'), ByteSpec('group') ]
    _optionalspec = [ BinaryDataSpec('data') ]
    HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.group))
    def __pos__(self): return self.group
    def __str__(self): return self.owner.encode('utf-8')
    def __unicode__(self): return self.owner
    def __eq__(self, other): return self.owner == other or self.group == other


class PRIV(Frame):
    """Private frame."""
    _framespec = [ Latin1TextSpec('owner'), BinaryDataSpec('data') ]
    HashKey = property(lambda s: '%s:%s:%s' % (
        s.FrameID, s.owner, s.data.decode('latin1')))
    def __str__(self): return self.data
    def __eq__(self, other): return self.data == other
    def _pprint(self):
        # Empty data counts as ASCII; max() of an empty string would raise.
        isascii = not self.data or ord(max(self.data)) < 128
        if isascii: return "%s=%s" % (self.owner, self.data)
        else: return "%s (%d bytes)" % (self.owner, len(self.data))

class SIGN(Frame):
    """Signature frame."""
    _framespec = [ ByteSpec('group'), BinaryDataSpec('sig') ]
    HashKey = property(lambda s: '%s:%c:%s' % (s.FrameID, s.group, s.sig))
    def __str__(self): return self.sig
    def __eq__(self, other): return self.sig == other

class SEEK(Frame):
    """Seek frame.

    Mutagen does not find tags at seek offsets.
    """
    _framespec = [ IntegerSpec('offset') ]
    def __pos__(self): return self.offset
    def __eq__(self, other): return self.offset == other

class ASPI(Frame):
    """Audio seek point index.

    Attributes: S, L, N, b, and Fi. For the meaning of these, see
    the ID3v2.4 specification. Fi is a list of integers.
    """
    _framespec = [ SizedIntegerSpec("S", 4), SizedIntegerSpec("L", 4),
                   SizedIntegerSpec("N", 2), ByteSpec("b"),
                   ASPIIndexSpec("Fi") ]
    def __eq__(self, other): return self.Fi == other

# Collect every four-letter Frame subclass defined so far in this module.
Frames = dict([(k,v) for (k,v) in globals().items()
        if len(k)==4 and isinstance(v, type) and issubclass(v, Frame)])
"""All supported ID3v2 frames, keyed by frame name."""
# The list comprehension leaks its loop variables into module scope.
del(k); del(v)

# ID3v2.2 frames
class UFI(UFID): "Unique File Identifier"

class TT1(TIT1): "Content group description"
class TT2(TIT2): "Title"
class TT3(TIT3): "Subtitle/Description refinement"
class TP1(TPE1): "Lead Artist/Performer/Soloist/Group"
class TP2(TPE2): "Band/Orchestra/Accompaniment"
class TP3(TPE3): "Conductor"
class TP4(TPE4): "Interpreter/Remixer/Modifier"
class TCM(TCOM): "Composer"
class TXT(TEXT): "Lyricist"
class TLA(TLAN): "Audio Language(s)"
class TCO(TCON): "Content Type (Genre)"
class TAL(TALB): "Album"
class TPA(TPOS): "Part of set"
class TRK(TRCK): "Track Number"
class TRC(TSRC): "International Standard Recording Code (ISRC)"
class TYE(TYER): "Year of recording"
class TDA(TDAT): "Date of recording (DDMM)"
class TIM(TIME): "Time of recording (HHMM)"
class TRD(TRDA): "Recording Dates"
class TMT(TMED): "Source Media Type"
class TFT(TFLT): "File Type"
class TBP(TBPM): "Beats per minute"
class TCP(TCMP): "iTunes Compilation Flag"
class TCR(TCOP): "Copyright (C)"
class TPB(TPUB): "Publisher"
class TEN(TENC): "Encoder"
class TSS(TSSE): "Encoder settings"
class TOF(TOFN): "Original Filename"
class TLE(TLEN): "Audio Length (ms)"
class TSI(TSIZ): "Audio Data size (bytes)"
class TDY(TDLY): "Audio Delay (ms)"
class TKE(TKEY): "Starting Key"
class TOT(TOAL): "Original Album"
class TOA(TOPE): "Original Artist/Performer"
class TOL(TOLY): "Original Lyricist"
class TOR(TORY): "Original Release Year"

class TXX(TXXX): "User-defined Text"

class WAF(WOAF): "Official File Information"
class WAR(WOAR): "Official Artist/Performer Information"
class WAS(WOAS): "Official Source Information"
class WCM(WCOM): "Commercial Information"
class WCP(WCOP): "Copyright Information"
class WPB(WPUB): "Official Publisher Information"

class WXX(WXXX): "User-defined URL"

class IPL(IPLS): "Involved people list"
class MCI(MCDI): "Binary dump of CD's TOC"
class ETC(ETCO): "Event timing codes"
class MLL(MLLT): "MPEG location lookup table"
class STC(SYTC): "Synced tempo codes"
class ULT(USLT): "Unsynchronised lyrics/text transcription"
class SLT(SYLT): "Synchronised lyrics/text"
class COM(COMM): "Comment"
#class RVA(RVAD)
#class EQU(EQUA)
class REV(RVRB): "Reverb"
class PIC(APIC):
    """Attached Picture.

    The 'mime' attribute of an ID3v2.2 attached picture must be either
    'PNG' or 'JPG'.
    """
    _framespec = [ EncodingSpec('encoding'), StringSpec('mime', 3),
        ByteSpec('type'), EncodedTextSpec('desc'), BinaryDataSpec('data') ]
class GEO(GEOB): "General Encapsulated Object"
class CNT(PCNT): "Play counter"
class POP(POPM): "Popularimeter"
class BUF(RBUF): "Recommended buffer size"

class CRM(Frame):
    """Encrypted meta frame"""
    _framespec = [ Latin1TextSpec('owner'), Latin1TextSpec('desc'),
                   BinaryDataSpec('data') ]
    def __eq__(self, other): return self.data == other

class CRA(AENC): "Audio encryption"

class LNK(LINK):
    """Linked information"""
    _framespec = [ StringSpec('frameid', 3), Latin1TextSpec('url') ]
    _optionalspec = [ BinaryDataSpec('data') ]

# Collect every three-letter Frame subclass (the ID3v2.2 frame names).
Frames_2_2 = dict([(k,v) for (k,v) in globals().items()
        if len(k)==3 and isinstance(v, type) and issubclass(v, Frame)])

# support open(filename) as interface
Open = ID3

# ID3v1.1 support.
+def ParseID3v1(string):
+    """Parse a raw ID3v1 / ID3v1.1 tag into ID3v2.4 frames.
+
+    string -- the 128 byte tag block (3+30+30+30+4+29+1+1 bytes)
+
+    Returns a dict mapping frame IDs ("TIT2", "TPE1", "TALB", "TDRC",
+    "COMM", "TRCK", "TCON") to frame objects; only non-empty fields
+    get an entry. Returns None if the data cannot be unpacked with
+    the 128 byte layout or does not start with "TAG".
+    """
+    from struct import error as StructError
+    frames = {}
+    try:
+        tag, title, artist, album, year, comment, track, genre = unpack(
+            "3s30s30s30s4s29sBB", string)
+    except StructError: return None
+
+    if tag != "TAG": return None
+    def fix(string):
+        # Keep only the text before the first NUL, without padding spaces.
+        return string.split("\x00")[0].strip().decode('latin1')
+    title, artist, album, year, comment = map(
+        fix, [title, artist, album, year, comment])
+
+    if title: frames["TIT2"] = TIT2(encoding=0, text=title)
+    if artist: frames["TPE1"] = TPE1(encoding=0, text=[artist])
+    if album: frames["TALB"] = TALB(encoding=0, text=album)
+    if year: frames["TDRC"] = TDRC(encoding=0, text=year)
+    if comment: frames["COMM"] = COMM(
+        encoding=0, lang="eng", desc="ID3v1 Comment", text=comment)
+    # Don't read a track number if it looks like the comment was
+    # padded with spaces instead of nulls (thanks, WinAmp).
+    if track and (track != 32 or string[-3] == '\x00'):
+        frames["TRCK"] = TRCK(encoding=0, text=str(track))
+    if genre != 255: frames["TCON"] = TCON(encoding=0, text=str(genre))
+    return frames
+
+def MakeID3v1(id3):
+    """Return an ID3v1.1 tag string from a dict of ID3v2.4 frames.
+
+    id3 -- mapping of frame IDs to frame objects; the TIT2, TPE1,
+           TALB, COMM, TRCK, TCON and TDRC/TYER entries are used
+           when present
+
+    Every field is truncated or NUL-padded to its fixed width so the
+    result is always 128 bytes long: "TAG", three 30 byte text
+    fields, a 4 byte year, a 29 byte comment field (28 bytes of text
+    plus a NUL), one track byte and one genre byte.
+    """
+
+    v1 = {}
+
+    for v2id, name in {"TIT2": "title", "TPE1": "artist",
+                       "TALB": "album"}.items():
+        if v2id in id3:
+            text = id3[v2id].text[0].encode('latin1', 'replace')[:30]
+        else:
+            text = ""
+        v1[name] = text + ("\x00" * (30 - len(text)))
+
+    if "COMM" in id3:
+        cmnt = id3["COMM"].text[0].encode('latin1', 'replace')[:28]
+    else: cmnt = ""
+    v1["comment"] = cmnt + ("\x00" * (29 - len(cmnt)))
+
+    if "TRCK" in id3:
+        # A track number that does not fit in one byte is written as 0.
+        try: v1["track"] = chr(+id3["TRCK"])
+        except ValueError: v1["track"] = "\x00"
+    else: v1["track"] = "\x00"
+
+    if "TCON" in id3:
+        try: genre = id3["TCON"].genres[0]
+        except IndexError: pass
+        else:
+            if genre in TCON.GENRES:
+                v1["genre"] = chr(TCON.GENRES.index(genre))
+    if "genre" not in v1: v1["genre"] = "\xff"
+
+    # The year field is exactly 4 bytes wide. Pad short values with
+    # NULs (as the other fields do); a bare [:4] slice would let a
+    # year shorter than 4 characters shrink the tag below 128 bytes.
+    if "TDRC" in id3:
+        v1["year"] = (str(id3["TDRC"]) + "\x00\x00\x00\x00")[:4]
+    elif "TYER" in id3:
+        v1["year"] = (str(id3["TYER"]) + "\x00\x00\x00\x00")[:4]
+    else:
+        v1["year"] = "\x00\x00\x00\x00"
+
+    return ("TAG%(title)s%(artist)s%(album)s%(year)s%(comment)s"
+            "%(track)s%(genre)s") % v1
+
+class ID3FileType(lib.mutagen.FileType):
+    """An unknown type of file with ID3 tags."""
+
+    ID3 = ID3
+
+    class _Info(object):
+        # Placeholder stream info; subclasses replace _Info with a
+        # real parser taking the same (fileobj, offset) arguments.
+        length = 0
+        def __init__(self, fileobj, offset): pass
+        pprint = staticmethod(lambda: "Unknown format with ID3 tag")
+
+    def score(filename, fileobj, header):
+        """Return a true value if the header starts with "ID3"."""
+        return header.startswith("ID3")
+    score = staticmethod(score)
+
+    def add_tags(self, ID3=None):
+        """Add an empty ID3 tag to the file.
+
+        A custom tag reader may be used instead of the default
+        mutagen.id3.ID3 object, e.g. an EasyID3 reader.
+
+        Raises error if the file already has a tag.
+        """
+        if ID3 is None:
+            ID3 = self.ID3
+        if self.tags is None:
+            self.tags = ID3()
+        else:
+            raise error("an ID3 tag already exists")
+
+    def load(self, filename, ID3=None, **kwargs):
+        """Load stream and tag information from a file.
+
+        A custom tag reader may be used instead of the default
+        mutagen.id3.ID3 object, e.g. an EasyID3 reader.
+
+        If the tag reader raises error, self.tags is set to None and
+        the stream information is still loaded.
+        """
+        if ID3 is None:
+            ID3 = self.ID3
+        self.filename = filename
+        try: self.tags = ID3(filename, **kwargs)
+        except error: self.tags = None
+        if self.tags is not None:
+            try: offset = self.tags.size
+            except AttributeError: offset = None
+        else: offset = None
+        # Open before entering the try block: if the open itself fails
+        # there is nothing to close, and closing an unbound name in the
+        # finally clause would hide the original IOError.
+        fileobj = file(filename, "rb")
+        try:
+            self.info = self._Info(fileobj, offset)
+        finally:
+            fileobj.close()
diff --git a/lib/mutagen/m4a.py b/lib/mutagen/m4a.py
new file mode 100644
index 00000000..e6cbdb6a
--- /dev/null
+++ b/lib/mutagen/m4a.py
@@ -0,0 +1,499 @@
+# Copyright 2006 Joe Wreschnig
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# $Id: m4a.py 4231 2007-12-15 08:13:53Z luks $
+
+"""Read and write MPEG-4 audio files with iTunes metadata.
+ +This module will read MPEG-4 audio information and metadata, +as found in Apple's M4A (aka MP4, M4B, M4P) files. + +There is no official specification for this format. The source code +for TagLib, FAAD, and various MPEG specifications at +http://developer.apple.com/documentation/QuickTime/QTFF/, +http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt, +and http://wiki.multimedia.cx/index.php?title=Apple_QuickTime were all +consulted. + +This module does not support 64 bit atom sizes, and so will not +work on metadata over 4GB. +""" + +import struct +import sys + +from cStringIO import StringIO + +from lib.mutagen import FileType, Metadata +from lib.mutagen._constants import GENRES +from lib.mutagen._util import cdata, insert_bytes, delete_bytes, DictProxy + +class error(IOError): pass +class M4AMetadataError(error): pass +class M4AStreamInfoError(error): pass +class M4AMetadataValueError(ValueError, M4AMetadataError): pass + +import warnings +warnings.warn( + "mutagen.m4a is deprecated; use mutagen.mp4 instead.", DeprecationWarning) + +# This is not an exhaustive list of container atoms, but just the +# ones this module needs to peek inside. +_CONTAINERS = ["moov", "udta", "trak", "mdia", "meta", "ilst", + "stbl", "minf", "stsd"] +_SKIP_SIZE = { "meta": 4 } + +__all__ = ['M4A', 'Open', 'delete', 'M4ACover'] + +class M4ACover(str): + """A cover artwork. + + Attributes: + imageformat -- format of the image (either FORMAT_JPEG or FORMAT_PNG) + """ + FORMAT_JPEG = 0x0D + FORMAT_PNG = 0x0E + + def __new__(cls, data, imageformat=None): + self = str.__new__(cls, data) + if imageformat is None: imageformat = M4ACover.FORMAT_JPEG + self.imageformat = imageformat + try: self.format + except AttributeError: + self.format = imageformat + return self + +class Atom(object): + """An individual atom. 
+ + Attributes: + children -- list child atoms (or None for non-container atoms) + length -- length of this atom, including length and name + name -- four byte name of the atom, as a str + offset -- location in the constructor-given fileobj of this atom + + This structure should only be used internally by Mutagen. + """ + + children = None + + def __init__(self, fileobj): + self.offset = fileobj.tell() + self.length, self.name = struct.unpack(">I4s", fileobj.read(8)) + if self.length == 1: + raise error("64 bit atom sizes are not supported") + elif self.length < 8: + return + + if self.name in _CONTAINERS: + self.children = [] + fileobj.seek(_SKIP_SIZE.get(self.name, 0), 1) + while fileobj.tell() < self.offset + self.length: + self.children.append(Atom(fileobj)) + else: + fileobj.seek(self.offset + self.length, 0) + + def render(name, data): + """Render raw atom data.""" + # this raises OverflowError if Py_ssize_t can't handle the atom data + size = len(data) + 8 + if size <= 0xFFFFFFFF: + return struct.pack(">I4s", size, name) + data + else: + return struct.pack(">I4sQ", 1, name, size + 8) + data + render = staticmethod(render) + + def __getitem__(self, remaining): + """Look up a child atom, potentially recursively. + + e.g. atom['udta', 'meta'] => + """ + if not remaining: + return self + elif self.children is None: + raise KeyError("%r is not a container" % self.name) + for child in self.children: + if child.name == remaining[0]: + return child[remaining[1:]] + else: + raise KeyError, "%r not found" % remaining[0] + + def __repr__(self): + klass = self.__class__.__name__ + if self.children is None: + return "<%s name=%r length=%r offset=%r>" % ( + klass, self.name, self.length, self.offset) + else: + children = "\n".join([" " + line for child in self.children + for line in repr(child).splitlines()]) + return "<%s name=%r length=%r offset=%r\n%s>" % ( + klass, self.name, self.length, self.offset, children) + +class Atoms(object): + """Root atoms in a given file. 
+ + Attributes: + atoms -- a list of top-level atoms as Atom objects + + This structure should only be used internally by Mutagen. + """ + def __init__(self, fileobj): + self.atoms = [] + fileobj.seek(0, 2) + end = fileobj.tell() + fileobj.seek(0) + while fileobj.tell() < end: + self.atoms.append(Atom(fileobj)) + + def path(self, *names): + """Look up and return the complete path of an atom. + + For example, atoms.path('moov', 'udta', 'meta') will return a + list of three atoms, corresponding to the moov, udta, and meta + atoms. + """ + path = [self] + for name in names: + path.append(path[-1][name,]) + return path[1:] + + def __getitem__(self, names): + """Look up a child atom. + + 'names' may be a list of atoms (['moov', 'udta']) or a string + specifying the complete path ('moov.udta'). + """ + if isinstance(names, basestring): + names = names.split(".") + for child in self.atoms: + if child.name == names[0]: + return child[names[1:]] + else: + raise KeyError, "%s not found" % names[0] + + def __repr__(self): + return "\n".join([repr(child) for child in self.atoms]) + +class M4ATags(DictProxy, Metadata): + """Dictionary containing Apple iTunes metadata list key/values. + + Keys are four byte identifiers, except for freeform ('----') + keys. Values are usually unicode strings, but some atoms have a + special structure: + cpil -- boolean + trkn, disk -- tuple of 16 bit ints (current, total) + tmpo -- 16 bit int + covr -- list of M4ACover objects (which are tagged strs) + gnre -- not supported. Use '\\xa9gen' instead. + + The freeform '----' frames use a key in the format '----:mean:name' + where 'mean' is usually 'com.apple.iTunes' and 'name' is a unique + identifier for this frame. The value is a str, but is probably + text that can be decoded as UTF-8. + + M4A tag data cannot exist outside of the structure of an M4A file, + so this class should not be manually instantiated. + + Unknown non-text tags are removed. 
+ """ + + def load(self, atoms, fileobj): + try: ilst = atoms["moov.udta.meta.ilst"] + except KeyError, key: + raise M4AMetadataError(key) + for atom in ilst.children: + fileobj.seek(atom.offset + 8) + data = fileobj.read(atom.length - 8) + parse = self.__atoms.get(atom.name, (M4ATags.__parse_text,))[0] + parse(self, atom, data) + + def __key_sort((key1, v1), (key2, v2)): + # iTunes always writes the tags in order of "relevance", try + # to copy it as closely as possible. + order = ["\xa9nam", "\xa9ART", "\xa9wrt", "\xa9alb", + "\xa9gen", "gnre", "trkn", "disk", + "\xa9day", "cpil", "tmpo", "\xa9too", + "----", "covr", "\xa9lyr"] + order = dict(zip(order, range(len(order)))) + last = len(order) + # If there's no key-based way to distinguish, order by length. + # If there's still no way, go by string comparison on the + # values, so we at least have something determinstic. + return (cmp(order.get(key1[:4], last), order.get(key2[:4], last)) or + cmp(len(v1), len(v2)) or cmp(v1, v2)) + __key_sort = staticmethod(__key_sort) + + def save(self, filename): + """Save the metadata to the given filename.""" + values = [] + items = self.items() + items.sort(self.__key_sort) + for key, value in items: + render = self.__atoms.get( + key[:4], (None, M4ATags.__render_text))[1] + values.append(render(self, key, value)) + data = Atom.render("ilst", "".join(values)) + + # Find the old atoms. + fileobj = file(filename, "rb+") + try: + atoms = Atoms(fileobj) + + moov = atoms["moov"] + + if moov != atoms.atoms[-1]: + # "Free" the old moov block. Something in the mdat + # block is not happy when its offset changes and it + # won't play back. So, rather than try to figure that + # out, just move the moov atom to the end of the file. 
+ offset = self.__move_moov(fileobj, moov) + else: + offset = 0 + + try: + path = atoms.path("moov", "udta", "meta", "ilst") + except KeyError: + self.__save_new(fileobj, atoms, data, offset) + else: + self.__save_existing(fileobj, atoms, path, data, offset) + finally: + fileobj.close() + + def __move_moov(self, fileobj, moov): + fileobj.seek(moov.offset) + data = fileobj.read(moov.length) + fileobj.seek(moov.offset) + free = Atom.render("free", "\x00" * (moov.length - 8)) + fileobj.write(free) + fileobj.seek(0, 2) + # Figure out how far we have to shift all our successive + # seek calls, relative to what the atoms say. + old_end = fileobj.tell() + fileobj.write(data) + return old_end - moov.offset + + def __save_new(self, fileobj, atoms, ilst, offset): + hdlr = Atom.render("hdlr", "\x00" * 8 + "mdirappl" + "\x00" * 9) + meta = Atom.render("meta", "\x00\x00\x00\x00" + hdlr + ilst) + moov, udta = atoms.path("moov", "udta") + insert_bytes(fileobj, len(meta), udta.offset + offset + 8) + fileobj.seek(udta.offset + offset + 8) + fileobj.write(meta) + self.__update_parents(fileobj, [moov, udta], len(meta), offset) + + def __save_existing(self, fileobj, atoms, path, data, offset): + # Replace the old ilst atom. + ilst = path.pop() + delta = len(data) - ilst.length + fileobj.seek(ilst.offset + offset) + if delta > 0: + insert_bytes(fileobj, delta, ilst.offset + offset) + elif delta < 0: + delete_bytes(fileobj, -delta, ilst.offset + offset) + fileobj.seek(ilst.offset + offset) + fileobj.write(data) + self.__update_parents(fileobj, path, delta, offset) + + def __update_parents(self, fileobj, path, delta, offset): + # Update all parent atoms with the new size. 
+ for atom in path: + fileobj.seek(atom.offset + offset) + size = cdata.uint_be(fileobj.read(4)) + delta + fileobj.seek(atom.offset + offset) + fileobj.write(cdata.to_uint_be(size)) + + def __render_data(self, key, flags, data): + data = struct.pack(">2I", flags, 0) + data + return Atom.render(key, Atom.render("data", data)) + + def __parse_freeform(self, atom, data): + try: + fileobj = StringIO(data) + mean_length = cdata.uint_be(fileobj.read(4)) + # skip over 8 bytes of atom name, flags + mean = fileobj.read(mean_length - 4)[8:] + name_length = cdata.uint_be(fileobj.read(4)) + name = fileobj.read(name_length - 4)[8:] + value_length = cdata.uint_be(fileobj.read(4)) + # Name, flags, and reserved bytes + value = fileobj.read(value_length - 4)[12:] + except struct.error: + # Some ---- atoms have no data atom, I have no clue why + # they actually end up in the file. + pass + else: + self["%s:%s:%s" % (atom.name, mean, name)] = value + def __render_freeform(self, key, value): + dummy, mean, name = key.split(":", 2) + mean = struct.pack(">I4sI", len(mean) + 12, "mean", 0) + mean + name = struct.pack(">I4sI", len(name) + 12, "name", 0) + name + value = struct.pack(">I4s2I", len(value) + 16, "data", 0x1, 0) + value + final = mean + name + value + return Atom.render("----", mean + name + value) + + def __parse_pair(self, atom, data): + self[atom.name] = struct.unpack(">2H", data[18:22]) + def __render_pair(self, key, value): + track, total = value + if 0 <= track < 1 << 16 and 0 <= total < 1 << 16: + data = struct.pack(">4H", 0, track, total, 0) + return self.__render_data(key, 0, data) + else: + raise M4AMetadataValueError("invalid numeric pair %r" % (value,)) + + def __render_pair_no_trailing(self, key, value): + track, total = value + if 0 <= track < 1 << 16 and 0 <= total < 1 << 16: + data = struct.pack(">3H", 0, track, total) + return self.__render_data(key, 0, data) + else: + raise M4AMetadataValueError("invalid numeric pair %r" % (value,)) + + def 
__parse_genre(self, atom, data): + # Translate to a freeform genre. + genre = cdata.short_be(data[16:18]) + if "\xa9gen" not in self: + try: self["\xa9gen"] = GENRES[genre - 1] + except IndexError: pass + + def __parse_tempo(self, atom, data): + self[atom.name] = cdata.short_be(data[16:18]) + def __render_tempo(self, key, value): + if 0 <= value < 1 << 16: + return self.__render_data(key, 0x15, cdata.to_ushort_be(value)) + else: + raise M4AMetadataValueError("invalid short integer %r" % value) + + def __parse_compilation(self, atom, data): + try: self[atom.name] = bool(ord(data[16:17])) + except TypeError: self[atom.name] = False + + def __render_compilation(self, key, value): + return self.__render_data(key, 0x15, chr(bool(value))) + + def __parse_cover(self, atom, data): + length, name, imageformat = struct.unpack(">I4sI", data[:12]) + if name != "data": + raise M4AMetadataError( + "unexpected atom %r inside 'covr'" % name) + if imageformat not in (M4ACover.FORMAT_JPEG, M4ACover.FORMAT_PNG): + imageformat = M4ACover.FORMAT_JPEG + self[atom.name]= M4ACover(data[16:length], imageformat) + def __render_cover(self, key, value): + try: imageformat = value.imageformat + except AttributeError: imageformat = M4ACover.FORMAT_JPEG + data = Atom.render("data", struct.pack(">2I", imageformat, 0) + value) + return Atom.render(key, data) + + def __parse_text(self, atom, data): + flags = cdata.uint_be(data[8:12]) + if flags == 1: + self[atom.name] = data[16:].decode('utf-8', 'replace') + def __render_text(self, key, value): + return self.__render_data(key, 0x1, value.encode('utf-8')) + + def delete(self, filename): + self.clear() + self.save(filename) + + __atoms = { + "----": (__parse_freeform, __render_freeform), + "trkn": (__parse_pair, __render_pair), + "disk": (__parse_pair, __render_pair_no_trailing), + "gnre": (__parse_genre, None), + "tmpo": (__parse_tempo, __render_tempo), + "cpil": (__parse_compilation, __render_compilation), + "covr": (__parse_cover, __render_cover), 
+ } + + def pprint(self): + values = [] + for key, value in self.iteritems(): + key = key.decode('latin1') + try: values.append("%s=%s" % (key, value)) + except UnicodeDecodeError: + values.append("%s=[%d bytes of data]" % (key, len(value))) + return "\n".join(values) + +class M4AInfo(object): + """MPEG-4 stream information. + + Attributes: + bitrate -- bitrate in bits per second, as an int + length -- file length in seconds, as a float + """ + + bitrate = 0 + + def __init__(self, atoms, fileobj): + hdlr = atoms["moov.trak.mdia.hdlr"] + fileobj.seek(hdlr.offset) + if "soun" not in fileobj.read(hdlr.length): + raise M4AStreamInfoError("track has no audio data") + + mdhd = atoms["moov.trak.mdia.mdhd"] + fileobj.seek(mdhd.offset) + data = fileobj.read(mdhd.length) + if ord(data[8]) == 0: + offset = 20 + fmt = ">2I" + else: + offset = 28 + fmt = ">IQ" + end = offset + struct.calcsize(fmt) + unit, length = struct.unpack(fmt, data[offset:end]) + self.length = float(length) / unit + + try: + atom = atoms["moov.trak.mdia.minf.stbl.stsd"] + fileobj.seek(atom.offset) + data = fileobj.read(atom.length) + self.bitrate = cdata.uint_be(data[-17:-13]) + except (ValueError, KeyError): + # Bitrate values are optional. + pass + + def pprint(self): + return "MPEG-4 audio, %.2f seconds, %d bps" % ( + self.length, self.bitrate) + +class M4A(FileType): + """An MPEG-4 audio file, probably containing AAC. + + If more than one track is present in the file, the first is used. + Only audio ('soun') tracks will be read. 
+ """ + + _mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"] + + def load(self, filename): + self.filename = filename + fileobj = file(filename, "rb") + try: + atoms = Atoms(fileobj) + try: self.info = M4AInfo(atoms, fileobj) + except StandardError, err: + raise M4AStreamInfoError, err, sys.exc_info()[2] + try: self.tags = M4ATags(atoms, fileobj) + except M4AMetadataError: + self.tags = None + except StandardError, err: + raise M4AMetadataError, err, sys.exc_info()[2] + finally: + fileobj.close() + + def add_tags(self): + self.tags = M4ATags() + + def score(filename, fileobj, header): + return ("ftyp" in header) + ("mp4" in header) + score = staticmethod(score) + +Open = M4A + +def delete(filename): + """Remove tags from a file.""" + M4A(filename).delete() diff --git a/lib/mutagen/monkeysaudio.py b/lib/mutagen/monkeysaudio.py new file mode 100644 index 00000000..6fa9ba8c --- /dev/null +++ b/lib/mutagen/monkeysaudio.py @@ -0,0 +1,80 @@ +# A Monkey's Audio (APE) reader/tagger +# +# Copyright 2006 Lukas Lalinsky +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# +# $Id: monkeysaudio.py 3976 2007-01-13 22:00:14Z piman $ + +"""Monkey's Audio streams with APEv2 tags. + +Monkey's Audio is a very efficient lossless audio compressor developed +by Matt Ashland. + +For more information, see http://www.monkeysaudio.com/. +""" + +__all__ = ["MonkeysAudio", "Open", "delete"] + +import struct + +from lib.mutagen.apev2 import APEv2File, error, delete +from lib.mutagen._util import cdata + +class MonkeysAudioHeaderError(error): pass + +class MonkeysAudioInfo(object): + """Monkey's Audio stream information. 
+ + Attributes: + channels -- number of audio channels + length -- file length in seconds, as a float + sample_rate -- audio sampling rate in Hz + bits_per_sample -- bits per sample + version -- Monkey's Audio stream version, as a float (eg: 3.99) + """ + + def __init__(self, fileobj): + header = fileobj.read(76) + if len(header) != 76 or not header.startswith("MAC "): + raise MonkeysAudioHeaderError("not a Monkey's Audio file") + self.version = cdata.ushort_le(header[4:6]) + if self.version >= 3980: + (blocks_per_frame, final_frame_blocks, total_frames, + self.bits_per_sample, self.channels, + self.sample_rate) = struct.unpack("= 3950: + blocks_per_frame = 73728 * 4 + elif self.version >= 3900 or (self.version >= 3800 and + compression_level == 4): + blocks_per_frame = 73728 + else: + blocks_per_frame = 9216 + self.version /= 1000.0 + self.length = 0.0 + if self.sample_rate != 0 and total_frames > 0: + total_blocks = ((total_frames - 1) * blocks_per_frame + + final_frame_blocks) + self.length = float(total_blocks) / self.sample_rate + + def pprint(self): + return "Monkey's Audio %.2f, %.2f seconds, %d Hz" % ( + self.version, self.length, self.sample_rate) + +class MonkeysAudio(APEv2File): + _Info = MonkeysAudioInfo + _mimes = ["audio/ape", "audio/x-ape"] + + def score(filename, fileobj, header): + return header.startswith("MAC ") + filename.lower().endswith(".ape") + score = staticmethod(score) + +Open = MonkeysAudio diff --git a/lib/mutagen/mp3.py b/lib/mutagen/mp3.py new file mode 100644 index 00000000..b173b70b --- /dev/null +++ b/lib/mutagen/mp3.py @@ -0,0 +1,243 @@ +# MP3 stream header information support for Mutagen. +# Copyright 2006 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. 
+
+"""MPEG audio stream information and tags."""
+
+import os
+import struct
+
+from lib.mutagen.id3 import ID3FileType, BitPaddedInt, delete
+
+# "MP3" used to be listed twice here while EasyMP3 was missing.
+__all__ = ["MP3", "Open", "delete", "EasyMP3"]
+
+class error(RuntimeError): pass
+class HeaderNotFoundError(error, IOError): pass
+class InvalidMPEGHeader(error, IOError): pass
+
+# Mode values.
+STEREO, JOINTSTEREO, DUALCHANNEL, MONO = range(4)
+
+class MPEGInfo(object):
+    """MPEG audio stream information
+
+    Parse information about an MPEG audio file. This also reads the
+    Xing VBR header format.
+
+    This code was implemented based on the format documentation at
+    http://www.dv.co.yu/mpgscript/mpeghdr.htm.
+
+    Useful attributes:
+    length -- audio length, in seconds
+    bitrate -- audio bitrate, in bits per second
+    sketchy -- if true, the file may not be valid MPEG audio
+
+    Useless attributes:
+    version -- MPEG version (1, 2, 2.5)
+    layer -- 1, 2, or 3
+    mode -- One of STEREO, JOINTSTEREO, DUALCHANNEL, or MONO (0-3)
+    protected -- whether or not the file is "protected"
+    padding -- whether or not audio frames are padded
+    sample_rate -- audio sample rate, in Hz
+    """
+
+    # Map (version, layer) tuples to bitrates.
+    __BITRATE = {
+        (1, 1): range(0, 480, 32),
+        (1, 2): [0, 32, 48, 56, 64, 80, 96, 112,128,160,192,224,256,320,384],
+        (1, 3): [0, 32, 40, 48, 56, 64, 80, 96, 112,128,160,192,224,256,320],
+        (2, 1): [0, 32, 48, 56, 64, 80, 96, 112,128,144,160,176,192,224,256],
+        (2, 2): [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96,112,128,144,160],
+        }
+
+    __BITRATE[(2, 3)] = __BITRATE[(2, 2)]
+    for i in range(1, 4): __BITRATE[(2.5, i)] = __BITRATE[(2, i)]
+
+    # Map version to sample rates.
+    __RATES = {
+        1: [44100, 48000, 32000],
+        2: [22050, 24000, 16000],
+        2.5: [11025, 12000, 8000]
+        }
+
+    sketchy = False
+
+    def __init__(self, fileobj, offset=None):
+        """Parse MPEG stream information from a file-like object.
+
+        If an offset argument is given, it is used to start looking
+        for stream information and Xing headers; otherwise, ID3v2 tags
+        will be skipped automatically. A correct offset can make
+        loading files significantly faster.
+
+        Raises HeaderNotFoundError if no MPEG frame can be found even
+        when only a single frame is required.
+        """
+
+        try: size = os.path.getsize(fileobj.name)
+        except (IOError, OSError, AttributeError):
+            fileobj.seek(0, 2)
+            size = fileobj.tell()
+
+        # If we don't get an offset, try to skip an ID3v2 tag.
+        if offset is None:
+            fileobj.seek(0, 0)
+            idata = fileobj.read(10)
+            try: id3, insize = struct.unpack('>3sxxx4s', idata)
+            except struct.error: id3, insize = '', 0
+            insize = BitPaddedInt(insize)
+            if id3 == 'ID3' and insize > 0:
+                offset = insize
+            else: offset = 0
+
+        # Try to find two valid headers (meaning, very likely MPEG data)
+        # at the given offset, 30% through the file, 60% through the file,
+        # and 90% through the file.
+        for i in [offset, 0.3 * size, 0.6 * size, 0.9 * size]:
+            try: self.__try(fileobj, int(i), size - offset)
+            except error: pass
+            else: break
+        # If we can't find any two consecutive frames, try to find just
+        # one frame back at the original offset given.
+        else:
+            self.__try(fileobj, offset, size - offset, False)
+            self.sketchy = True
+
+    def __try(self, fileobj, offset, real_size, check_second=True):
+        """Look for an MPEG frame at offset and fill in the stream info.
+
+        fileobj -- file-like object to read from
+        offset -- position at which to start searching for a frame
+        real_size -- number of bytes assumed to hold audio data
+        check_second -- also require a frame sync where the next frame
+                        should start
+
+        Raises HeaderNotFoundError if no acceptable frame is found.
+        """
+        # This is going to be one really long function; bear with it,
+        # because there's not really a sane point to cut it up.
+        fileobj.seek(offset, 0)
+
+        # We "know" we have an MPEG file if we find two frames that look like
+        # valid MPEG data. If we can't find them in 32k of reads, something
+        # is horribly wrong (the longest frame can only be about 4k). This
+        # is assuming the offset didn't lie.
+        data = fileobj.read(32768)
+
+        frame_1 = data.find("\xff")
+        while 0 <= frame_1 <= len(data) - 4:
+            frame_data = struct.unpack(">I", data[frame_1:frame_1 + 4])[0]
+            if (frame_data >> 16) & 0xE0 != 0xE0:
+                frame_1 = data.find("\xff", frame_1 + 2)
+            else:
+                version = (frame_data >> 19) & 0x3
+                layer = (frame_data >> 17) & 0x3
+                protection = (frame_data >> 16) & 0x1
+                bitrate = (frame_data >> 12) & 0xF
+                sample_rate = (frame_data >> 10) & 0x3
+                padding = (frame_data >> 9) & 0x1
+                private = (frame_data >> 8) & 0x1
+                self.mode = (frame_data >> 6) & 0x3
+                mode_extension = (frame_data >> 4) & 0x3
+                copyright = (frame_data >> 3) & 0x1
+                original = (frame_data >> 2) & 0x1
+                emphasis = (frame_data >> 0) & 0x3
+                if (version == 1 or layer == 0 or sample_rate == 0x3 or
+                    bitrate == 0 or bitrate == 0xF):
+                    frame_1 = data.find("\xff", frame_1 + 2)
+                else: break
+        else:
+            raise HeaderNotFoundError("can't sync to an MPEG frame")
+
+        # There is a serious problem here, which is that many flags
+        # in an MPEG header are backwards.
+        self.version = [2.5, None, 2, 1][version]
+        self.layer = 4 - layer
+        self.protected = not protection
+        self.padding = bool(padding)
+
+        self.bitrate = self.__BITRATE[(self.version, self.layer)][bitrate]
+        self.bitrate *= 1000
+        self.sample_rate = self.__RATES[self.version][sample_rate]
+
+        if self.layer == 1:
+            frame_length = (12 * self.bitrate / self.sample_rate + padding) * 4
+            frame_size = 384
+        elif self.version >= 2 and self.layer == 3:
+            frame_length = 72 * self.bitrate / self.sample_rate + padding
+            frame_size = 576
+        else:
+            frame_length = 144 * self.bitrate / self.sample_rate + padding
+            frame_size = 1152
+
+        if check_second:
+            possible = frame_1 + frame_length
+            # Both sync bytes of the next frame must be inside the
+            # buffer. Unpacking a shorter slice raises struct.error,
+            # which is not an `error` subclass and so would escape the
+            # retry loop in __init__ instead of trying the next offset.
+            if possible + 2 > len(data):
+                raise HeaderNotFoundError("can't sync to second MPEG frame")
+            frame_data = struct.unpack(">H", data[possible:possible + 2])[0]
+            if frame_data & 0xFFE0 != 0xFFE0:
+                raise HeaderNotFoundError("can't sync to second MPEG frame")
+
+        frame_count = real_size / float(frame_length)
+        samples = frame_size * frame_count
+        self.length = samples / self.sample_rate
+
+        # Try to find/parse the Xing header, which trumps the above length
+        # and bitrate calculation.
+        fileobj.seek(offset, 0)
+        data = fileobj.read(32768)
+        try:
+            xing = data[:-4].index("Xing")
+        except ValueError:
+            # Try to find/parse the VBRI header, which trumps the above length
+            # calculation.
+            try:
+                vbri = data[:-24].index("VBRI")
+            except ValueError: pass
+            else:
+                # If a VBRI header was found, this is definitely MPEG audio.
+                self.sketchy = False
+                vbri_version = struct.unpack('>H', data[vbri + 4:vbri + 6])[0]
+                if vbri_version == 1:
+                    frame_count = struct.unpack(
+                        '>I', data[vbri + 14:vbri + 18])[0]
+                    samples = float(frame_size * frame_count)
+                    self.length = (samples / self.sample_rate) or self.length
+        else:
+            # If a Xing header was found, this is definitely MPEG audio.
+            self.sketchy = False
+            flags = struct.unpack('>I', data[xing + 4:xing + 8])[0]
+            if flags & 0x1:
+                frame_count = struct.unpack('>I', data[xing + 8:xing + 12])[0]
+                samples = float(frame_size * frame_count)
+                self.length = (samples / self.sample_rate) or self.length
+            # Keep the frame header bitrate when the length is zero;
+            # dividing by it would raise ZeroDivisionError.
+            if flags & 0x2 and self.length:
+                bytes = struct.unpack('>I', data[xing + 12:xing + 16])[0]
+                self.bitrate = int((bytes * 8) // self.length)
+
+        # If the bitrate * the length is nowhere near the file
+        # length, recalculate using the bitrate and file length.
+        # Don't do this for very small files.
+        #
+        # NOTE(review): seek(2, 0) moves to byte 2 from the start, so
+        # size is always 2 and the recalculation below never runs. The
+        # arguments look swapped (seek(0, 2) would give the file size),
+        # but the replacement formula divides by bitrate * 8 where
+        # size * 8 / bitrate would be expected, so enabling the check
+        # as written would produce wrong lengths. Left unchanged;
+        # confirm the intent before touching it.
+        fileobj.seek(2, 0)
+        size = fileobj.tell()
+        expected = (self.bitrate / 8) * self.length
+        if not (size / 2 < expected < size * 2) and size > 2**16:
+            self.length = size / float(self.bitrate * 8)
+
+    def pprint(self):
+        """Return a one-line, human readable summary of the stream."""
+        s = "MPEG %s layer %d, %d bps, %s Hz, %.2f seconds" % (
+            self.version, self.layer, self.bitrate, self.sample_rate,
+            self.length)
+        if self.sketchy: s += " (sketchy)"
+        return s
+
+class MP3(ID3FileType):
+    """An MPEG audio (usually MPEG-1 Layer 3) file."""
+
+    _Info = MPEGInfo
+    _mimes = ["audio/mp3", "audio/x-mp3", "audio/mpeg", "audio/mpg",
+              "audio/x-mpeg"]
+
+    def score(filename, fileobj, header):
+        """Score the file: 2 for an ID3 header, 1 for a known extension."""
+        filename = filename.lower()
+        return (header.startswith("ID3") * 2 + filename.endswith(".mp3") +
+                filename.endswith(".mp2") + filename.endswith(".mpg") +
+                filename.endswith(".mpeg"))
+    score = staticmethod(score)
+
+Open = MP3
+
+class EasyMP3(MP3):
+    """Like MP3, but uses EasyID3 for tags."""
+    from lib.mutagen.easyid3 import EasyID3 as ID3
+
diff --git a/lib/mutagen/mp4.py b/lib/mutagen/mp4.py
new file mode 100644
index 00000000..7d0b64eb
--- /dev/null
+++ b/lib/mutagen/mp4.py
@@ -0,0 +1,682 @@
+# Copyright 2006 Joe Wreschnig
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+# +# $Id: mp4.py 4233 2007-12-28 07:24:59Z luks $ + +"""Read and write MPEG-4 audio files with iTunes metadata. + +This module will read MPEG-4 audio information and metadata, +as found in Apple's MP4 (aka M4A, M4B, M4P) files. + +There is no official specification for this format. The source code +for TagLib, FAAD, and various MPEG specifications at +http://developer.apple.com/documentation/QuickTime/QTFF/, +http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt, +http://standards.iso.org/ittf/PubliclyAvailableStandards/c041828_ISO_IEC_14496-12_2005(E).zip, +and http://wiki.multimedia.cx/index.php?title=Apple_QuickTime were all +consulted. +""" + +import struct +import sys + +from lib.mutagen import FileType, Metadata +from lib.mutagen._constants import GENRES +from lib.mutagen._util import cdata, insert_bytes, delete_bytes, DictProxy, utf8 + +class error(IOError): pass +class MP4MetadataError(error): pass +class MP4StreamInfoError(error): pass +class MP4MetadataValueError(ValueError, MP4MetadataError): pass + +# This is not an exhaustive list of container atoms, but just the +# ones this module needs to peek inside. +_CONTAINERS = ["moov", "udta", "trak", "mdia", "meta", "ilst", + "stbl", "minf", "moof", "traf"] +_SKIP_SIZE = { "meta": 4 } + +__all__ = ['MP4', 'Open', 'delete', 'MP4Cover'] + +class MP4Cover(str): + """A cover artwork. + + Attributes: + imageformat -- format of the image (either FORMAT_JPEG or FORMAT_PNG) + """ + FORMAT_JPEG = 0x0D + FORMAT_PNG = 0x0E + + def __new__(cls, data, imageformat=None): + self = str.__new__(cls, data) + if imageformat is None: imageformat = MP4Cover.FORMAT_JPEG + self.imageformat = imageformat + try: self.format + except AttributeError: + self.format = imageformat + return self + +class Atom(object): + """An individual atom. 
+ + Attributes: + children -- list child atoms (or None for non-container atoms) + length -- length of this atom, including length and name + name -- four byte name of the atom, as a str + offset -- location in the constructor-given fileobj of this atom + + This structure should only be used internally by Mutagen. + """ + + children = None + + def __init__(self, fileobj): + self.offset = fileobj.tell() + self.length, self.name = struct.unpack(">I4s", fileobj.read(8)) + if self.length == 1: + self.length, = struct.unpack(">Q", fileobj.read(8)) + elif self.length < 8: + return + + if self.name in _CONTAINERS: + self.children = [] + fileobj.seek(_SKIP_SIZE.get(self.name, 0), 1) + while fileobj.tell() < self.offset + self.length: + self.children.append(Atom(fileobj)) + else: + fileobj.seek(self.offset + self.length, 0) + + def render(name, data): + """Render raw atom data.""" + # this raises OverflowError if Py_ssize_t can't handle the atom data + size = len(data) + 8 + if size <= 0xFFFFFFFF: + return struct.pack(">I4s", size, name) + data + else: + return struct.pack(">I4sQ", 1, name, size + 8) + data + render = staticmethod(render) + + def findall(self, name, recursive=False): + """Recursively find all child atoms by specified name.""" + if self.children is not None: + for child in self.children: + if child.name == name: + yield child + if recursive: + for atom in child.findall(name, True): + yield atom + + def __getitem__(self, remaining): + """Look up a child atom, potentially recursively. + + e.g. 
atom['udta', 'meta'] => + """ + if not remaining: + return self + elif self.children is None: + raise KeyError("%r is not a container" % self.name) + for child in self.children: + if child.name == remaining[0]: + return child[remaining[1:]] + else: + raise KeyError, "%r not found" % remaining[0] + + def __repr__(self): + klass = self.__class__.__name__ + if self.children is None: + return "<%s name=%r length=%r offset=%r>" % ( + klass, self.name, self.length, self.offset) + else: + children = "\n".join([" " + line for child in self.children + for line in repr(child).splitlines()]) + return "<%s name=%r length=%r offset=%r\n%s>" % ( + klass, self.name, self.length, self.offset, children) + +class Atoms(object): + """Root atoms in a given file. + + Attributes: + atoms -- a list of top-level atoms as Atom objects + + This structure should only be used internally by Mutagen. + """ + def __init__(self, fileobj): + self.atoms = [] + fileobj.seek(0, 2) + end = fileobj.tell() + fileobj.seek(0) + while fileobj.tell() + 8 <= end: + self.atoms.append(Atom(fileobj)) + + def path(self, *names): + """Look up and return the complete path of an atom. + + For example, atoms.path('moov', 'udta', 'meta') will return a + list of three atoms, corresponding to the moov, udta, and meta + atoms. + """ + path = [self] + for name in names: + path.append(path[-1][name,]) + return path[1:] + + def __getitem__(self, names): + """Look up a child atom. + + 'names' may be a list of atoms (['moov', 'udta']) or a string + specifying the complete path ('moov.udta'). + """ + if isinstance(names, basestring): + names = names.split(".") + for child in self.atoms: + if child.name == names[0]: + return child[names[1:]] + else: + raise KeyError, "%s not found" % names[0] + + def __repr__(self): + return "\n".join([repr(child) for child in self.atoms]) + +class MP4Tags(DictProxy, Metadata): + """Dictionary containing Apple iTunes metadata list key/values. 
+ + Keys are four byte identifiers, except for freeform ('----') + keys. Values are usually unicode strings, but some atoms have a + special structure: + + Text values (multiple values per key are supported): + '\xa9nam' -- track title + '\xa9alb' -- album + '\xa9ART' -- artist + 'aART' -- album artist + '\xa9wrt' -- composer + '\xa9day' -- year + '\xa9cmt' -- comment + 'desc' -- description (usually used in podcasts) + 'purd' -- purchase date + '\xa9grp' -- grouping + '\xa9gen' -- genre + '\xa9lyr' -- lyrics + 'purl' -- podcast URL + 'egid' -- podcast episode GUID + 'catg' -- podcast category + 'keyw' -- podcast keywords + '\xa9too' -- encoded by + 'cprt' -- copyright + 'soal' -- album sort order + 'soaa' -- album artist sort order + 'soar' -- artist sort order + 'sonm' -- title sort order + 'soco' -- composer sort order + 'sosn' -- show sort order + 'tvsh' -- show name + + Boolean values: + 'cpil' -- part of a compilation + 'pgap' -- part of a gapless album + 'pcst' -- podcast (iTunes reads this only on import) + + Tuples of ints (multiple values per key are supported): + 'trkn' -- track number, total tracks + 'disk' -- disc number, total discs + + Others: + 'tmpo' -- tempo/BPM, 16 bit int + 'covr' -- cover artwork, list of MP4Cover objects (which are + tagged strs) + 'gnre' -- ID3v1 genre. Not supported, use '\xa9gen' instead. + + The freeform '----' frames use a key in the format '----:mean:name' + where 'mean' is usually 'com.apple.iTunes' and 'name' is a unique + identifier for this frame. The value is a str, but is probably + text that can be decoded as UTF-8. Multiple values per key are + supported. + + MP4 tag data cannot exist outside of the structure of an MP4 file, + so this class should not be manually instantiated. + + Unknown non-text tags are removed. 
+ """ + + def load(self, atoms, fileobj): + try: ilst = atoms["moov.udta.meta.ilst"] + except KeyError, key: + raise MP4MetadataError(key) + for atom in ilst.children: + fileobj.seek(atom.offset + 8) + data = fileobj.read(atom.length - 8) + info = self.__atoms.get(atom.name, (type(self).__parse_text, None)) + info[0](self, atom, data, *info[2:]) + + def __key_sort((key1, v1), (key2, v2)): + # iTunes always writes the tags in order of "relevance", try + # to copy it as closely as possible. + order = ["\xa9nam", "\xa9ART", "\xa9wrt", "\xa9alb", + "\xa9gen", "gnre", "trkn", "disk", + "\xa9day", "cpil", "pgap", "pcst", "tmpo", + "\xa9too", "----", "covr", "\xa9lyr"] + order = dict(zip(order, range(len(order)))) + last = len(order) + # If there's no key-based way to distinguish, order by length. + # If there's still no way, go by string comparison on the + # values, so we at least have something determinstic. + return (cmp(order.get(key1[:4], last), order.get(key2[:4], last)) or + cmp(len(v1), len(v2)) or cmp(v1, v2)) + __key_sort = staticmethod(__key_sort) + + def save(self, filename): + """Save the metadata to the given filename.""" + values = [] + items = self.items() + items.sort(self.__key_sort) + for key, value in items: + info = self.__atoms.get(key[:4], (None, type(self).__render_text)) + try: + values.append(info[1](self, key, value, *info[2:])) + except (TypeError, ValueError), s: + raise MP4MetadataValueError, s, sys.exc_info()[2] + data = Atom.render("ilst", "".join(values)) + + # Find the old atoms. 
+ fileobj = file(filename, "rb+") + try: + atoms = Atoms(fileobj) + try: + path = atoms.path("moov", "udta", "meta", "ilst") + except KeyError: + self.__save_new(fileobj, atoms, data) + else: + self.__save_existing(fileobj, atoms, path, data) + finally: + fileobj.close() + + def __pad_ilst(self, data, length=None): + if length is None: + length = ((len(data) + 1023) & ~1023) - len(data) + return Atom.render("free", "\x00" * length) + + def __save_new(self, fileobj, atoms, ilst): + hdlr = Atom.render("hdlr", "\x00" * 8 + "mdirappl" + "\x00" * 9) + meta = Atom.render( + "meta", "\x00\x00\x00\x00" + hdlr + ilst + self.__pad_ilst(ilst)) + try: + path = atoms.path("moov", "udta") + except KeyError: + # moov.udta not found -- create one + path = atoms.path("moov") + meta = Atom.render("udta", meta) + offset = path[-1].offset + 8 + insert_bytes(fileobj, len(meta), offset) + fileobj.seek(offset) + fileobj.write(meta) + self.__update_parents(fileobj, path, len(meta)) + self.__update_offsets(fileobj, atoms, len(meta), offset) + + def __save_existing(self, fileobj, atoms, path, data): + # Replace the old ilst atom. 
+ ilst = path.pop() + offset = ilst.offset + length = ilst.length + + # Check for padding "free" atoms + meta = path[-1] + index = meta.children.index(ilst) + try: + prev = meta.children[index-1] + if prev.name == "free": + offset = prev.offset + length += prev.length + except IndexError: + pass + try: + next = meta.children[index+1] + if next.name == "free": + length += next.length + except IndexError: + pass + + delta = len(data) - length + if delta > 0 or (delta < 0 and delta > -8): + data += self.__pad_ilst(data) + delta = len(data) - length + insert_bytes(fileobj, delta, offset) + elif delta < 0: + data += self.__pad_ilst(data, -delta - 8) + delta = 0 + + fileobj.seek(offset) + fileobj.write(data) + self.__update_parents(fileobj, path, delta) + self.__update_offsets(fileobj, atoms, delta, offset) + + def __update_parents(self, fileobj, path, delta): + """Update all parent atoms with the new size.""" + for atom in path: + fileobj.seek(atom.offset) + size = cdata.uint_be(fileobj.read(4)) + if size == 1: # 64bit + # skip name (4B) and read size (8B) + size = cdata.ulonglong_be(fileobj.read(12)[4:]) + fileobj.seek(atom.offset + 8) + fileobj.write(cdata.to_ulonglong_be(size + delta)) + else: # 32bit + fileobj.seek(atom.offset) + fileobj.write(cdata.to_uint_be(size + delta)) + + def __update_offset_table(self, fileobj, fmt, atom, delta, offset): + """Update offset table in the specified atom.""" + if atom.offset > offset: + atom.offset += delta + fileobj.seek(atom.offset + 12) + data = fileobj.read(atom.length - 12) + fmt = fmt % cdata.uint_be(data[:4]) + offsets = struct.unpack(fmt, data[4:]) + offsets = [o + (0, delta)[offset < o] for o in offsets] + fileobj.seek(atom.offset + 16) + fileobj.write(struct.pack(fmt, *offsets)) + + def __update_tfhd(self, fileobj, atom, delta, offset): + if atom.offset > offset: + atom.offset += delta + fileobj.seek(atom.offset + 9) + data = fileobj.read(atom.length - 9) + flags = cdata.uint_be("\x00" + data[:3]) + if flags & 1: + o = 
cdata.ulonglong_be(data[7:15]) + if o > offset: + o += delta + fileobj.seek(atom.offset + 16) + fileobj.write(cdata.to_ulonglong_be(o)) + + def __update_offsets(self, fileobj, atoms, delta, offset): + """Update offset tables in all 'stco' and 'co64' atoms.""" + if delta == 0: + return + moov = atoms["moov"] + for atom in moov.findall('stco', True): + self.__update_offset_table(fileobj, ">%dI", atom, delta, offset) + for atom in moov.findall('co64', True): + self.__update_offset_table(fileobj, ">%dQ", atom, delta, offset) + try: + for atom in atoms["moof"].findall('tfhd', True): + self.__update_tfhd(fileobj, atom, delta, offset) + except KeyError: + pass + + def __parse_data(self, atom, data): + pos = 0 + while pos < atom.length - 8: + length, name, flags = struct.unpack(">I4sI", data[pos:pos+12]) + if name != "data": + raise MP4MetadataError( + "unexpected atom %r inside %r" % (name, atom.name)) + yield flags, data[pos+16:pos+length] + pos += length + def __render_data(self, key, flags, value): + return Atom.render(key, "".join([ + Atom.render("data", struct.pack(">2I", flags, 0) + data) + for data in value])) + + def __parse_freeform(self, atom, data): + length = cdata.uint_be(data[:4]) + mean = data[12:length] + pos = length + length = cdata.uint_be(data[pos:pos+4]) + name = data[pos+12:pos+length] + pos += length + value = [] + while pos < atom.length - 8: + length, atom_name = struct.unpack(">I4s", data[pos:pos+8]) + if atom_name != "data": + raise MP4MetadataError( + "unexpected atom %r inside %r" % (atom_name, atom.name)) + value.append(data[pos+16:pos+length]) + pos += length + if value: + self["%s:%s:%s" % (atom.name, mean, name)] = value + def __render_freeform(self, key, value): + dummy, mean, name = key.split(":", 2) + mean = struct.pack(">I4sI", len(mean) + 12, "mean", 0) + mean + name = struct.pack(">I4sI", len(name) + 12, "name", 0) + name + if isinstance(value, basestring): + value = [value] + return Atom.render("----", mean + name + "".join([ + 
struct.pack(">I4s2I", len(data) + 16, "data", 1, 0) + data + for data in value])) + + def __parse_pair(self, atom, data): + self[atom.name] = [struct.unpack(">2H", data[2:6]) for + flags, data in self.__parse_data(atom, data)] + def __render_pair(self, key, value): + data = [] + for (track, total) in value: + if 0 <= track < 1 << 16 and 0 <= total < 1 << 16: + data.append(struct.pack(">4H", 0, track, total, 0)) + else: + raise MP4MetadataValueError( + "invalid numeric pair %r" % ((track, total),)) + return self.__render_data(key, 0, data) + + def __render_pair_no_trailing(self, key, value): + data = [] + for (track, total) in value: + if 0 <= track < 1 << 16 and 0 <= total < 1 << 16: + data.append(struct.pack(">3H", 0, track, total)) + else: + raise MP4MetadataValueError( + "invalid numeric pair %r" % ((track, total),)) + return self.__render_data(key, 0, data) + + def __parse_genre(self, atom, data): + # Translate to a freeform genre. + genre = cdata.short_be(data[16:18]) + if "\xa9gen" not in self: + try: self["\xa9gen"] = [GENRES[genre - 1]] + except IndexError: pass + + def __parse_tempo(self, atom, data): + self[atom.name] = [cdata.ushort_be(value[1]) for + value in self.__parse_data(atom, data)] + + def __render_tempo(self, key, value): + try: + if len(value) == 0: + return self.__render_data(key, 0x15, "") + + if min(value) < 0 or max(value) >= 2**16: + raise MP4MetadataValueError( + "invalid 16 bit integers: %r" % value) + except TypeError: + raise MP4MetadataValueError( + "tmpo must be a list of 16 bit integers") + + values = map(cdata.to_ushort_be, value) + return self.__render_data(key, 0x15, values) + + def __parse_bool(self, atom, data): + try: self[atom.name] = bool(ord(data[16:17])) + except TypeError: self[atom.name] = False + def __render_bool(self, key, value): + return self.__render_data(key, 0x15, [chr(bool(value))]) + + def __parse_cover(self, atom, data): + self[atom.name] = [] + pos = 0 + while pos < atom.length - 8: + length, name, 
imageformat = struct.unpack(">I4sI", data[pos:pos+12]) + if name != "data": + raise MP4MetadataError( + "unexpected atom %r inside 'covr'" % name) + if imageformat not in (MP4Cover.FORMAT_JPEG, MP4Cover.FORMAT_PNG): + imageformat = MP4Cover.FORMAT_JPEG + cover = MP4Cover(data[pos+16:pos+length], imageformat) + self[atom.name].append( + MP4Cover(data[pos+16:pos+length], imageformat)) + pos += length + def __render_cover(self, key, value): + atom_data = [] + for cover in value: + try: imageformat = cover.imageformat + except AttributeError: imageformat = MP4Cover.FORMAT_JPEG + atom_data.append( + Atom.render("data", struct.pack(">2I", imageformat, 0) + cover)) + return Atom.render(key, "".join(atom_data)) + + def __parse_text(self, atom, data, expected_flags=1): + value = [text.decode('utf-8', 'replace') for flags, text + in self.__parse_data(atom, data) + if flags == expected_flags] + if value: + self[atom.name] = value + def __render_text(self, key, value, flags=1): + if isinstance(value, basestring): + value = [value] + return self.__render_data( + key, flags, map(utf8, value)) + + def delete(self, filename): + self.clear() + self.save(filename) + + __atoms = { + "----": (__parse_freeform, __render_freeform), + "trkn": (__parse_pair, __render_pair), + "disk": (__parse_pair, __render_pair_no_trailing), + "gnre": (__parse_genre, None), + "tmpo": (__parse_tempo, __render_tempo), + "cpil": (__parse_bool, __render_bool), + "pgap": (__parse_bool, __render_bool), + "pcst": (__parse_bool, __render_bool), + "covr": (__parse_cover, __render_cover), + "purl": (__parse_text, __render_text, 0), + "egid": (__parse_text, __render_text, 0), + } + + def pprint(self): + values = [] + for key, value in self.iteritems(): + key = key.decode('latin1') + if key == "covr": + values.append("%s=%s" % (key, ", ".join( + ["[%d bytes of data]" % len(data) for data in value]))) + elif isinstance(value, list): + values.append("%s=%s" % (key, " / ".join(map(unicode, value)))) + else: + 
values.append("%s=%s" % (key, value)) + return "\n".join(values) + +class MP4Info(object): + """MPEG-4 stream information. + + Attributes: + bitrate -- bitrate in bits per second, as an int + length -- file length in seconds, as a float + channels -- number of audio channels + sample_rate -- audio sampling rate in Hz + bits_per_sample -- bits per sample + """ + + bitrate = 0 + channels = 0 + sample_rate = 0 + bits_per_sample = 0 + + def __init__(self, atoms, fileobj): + for trak in list(atoms["moov"].findall("trak")): + hdlr = trak["mdia", "hdlr"] + fileobj.seek(hdlr.offset) + data = fileobj.read(hdlr.length) + if data[16:20] == "soun": + break + else: + raise MP4StreamInfoError("track has no audio data") + + mdhd = trak["mdia", "mdhd"] + fileobj.seek(mdhd.offset) + data = fileobj.read(mdhd.length) + if ord(data[8]) == 0: + offset = 20 + fmt = ">2I" + else: + offset = 28 + fmt = ">IQ" + end = offset + struct.calcsize(fmt) + unit, length = struct.unpack(fmt, data[offset:end]) + self.length = float(length) / unit + + try: + atom = trak["mdia", "minf", "stbl", "stsd"] + fileobj.seek(atom.offset) + data = fileobj.read(atom.length) + if data[20:24] == "mp4a": + length = cdata.uint_be(data[16:20]) + (self.channels, self.bits_per_sample, _, + self.sample_rate) = struct.unpack(">3HI", data[40:50]) + # ES descriptor type + if data[56:60] == "esds" and ord(data[64:65]) == 0x03: + pos = 65 + # skip extended descriptor type tag, length, ES ID + # and stream priority + if data[pos:pos+3] == "\x80\x80\x80": + pos += 3 + pos += 4 + # decoder config descriptor type + if ord(data[pos]) == 0x04: + pos += 1 + # skip extended descriptor type tag, length, + # object type ID, stream type, buffer size + # and maximum bitrate + if data[pos:pos+3] == "\x80\x80\x80": + pos += 3 + pos += 10 + # average bitrate + self.bitrate = cdata.uint_be(data[pos:pos+4]) + except (ValueError, KeyError): + # stsd atoms are optional + pass + + def pprint(self): + return "MPEG-4 audio, %.2f seconds, %d bps" 
% ( + self.length, self.bitrate) + +class MP4(FileType): + """An MPEG-4 audio file, probably containing AAC. + + If more than one track is present in the file, the first is used. + Only audio ('soun') tracks will be read. + """ + + MP4Tags = MP4Tags + + _mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"] + + def load(self, filename): + self.filename = filename + fileobj = file(filename, "rb") + try: + atoms = Atoms(fileobj) + try: self.info = MP4Info(atoms, fileobj) + except StandardError, err: + raise MP4StreamInfoError, err, sys.exc_info()[2] + try: self.tags = self.MP4Tags(atoms, fileobj) + except MP4MetadataError: + self.tags = None + except StandardError, err: + raise MP4MetadataError, err, sys.exc_info()[2] + finally: + fileobj.close() + + def add_tags(self): + self.tags = self.MP4Tags() + + def score(filename, fileobj, header): + return ("ftyp" in header) + ("mp4" in header) + score = staticmethod(score) + +Open = MP4 + +def delete(filename): + """Remove tags from a file.""" + MP4(filename).delete() diff --git a/lib/mutagen/musepack.py b/lib/mutagen/musepack.py new file mode 100644 index 00000000..f5b347da --- /dev/null +++ b/lib/mutagen/musepack.py @@ -0,0 +1,118 @@ +# A Musepack reader/tagger +# +# Copyright 2006 Lukas Lalinsky +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# +# $Id: musepack.py 4013 2007-04-23 09:18:22Z luks $ + +"""Musepack audio streams with APEv2 tags. + +Musepack is an audio format originally based on the MPEG-1 Layer-2 +algorithms. Stream versions 4 through 7 are supported. + +For more information, see http://www.musepack.net/. 
+""" + +__all__ = ["Musepack", "Open", "delete"] + +import struct + +from lib.mutagen.apev2 import APEv2File, error, delete +from lib.mutagen.id3 import BitPaddedInt +from lib.mutagen._util import cdata + +class MusepackHeaderError(error): pass + +RATES = [44100, 48000, 37800, 32000] + +class MusepackInfo(object): + """Musepack stream information. + + Attributes: + channels -- number of audio channels + length -- file length in seconds, as a float + sample_rate -- audio sampling rate in Hz + bitrate -- audio bitrate, in bits per second + version -- Musepack stream version + + Optional Attributes: + title_gain, title_peak -- Replay Gain and peak data for this song + album_gain, album_peak -- Replay Gain and peak data for this album + + These attributes are only available in stream version 7. The + gains are a float, +/- some dB. The peaks are a percentage [0..1] of + the maximum amplitude. This means to get a number comparable to + VorbisGain, you must multiply the peak by 2. + """ + + def __init__(self, fileobj): + header = fileobj.read(32) + if len(header) != 32: + raise MusepackHeaderError("not a Musepack file") + # Skip ID3v2 tags + if header[:3] == "ID3": + size = 10 + BitPaddedInt(header[6:10]) + fileobj.seek(size) + header = fileobj.read(32) + if len(header) != 32: + raise MusepackHeaderError("not a Musepack file") + # SV7 + if header.startswith("MP+"): + self.version = ord(header[3]) & 0xF + if self.version < 7: + raise MusepackHeaderError("not a Musepack file") + frames = cdata.uint_le(header[4:8]) + flags = cdata.uint_le(header[8:12]) + + self.title_peak, self.title_gain = struct.unpack( + "> 16) & 0x0003] + self.bitrate = 0 + # SV4-SV6 + else: + header_dword = cdata.uint_le(header[0:4]) + self.version = (header_dword >> 11) & 0x03FF; + if self.version < 4 or self.version > 6: + raise MusepackHeaderError("not a Musepack file") + self.bitrate = (header_dword >> 23) & 0x01FF; + self.sample_rate = 44100 + if self.version >= 5: + frames = 
cdata.uint_le(header[4:8]) + else: + frames = cdata.ushort_le(header[6:8]) + if self.version < 6: + frames -= 1 + self.channels = 2 + self.length = float(frames * 1152 - 576) / self.sample_rate + if not self.bitrate and self.length != 0: + fileobj.seek(0, 2) + self.bitrate = int(fileobj.tell() * 8 / (self.length * 1000) + 0.5) + + def pprint(self): + if self.version >= 7: + rg_data = ", Gain: %+0.2f (title), %+0.2f (album)" %( + self.title_gain, self.album_gain) + else: + rg_data = "" + return "Musepack, %.2f seconds, %d Hz%s" % ( + self.length, self.sample_rate, rg_data) + +class Musepack(APEv2File): + _Info = MusepackInfo + _mimes = ["audio/x-musepack", "audio/x-mpc"] + + def score(filename, fileobj, header): + return header.startswith("MP+") + filename.endswith(".mpc") + score = staticmethod(score) + +Open = Musepack diff --git a/lib/mutagen/ogg.py b/lib/mutagen/ogg.py new file mode 100644 index 00000000..38dfdc7f --- /dev/null +++ b/lib/mutagen/ogg.py @@ -0,0 +1,498 @@ +# Copyright 2006 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# +# $Id: ogg.py 3975 2007-01-13 21:51:17Z piman $ + +"""Read and write Ogg bitstreams and pages. + +This module reads and writes a subset of the Ogg bitstream format +version 0. It does *not* read or write Ogg Vorbis files! For that, +you should use mutagen.oggvorbis. + +This implementation is based on the RFC 3533 standard found at +http://www.xiph.org/ogg/doc/rfc3533.txt. +""" + +import struct +import sys +import zlib + +from cStringIO import StringIO + +from lib.mutagen import FileType +from lib.mutagen._util import cdata, insert_bytes, delete_bytes + +class error(IOError): + """Ogg stream parsing errors.""" + pass + +class OggPage(object): + """A single Ogg page (not necessarily a single encoded packet). 
+ + A page is a header of 26 bytes, followed by the length of the + data, followed by the data. + + The constructor is givin a file-like object pointing to the start + of an Ogg page. After the constructor is finished it is pointing + to the start of the next page. + + Attributes: + version -- stream structure version (currently always 0) + position -- absolute stream position (default -1) + serial -- logical stream serial number (default 0) + sequence -- page sequence number within logical stream (default 0) + offset -- offset this page was read from (default None) + complete -- if the last packet on this page is complete (default True) + packets -- list of raw packet data (default []) + + Note that if 'complete' is false, the next page's 'continued' + property must be true (so set both when constructing pages). + + If a file-like object is supplied to the constructor, the above + attributes will be filled in based on it. + """ + + version = 0 + __type_flags = 0 + position = 0L + serial = 0 + sequence = 0 + offset = None + complete = True + + def __init__(self, fileobj=None): + self.packets = [] + + if fileobj is None: + return + + self.offset = fileobj.tell() + + header = fileobj.read(27) + if len(header) == 0: + raise EOFError + + try: + (oggs, self.version, self.__type_flags, self.position, + self.serial, self.sequence, crc, segments) = struct.unpack( + "<4sBBqIIiB", header) + except struct.error: + raise error("unable to read full header; got %r" % header) + + if oggs != "OggS": + raise error("read %r, expected %r, at 0x%x" % ( + oggs, "OggS", fileobj.tell() - 27)) + + if self.version != 0: + raise error("version %r unsupported" % self.version) + + total = 0 + lacings = [] + lacing_bytes = fileobj.read(segments) + if len(lacing_bytes) != segments: + raise error("unable to read %r lacing bytes" % segments) + for c in map(ord, lacing_bytes): + total += c + if c < 255: + lacings.append(total) + total = 0 + if total: + lacings.append(total) + self.complete = False 
+ + self.packets = map(fileobj.read, lacings) + if map(len, self.packets) != lacings: + raise error("unable to read full data") + + def __eq__(self, other): + """Two Ogg pages are the same if they write the same data.""" + try: + return (self.write() == other.write()) + except AttributeError: + return False + + def __repr__(self): + attrs = ['version', 'position', 'serial', 'sequence', 'offset', + 'complete', 'continued', 'first', 'last'] + values = ["%s=%r" % (attr, getattr(self, attr)) for attr in attrs] + return "<%s %s, %d bytes in %d packets>" % ( + type(self).__name__, " ".join(values), sum(map(len, self.packets)), + len(self.packets)) + + def write(self): + """Return a string encoding of the page header and data. + + A ValueError is raised if the data is too big to fit in a + single page. + """ + + data = [ + struct.pack("<4sBBqIIi", "OggS", self.version, self.__type_flags, + self.position, self.serial, self.sequence, 0) + ] + + lacing_data = [] + for datum in self.packets: + quot, rem = divmod(len(datum), 255) + lacing_data.append("\xff" * quot + chr(rem)) + lacing_data = "".join(lacing_data) + if not self.complete and lacing_data.endswith("\x00"): + lacing_data = lacing_data[:-1] + data.append(chr(len(lacing_data))) + data.append(lacing_data) + data.extend(self.packets) + data = "".join(data) + + # Python's CRC is swapped relative to Ogg's needs. + crc = ~zlib.crc32(data.translate(cdata.bitswap), -1) + # Although we're using to_int_be, this actually makes the CRC + # a proper le integer, since Python's CRC is byteswapped. + crc = cdata.to_int_be(crc).translate(cdata.bitswap) + data = data[:22] + crc + data[26:] + return data + + def __size(self): + size = 27 # Initial header size + for datum in self.packets: + quot, rem = divmod(len(datum), 255) + size += quot + 1 + if not self.complete and rem == 0: + # Packet contains a multiple of 255 bytes and is not + # terminated, so we don't have a \x00 at the end. 
+ size -= 1 + size += sum(map(len, self.packets)) + return size + + size = property(__size, doc="Total frame size.") + + def __set_flag(self, bit, val): + mask = 1 << bit + if val: self.__type_flags |= mask + else: self.__type_flags &= ~mask + + continued = property( + lambda self: cdata.test_bit(self.__type_flags, 0), + lambda self, v: self.__set_flag(0, v), + doc="The first packet is continued from the previous page.") + + first = property( + lambda self: cdata.test_bit(self.__type_flags, 1), + lambda self, v: self.__set_flag(1, v), + doc="This is the first page of a logical bitstream.") + + last = property( + lambda self: cdata.test_bit(self.__type_flags, 2), + lambda self, v: self.__set_flag(2, v), + doc="This is the last page of a logical bitstream.") + + def renumber(klass, fileobj, serial, start): + """Renumber pages belonging to a specified logical stream. + + fileobj must be opened with mode r+b or w+b. + + Starting at page number 'start', renumber all pages belonging + to logical stream 'serial'. Other pages will be ignored. + + fileobj must point to the start of a valid Ogg page; any + occuring after it and part of the specified logical stream + will be numbered. No adjustment will be made to the data in + the pages nor the granule position; only the page number, and + so also the CRC. + + If an error occurs (e.g. non-Ogg data is found), fileobj will + be left pointing to the place in the stream the error occured, + but the invalid data will be left intact (since this function + does not change the total file size). + """ + + number = start + while True: + try: page = OggPage(fileobj) + except EOFError: + break + else: + if page.serial != serial: + # Wrong stream, skip this page. + continue + # Changing the number can't change the page size, + # so seeking back based on the current size is safe. 
+ fileobj.seek(-page.size, 1) + page.sequence = number + fileobj.write(page.write()) + fileobj.seek(page.offset + page.size, 0) + number += 1 + renumber = classmethod(renumber) + + def to_packets(klass, pages, strict=False): + """Construct a list of packet data from a list of Ogg pages. + + If strict is true, the first page must start a new packet, + and the last page must end the last packet. + """ + + serial = pages[0].serial + sequence = pages[0].sequence + packets = [] + + if strict: + if pages[0].continued: + raise ValueError("first packet is continued") + if not pages[-1].complete: + raise ValueError("last packet does not complete") + elif pages and pages[0].continued: + packets.append("") + + for page in pages: + if serial != page.serial: + raise ValueError("invalid serial number in %r" % page) + elif sequence != page.sequence: + raise ValueError("bad sequence number in %r" % page) + else: sequence += 1 + + if page.continued: packets[-1] += page.packets[0] + else: packets.append(page.packets[0]) + packets.extend(page.packets[1:]) + + return packets + to_packets = classmethod(to_packets) + + def from_packets(klass, packets, sequence=0, + default_size=4096, wiggle_room=2048): + """Construct a list of Ogg pages from a list of packet data. + + The algorithm will generate pages of approximately + default_size in size (rounded down to the nearest multiple of + 255). However, it will also allow pages to increase to + approximately default_size + wiggle_room if allowing the + wiggle room would finish a packet (only one packet will be + finished in this way per page; if the next packet would fit + into the wiggle room, it still starts on a new page). + + This method reduces packet fragmentation when packet sizes are + slightly larger than the default page size, while still + ensuring most pages are of the average size. + + Pages are numbered started at 'sequence'; other information is + uninitialized. 
+ """ + + chunk_size = (default_size // 255) * 255 + + pages = [] + + page = OggPage() + page.sequence = sequence + + for packet in packets: + page.packets.append("") + while packet: + data, packet = packet[:chunk_size], packet[chunk_size:] + if page.size < default_size and len(page.packets) < 255: + page.packets[-1] += data + else: + # If we've put any packet data into this page yet, + # we need to mark it incomplete. However, we can + # also have just started this packet on an already + # full page, in which case, just start the new + # page with this packet. + if page.packets[-1]: + page.complete = False + if len(page.packets) == 1: + page.position = -1L + else: + page.packets.pop(-1) + pages.append(page) + page = OggPage() + page.continued = not pages[-1].complete + page.sequence = pages[-1].sequence + 1 + page.packets.append(data) + + if len(packet) < wiggle_room: + page.packets[-1] += packet + packet = "" + + if page.packets: + pages.append(page) + + return pages + from_packets = classmethod(from_packets) + + def replace(klass, fileobj, old_pages, new_pages): + """Replace old_pages with new_pages within fileobj. + + old_pages must have come from reading fileobj originally. + new_pages are assumed to have the 'same' data as old_pages, + and so the serial and sequence numbers will be copied, as will + the flags for the first and last pages. + + fileobj will be resized and pages renumbered as necessary. As + such, it must be opened r+b or w+b. + """ + + # Number the new pages starting from the first old page. 
+ first = old_pages[0].sequence + for page, seq in zip(new_pages, range(first, first + len(new_pages))): + page.sequence = seq + page.serial = old_pages[0].serial + + new_pages[0].first = old_pages[0].first + new_pages[0].last = old_pages[0].last + new_pages[0].continued = old_pages[0].continued + + new_pages[-1].first = old_pages[-1].first + new_pages[-1].last = old_pages[-1].last + new_pages[-1].complete = old_pages[-1].complete + if not new_pages[-1].complete and len(new_pages[-1].packets) == 1: + new_pages[-1].position = -1L + + new_data = "".join(map(klass.write, new_pages)) + + # Make room in the file for the new data. + delta = len(new_data) + fileobj.seek(old_pages[0].offset, 0) + insert_bytes(fileobj, delta, old_pages[0].offset) + fileobj.seek(old_pages[0].offset, 0) + fileobj.write(new_data) + new_data_end = old_pages[0].offset + delta + + # Go through the old pages and delete them. Since we shifted + # the data down the file, we need to adjust their offsets. We + # also need to go backwards, so we don't adjust the deltas of + # the other pages. + old_pages.reverse() + for old_page in old_pages: + adj_offset = old_page.offset + delta + delete_bytes(fileobj, old_page.size, adj_offset) + + # Finally, if there's any discrepency in length, we need to + # renumber the pages for the logical stream. + if len(old_pages) != len(new_pages): + fileobj.seek(new_data_end, 0) + serial = new_pages[-1].serial + sequence = new_pages[-1].sequence + 1 + klass.renumber(fileobj, serial, sequence) + replace = classmethod(replace) + + def find_last(klass, fileobj, serial): + """Find the last page of the stream 'serial'. + + If the file is not multiplexed this function is fast. If it is, + it must read the whole the stream. + + This finds the last page in the actual file object, or the last + page in the stream (with eos set), whichever comes first. + """ + + # For non-muxed streams, look at the last page. 
+ try: fileobj.seek(-256*256, 2) + except IOError: + # The file is less than 64k in length. + fileobj.seek(0) + data = fileobj.read() + try: index = data.rindex("OggS") + except ValueError: + raise error("unable to find final Ogg header") + stringobj = StringIO(data[index:]) + best_page = None + try: + page = OggPage(stringobj) + except error: + pass + else: + if page.serial == serial: + if page.last: return page + else: best_page = page + else: best_page = None + + # The stream is muxed, so use the slow way. + fileobj.seek(0) + try: + page = OggPage(fileobj) + while not page.last: + page = OggPage(fileobj) + while page.serial != serial: + page = OggPage(fileobj) + best_page = page + return page + except error: + return best_page + except EOFError: + return best_page + find_last = classmethod(find_last) + +class OggFileType(FileType): + """An generic Ogg file.""" + + _Info = None + _Tags = None + _Error = None + _mimes = ["application/ogg", "application/x-ogg"] + + def load(self, filename): + """Load file information from a filename.""" + + self.filename = filename + fileobj = file(filename, "rb") + try: + try: + self.info = self._Info(fileobj) + self.tags = self._Tags(fileobj, self.info) + + if self.info.length: + # The streaminfo gave us real length information, + # don't waste time scanning the Ogg. + return + + last_page = OggPage.find_last(fileobj, self.info.serial) + samples = last_page.position + try: + denom = self.info.sample_rate + except AttributeError: + denom = self.info.fps + self.info.length = samples / float(denom) + + except error, e: + raise self._Error, e, sys.exc_info()[2] + except EOFError: + raise self._Error, "no appropriate stream found" + finally: + fileobj.close() + + def delete(self, filename=None): + """Remove tags from a file. + + If no filename is given, the one most recently loaded is used. 
+ """ + if filename is None: + filename = self.filename + + self.tags.clear() + fileobj = file(filename, "rb+") + try: + try: self.tags._inject(fileobj) + except error, e: + raise self._Error, e, sys.exc_info()[2] + except EOFError: + raise self._Error, "no appropriate stream found" + finally: + fileobj.close() + + def save(self, filename=None): + """Save a tag to a file. + + If no filename is given, the one most recently loaded is used. + """ + if filename is None: + filename = self.filename + fileobj = file(filename, "rb+") + try: + try: self.tags._inject(fileobj) + except error, e: + raise self._Error, e, sys.exc_info()[2] + except EOFError: + raise self._Error, "no appropriate stream found" + finally: + fileobj.close() diff --git a/lib/mutagen/oggflac.py b/lib/mutagen/oggflac.py new file mode 100644 index 00000000..c5ea0786 --- /dev/null +++ b/lib/mutagen/oggflac.py @@ -0,0 +1,127 @@ +# Ogg FLAC support. +# +# Copyright 2006 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# +# $Id: oggflac.py 3976 2007-01-13 22:00:14Z piman $ + +"""Read and write Ogg FLAC comments. + +This module handles FLAC files wrapped in an Ogg bitstream. The first +FLAC stream found is used. For 'naked' FLACs, see mutagen.flac. + +This module is based off the specification at +http://flac.sourceforge.net/ogg_mapping.html. +""" + +__all__ = ["OggFLAC", "Open", "delete"] + +import struct + +from cStringIO import StringIO + +from lib.mutagen.flac import StreamInfo, VCFLACDict +from lib.mutagen.ogg import OggPage, OggFileType, error as OggError + +class error(OggError): pass +class OggFLACHeaderError(error): pass + +class OggFLACStreamInfo(StreamInfo): + """Ogg FLAC general header and stream info. + + This encompasses the Ogg wrapper for the FLAC STREAMINFO metadata + block, as well as the Ogg codec setup that precedes it. 
+ + Attributes (in addition to StreamInfo's): + packets -- number of metadata packets + serial -- Ogg logical stream serial number + """ + + packets = 0 + serial = 0 + + def load(self, data): + page = OggPage(data) + while not page.packets[0].startswith("\x7FFLAC"): + page = OggPage(data) + major, minor, self.packets, flac = struct.unpack( + ">BBH4s", page.packets[0][5:13]) + if flac != "fLaC": + raise OggFLACHeaderError("invalid FLAC marker (%r)" % flac) + elif (major, minor) != (1, 0): + raise OggFLACHeaderError( + "unknown mapping version: %d.%d" % (major, minor)) + self.serial = page.serial + + # Skip over the block header. + stringobj = StringIO(page.packets[0][17:]) + super(OggFLACStreamInfo, self).load(StringIO(page.packets[0][17:])) + + def pprint(self): + return "Ogg " + super(OggFLACStreamInfo, self).pprint() + +class OggFLACVComment(VCFLACDict): + def load(self, data, info, errors='replace'): + # data should be pointing at the start of an Ogg page, after + # the first FLAC page. + pages = [] + complete = False + while not complete: + page = OggPage(data) + if page.serial == info.serial: + pages.append(page) + complete = page.complete or (len(page.packets) > 1) + comment = StringIO(OggPage.to_packets(pages)[0][4:]) + super(OggFLACVComment, self).load(comment, errors=errors) + + def _inject(self, fileobj): + """Write tag data into the FLAC Vorbis comment packet/page.""" + + # Ogg FLAC has no convenient data marker like Vorbis, but the + # second packet - and second page - must be the comment data. 
+ fileobj.seek(0) + page = OggPage(fileobj) + while not page.packets[0].startswith("\x7FFLAC"): + page = OggPage(fileobj) + + first_page = page + while not (page.sequence == 1 and page.serial == first_page.serial): + page = OggPage(fileobj) + + old_pages = [page] + while not (old_pages[-1].complete or len(old_pages[-1].packets) > 1): + page = OggPage(fileobj) + if page.serial == first_page.serial: + old_pages.append(page) + + packets = OggPage.to_packets(old_pages, strict=False) + + # Set the new comment block. + data = self.write() + data = packets[0][0] + struct.pack(">I", len(data))[-3:] + data + packets[0] = data + + new_pages = OggPage.from_packets(packets, old_pages[0].sequence) + OggPage.replace(fileobj, old_pages, new_pages) + +class OggFLAC(OggFileType): + """An Ogg FLAC file.""" + + _Info = OggFLACStreamInfo + _Tags = OggFLACVComment + _Error = OggFLACHeaderError + _mimes = ["audio/x-oggflac"] + + def score(filename, fileobj, header): + return (header.startswith("OggS") * ( + ("FLAC" in header) + ("fLaC" in header))) + score = staticmethod(score) + +Open = OggFLAC + +def delete(filename): + """Remove tags from a file.""" + OggFLAC(filename).delete() diff --git a/lib/mutagen/oggspeex.py b/lib/mutagen/oggspeex.py new file mode 100644 index 00000000..4984925c --- /dev/null +++ b/lib/mutagen/oggspeex.py @@ -0,0 +1,123 @@ +# Ogg Speex support. +# +# Copyright 2006 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# +# $Id: oggspeex.py 3976 2007-01-13 22:00:14Z piman $ + +"""Read and write Ogg Speex comments. + +This module handles Speex files wrapped in an Ogg bitstream. The +first Speex stream found is used. + +Read more about Ogg Speex at http://www.speex.org/. 
This module is +based on the specification at http://www.speex.org/manual2/node7.html +and clarifications after personal communication with Jean-Marc, +http://lists.xiph.org/pipermail/speex-dev/2006-July/004676.html. +""" + +__all__ = ["OggSpeex", "Open", "delete"] + +from lib.mutagen._vorbis import VCommentDict +from lib.mutagen.ogg import OggPage, OggFileType, error as OggError +from lib.mutagen._util import cdata + +class error(OggError): pass +class OggSpeexHeaderError(error): pass + +class OggSpeexInfo(object): + """Ogg Speex stream information. + + Attributes: + bitrate - nominal bitrate in bits per second + channels - number of channels + length - file length in seconds, as a float + + The reference encoder does not set the bitrate; in this case, + the bitrate will be 0. + """ + + length = 0 + + def __init__(self, fileobj): + page = OggPage(fileobj) + while not page.packets[0].startswith("Speex "): + page = OggPage(fileobj) + if not page.first: + raise OggSpeexHeaderError( + "page has ID header, but doesn't start a stream") + self.sample_rate = cdata.uint_le(page.packets[0][36:40]) + self.channels = cdata.uint_le(page.packets[0][48:52]) + self.bitrate = max(0, cdata.int_le(page.packets[0][52:56])) + self.serial = page.serial + + def pprint(self): + return "Ogg Speex, %.2f seconds" % self.length + +class OggSpeexVComment(VCommentDict): + """Speex comments embedded in an Ogg bitstream.""" + + def __init__(self, fileobj, info): + pages = [] + complete = False + while not complete: + page = OggPage(fileobj) + if page.serial == info.serial: + pages.append(page) + complete = page.complete or (len(page.packets) > 1) + data = OggPage.to_packets(pages)[0] + "\x01" + super(OggSpeexVComment, self).__init__(data, framing=False) + + def _inject(self, fileobj): + """Write tag data into the Speex comment packet/page.""" + + fileobj.seek(0) + + # Find the first header page, with the stream info. + # Use it to get the serial number. 
+ page = OggPage(fileobj) + while not page.packets[0].startswith("Speex "): + page = OggPage(fileobj) + + # Look for the next page with that serial number, it'll start + # the comment packet. + serial = page.serial + page = OggPage(fileobj) + while page.serial != serial: + page = OggPage(fileobj) + + # Then find all the pages with the comment packet. + old_pages = [page] + while not (old_pages[-1].complete or len(old_pages[-1].packets) > 1): + page = OggPage(fileobj) + if page.serial == old_pages[0].serial: + old_pages.append(page) + + packets = OggPage.to_packets(old_pages, strict=False) + + # Set the new comment packet. + packets[0] = self.write(framing=False) + + new_pages = OggPage.from_packets(packets, old_pages[0].sequence) + OggPage.replace(fileobj, old_pages, new_pages) + +class OggSpeex(OggFileType): + """An Ogg Speex file.""" + + _Info = OggSpeexInfo + _Tags = OggSpeexVComment + _Error = OggSpeexHeaderError + _mimes = ["audio/x-speex"] + + def score(filename, fileobj, header): + return (header.startswith("OggS") * ("Speex " in header)) + score = staticmethod(score) + +Open = OggSpeex + +def delete(filename): + """Remove tags from a file.""" + OggSpeex(filename).delete() diff --git a/lib/mutagen/oggtheora.py b/lib/mutagen/oggtheora.py new file mode 100644 index 00000000..b0a52985 --- /dev/null +++ b/lib/mutagen/oggtheora.py @@ -0,0 +1,111 @@ +# Ogg Theora support. +# +# Copyright 2006 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# +# $Id: oggtheora.py 3976 2007-01-13 22:00:14Z piman $ + +"""Read and write Ogg Theora comments. + +This module handles Theora files wrapped in an Ogg bitstream. The +first Theora stream found is used. + +Based on the specification at http://theora.org/doc/Theora_I_spec.pdf. 
+""" + +__all__ = ["OggTheora", "Open", "delete"] + +import struct + +from lib.mutagen._vorbis import VCommentDict +from lib.mutagen.ogg import OggPage, OggFileType, error as OggError + +class error(OggError): pass +class OggTheoraHeaderError(error): pass + +class OggTheoraInfo(object): + """Ogg Theora stream information. + + Attributes: + length - file length in seconds, as a float + fps - video frames per second, as a float + """ + + length = 0 + + def __init__(self, fileobj): + page = OggPage(fileobj) + while not page.packets[0].startswith("\x80theora"): + page = OggPage(fileobj) + if not page.first: + raise OggTheoraHeaderError( + "page has ID header, but doesn't start a stream") + data = page.packets[0] + vmaj, vmin = struct.unpack("2B", data[7:9]) + if (vmaj, vmin) != (3, 2): + raise OggTheoraHeaderError( + "found Theora version %d.%d != 3.2" % (vmaj, vmin)) + fps_num, fps_den = struct.unpack(">2I", data[22:30]) + self.fps = fps_num / float(fps_den) + self.bitrate = struct.unpack(">I", data[37:40] + "\x00")[0] + self.serial = page.serial + + def pprint(self): + return "Ogg Theora, %.2f seconds, %d bps" % (self.length, self.bitrate) + +class OggTheoraCommentDict(VCommentDict): + """Theora comments embedded in an Ogg bitstream.""" + + def __init__(self, fileobj, info): + pages = [] + complete = False + while not complete: + page = OggPage(fileobj) + if page.serial == info.serial: + pages.append(page) + complete = page.complete or (len(page.packets) > 1) + data = OggPage.to_packets(pages)[0][7:] + super(OggTheoraCommentDict, self).__init__(data + "\x01") + + def _inject(self, fileobj): + """Write tag data into the Theora comment packet/page.""" + + fileobj.seek(0) + page = OggPage(fileobj) + while not page.packets[0].startswith("\x81theora"): + page = OggPage(fileobj) + + old_pages = [page] + while not (old_pages[-1].complete or len(old_pages[-1].packets) > 1): + page = OggPage(fileobj) + if page.serial == old_pages[0].serial: + old_pages.append(page) + + 
packets = OggPage.to_packets(old_pages, strict=False) + + packets[0] = "\x81theora" + self.write(framing=False) + + new_pages = OggPage.from_packets(packets, old_pages[0].sequence) + OggPage.replace(fileobj, old_pages, new_pages) + +class OggTheora(OggFileType): + """An Ogg Theora file.""" + + _Info = OggTheoraInfo + _Tags = OggTheoraCommentDict + _Error = OggTheoraHeaderError + _mimes = ["video/x-theora"] + + def score(filename, fileobj, header): + return (header.startswith("OggS") * + (("\x80theora" in header) + ("\x81theora" in header))) + score = staticmethod(score) + +Open = OggTheora + +def delete(filename): + """Remove tags from a file.""" + OggTheora(filename).delete() diff --git a/lib/mutagen/oggvorbis.py b/lib/mutagen/oggvorbis.py new file mode 100644 index 00000000..dbed6831 --- /dev/null +++ b/lib/mutagen/oggvorbis.py @@ -0,0 +1,128 @@ +# Ogg Vorbis support. +# +# Copyright 2006 Joe Wreschnig +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# +# $Id: oggvorbis.py 3976 2007-01-13 22:00:14Z piman $ + +"""Read and write Ogg Vorbis comments. + +This module handles Vorbis files wrapped in an Ogg bitstream. The +first Vorbis stream found is used. + +Read more about Ogg Vorbis at http://vorbis.com/. This module is based +on the specification at http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html. +""" + +__all__ = ["OggVorbis", "Open", "delete"] + +import struct + +from lib.mutagen._vorbis import VCommentDict +from lib.mutagen.ogg import OggPage, OggFileType, error as OggError + +class error(OggError): pass +class OggVorbisHeaderError(error): pass + +class OggVorbisInfo(object): + """Ogg Vorbis stream information. 
+ + Attributes: + length - file length in seconds, as a float + bitrate - nominal ('average') bitrate in bits per second, as an int + """ + + length = 0 + + def __init__(self, fileobj): + page = OggPage(fileobj) + while not page.packets[0].startswith("\x01vorbis"): + page = OggPage(fileobj) + if not page.first: + raise OggVorbisHeaderError( + "page has ID header, but doesn't start a stream") + (self.channels, self.sample_rate, max_bitrate, nominal_bitrate, + min_bitrate) = struct.unpack(" nominal_bitrate: + self.bitrate = min_bitrate + else: + self.bitrate = nominal_bitrate + + if self.bitrate == 0 and self.length > 0: + fileobj.seek(0, 2) + self.bitrate = int((fileobj.tell() * 8) / self.length) + + + def pprint(self): + return "Ogg Vorbis, %.2f seconds, %d bps" % (self.length, self.bitrate) + +class OggVCommentDict(VCommentDict): + """Vorbis comments embedded in an Ogg bitstream.""" + + def __init__(self, fileobj, info): + pages = [] + complete = False + while not complete: + page = OggPage(fileobj) + if page.serial == info.serial: + pages.append(page) + complete = page.complete or (len(page.packets) > 1) + data = OggPage.to_packets(pages)[0][7:] # Strip off "\x03vorbis". + super(OggVCommentDict, self).__init__(data) + + def _inject(self, fileobj): + """Write tag data into the Vorbis comment packet/page.""" + + # Find the old pages in the file; we'll need to remove them, + # plus grab any stray setup packet data out of them. + fileobj.seek(0) + page = OggPage(fileobj) + while not page.packets[0].startswith("\x03vorbis"): + page = OggPage(fileobj) + + old_pages = [page] + while not (old_pages[-1].complete or len(old_pages[-1].packets) > 1): + page = OggPage(fileobj) + if page.serial == old_pages[0].serial: + old_pages.append(page) + + packets = OggPage.to_packets(old_pages, strict=False) + + # Set the new comment packet. 
+ packets[0] = "\x03vorbis" + self.write() + + new_pages = OggPage.from_packets(packets, old_pages[0].sequence) + OggPage.replace(fileobj, old_pages, new_pages) + +class OggVorbis(OggFileType): + """An Ogg Vorbis file.""" + + _Info = OggVorbisInfo + _Tags = OggVCommentDict + _Error = OggVorbisHeaderError + _mimes = ["audio/vorbis", "audio/x-vorbis"] + + def score(filename, fileobj, header): + return (header.startswith("OggS") * ("\x01vorbis" in header)) + score = staticmethod(score) + +Open = OggVorbis + +def delete(filename): + """Remove tags from a file.""" + OggVorbis(filename).delete() diff --git a/lib/mutagen/optimfrog.py b/lib/mutagen/optimfrog.py new file mode 100644 index 00000000..c1d8f49f --- /dev/null +++ b/lib/mutagen/optimfrog.py @@ -0,0 +1,64 @@ +# OptimFROG reader/tagger +# +# Copyright 2006 Lukas Lalinsky +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# +# $Id: optimfrog.py 3923 2006-10-21 15:07:13Z luks $ + +"""OptimFROG audio streams with APEv2 tags. + +OptimFROG is a lossless audio compression program. Its main goal is to +reduce at maximum the size of audio files, while permitting bit +identical restoration for all input. It is similar with the ZIP +compression, but it is highly specialized to compress audio data. + +Only versions 4.5 and higher are supported. + +For more information, see http://www.losslessaudio.org/ +""" + +__all__ = ["OptimFROG", "Open", "delete"] + +import struct +from lib.mutagen.apev2 import APEv2File, error, delete + +class OptimFROGHeaderError(error): pass + +class OptimFROGInfo(object): + """OptimFROG stream information. 
+ + Attributes: + channels - number of audio channels + length - file length in seconds, as a float + sample_rate - audio sampling rate in Hz + """ + + def __init__(self, fileobj): + header = fileobj.read(76) + if (len(header) != 76 or not header.startswith("OFR ") or + struct.unpack("> 23) & 0xF] + self.length = float(samples) / self.sample_rate + + def pprint(self): + return "WavPack, %.2f seconds, %d Hz" % (self.length, self.sample_rate) + +class WavPack(APEv2File): + _Info = WavPackInfo + _mimes = ["audio/x-wavpack"] + + def score(filename, fileobj, header): + return header.startswith("wvpk") * 2 + score = staticmethod(score) diff --git a/webServer.py b/webServer.py index 72b23118..705290f2 100644 --- a/webServer.py +++ b/webServer.py @@ -217,7 +217,7 @@ class Headphones: releaseid = u.extractUuid(release.id) inc = ws.ReleaseIncludes(artist=True, releaseEvents= True, tracks= True, releaseGroup=True) results = ws.Query().getReleaseById(releaseid, inc) - time.sleep(0.6) + time.sleep(1) for event in results.releaseEvents: if event.country == 'US': @@ -350,23 +350,37 @@ class Headphones: path = config['General']['path_to_xml'] except: path = 'Absolute path to iTunes XML or Top-Level Music Directory' + try: + path2 = config['General']['path_to_itunes'] + except: + path2 = 'Enter a directory to scan' page = [templates._header] page.append(templates._logobar) page.append(templates._nav) - page.append('''

Import or Sync Your iTunes Library/Music Folder


- Enter the full path to your iTunes XML file or music folder

- i.e. /Users/"username"/Music/iTunes/iTunes Music Library.xml
- or /Users/"username"/Music/iTunes/iTunes Media/Music

(artists should have their own directories for folder import to work) -

note: This process can take a LONG time!

- Once you click "Submit" you can navigate away from this - page while the process runs.


+ page.append(''' +

Scan Music Library


+ Where do you keep your music?

+ You can put in any directory, and it will scan for audio files in that folder + (including all subdirectories)

For example: '/Users/name/Music' +

+ It may take a while depending on how many files you have. You can navigate away from the page
+ as soon as you click 'Submit' +

+ +
+ +


+

Import or Sync Your iTunes Library/Music Folder


+ This is here for legacy purposes (try the Music Scanner above!)

+ If you'd rather import an iTunes .xml file, you can enter the full path here.



''' % path) + Force Update Active Artists


''' % (path2, path)) page.append(templates._footer) return page manage.exposed = True @@ -380,6 +394,15 @@ class Headphones: raise cherrypy.HTTPRedirect("home") importItunes.exposed = True + def musicScan(self, path): + config = configobj.ConfigObj(config_file) + config['General']['path_to_itunes'] = path + config.write() + import itunesimport + itunesimport.scanMusic(path) + raise cherrypy.HTTPRedirect("home") + musicScan.exposed = True + def forceUpdate(self): import updater updater.dbUpdate()