Added a music scanner to read metadata from audio files rather than using folder names/xml

This commit is contained in:
Remy
2011-07-11 14:23:11 -07:00
parent fd98828cd1
commit 8f999111c5
42 changed files with 15334 additions and 14 deletions

View File

@@ -258,9 +258,9 @@ form = '''
<td>
<br>
<p><b>Path to iTunes folder</b>:<br><input type="text" name="path_to_itunes" value="%s" size="60" maxlength="40">
<p><b>Path to Music folder</b>:<br><input type="text" name="path_to_itunes" value="%s" size="60" maxlength="40">
<br>
<i class="smalltext">i.e. Music/iTunes or /Users/name/Music/iTunes</i>
<i class="smalltext">i.e. /Users/name/Music/iTunes or /Volumes/share/music</i>
</p>
</td>
<td>

View File

@@ -8,11 +8,44 @@ import time
import os
import sqlite3
from headphones import FULL_PATH
from lib.beets.mediafile import MediaFile
import logger
database = os.path.join(FULL_PATH, 'headphones.db')
def scanMusic(dir):
    """Scan `dir` for audio files and import the artists found in their tags.

    The directory tree is walked with os.walk. Files are selected by
    extension (.mp3, .flac, .aac, .ogg, .ape); the comparison is
    case-insensitive so that names such as "SONG.MP3" are picked up too.
    Each selected file is opened with MediaFile; files that cannot be read
    are logged and skipped. The distinct, non-empty artist tags are then
    passed to importartist().
    """
    extensions = (".mp3", ".flac", ".aac", ".ogg", ".ape")

    results = []
    for root, _subdirs, filenames in os.walk(dir):
        for filename in filenames:
            if filename.lower().endswith(extensions):
                results.append(os.path.join(root, filename))
    logger.log(u'%i music files found' % len(results))

    # Collect each artist once, keeping first-seen order so the import
    # order is deterministic.
    artistlist = []
    seen = set()
    for song in results:
        try:
            media = MediaFile(song)
        except Exception:
            # Best effort: one unreadable file must not abort the scan.
            # `Exception` (rather than a bare except) lets
            # KeyboardInterrupt/SystemExit propagate.
            logger.log("Could not read file: '" + song + "'", logger.ERROR)
            continue
        artist = media.artist
        if artist and artist not in seen:
            seen.add(artist)
            artistlist.append(artist)

    logger.log(u"Preparing to import %i artists" % len(artistlist))
    importartist(artistlist)
def itunesImport(pathtoxml):
if os.path.splitext(pathtoxml)[1] == '.xml':
logger.log(u"Loading xml file from"+ pathtoxml)
@@ -23,11 +56,17 @@ def itunesImport(pathtoxml):
lst.append(song.artist)
rawlist = {}.fromkeys(lst).keys()
artistlist = [f for f in rawlist if f != None]
importartist(artistlist)
else:
rawlist = os.listdir(pathtoxml)
logger.log(u"Loading artists from directory:" +pathtoxml)
exclude = ['.ds_store', 'various artists', 'untitled folder', 'va']
artistlist = [f for f in rawlist if f.lower() not in exclude]
importartist(artistlist)
def importartist(artistlist):
for name in artistlist:
logger.log(u"Querying MusicBrainz for: "+name)
time.sleep(1)
@@ -43,9 +82,6 @@ def itunesImport(pathtoxml):
artist = ws.Query().getArtistById(artistid, inc)
conn=sqlite3.connect(database)
c=conn.cursor()
c.execute('CREATE TABLE IF NOT EXISTS artists (ArtistID TEXT UNIQUE, ArtistName TEXT, ArtistSortName TEXT, DateAdded TEXT, Status TEXT)')
c.execute('CREATE TABLE IF NOT EXISTS albums (ArtistID TEXT, ArtistName TEXT, AlbumTitle TEXT, AlbumASIN TEXT, ReleaseDate TEXT, DateAdded TEXT, AlbumID TEXT UNIQUE, Status TEXT)')
c.execute('CREATE TABLE IF NOT EXISTS tracks (ArtistID TEXT, ArtistName TEXT, AlbumTitle TEXT, AlbumASIN TEXT, AlbumID TEXT, TrackTitle TEXT, TrackDuration TEXT, TrackID TEXT)')
c.execute('SELECT ArtistID from artists')
artistlist = c.fetchall()
if any(artistid in x for x in artistlist):

1
lib/__init__.py Executable file
View File

@@ -0,0 +1 @@

19
lib/beets/__init__.py Normal file
View File

@@ -0,0 +1,19 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
__version__ = '1.0b9'
__author__ = 'Adrian Sampson <adrian@radbox.org>'
from lib.beets import library
Library = library.Library

View File

@@ -0,0 +1,599 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Facilities for automatically determining files' correct metadata.
"""
import os
import logging
from collections import defaultdict
import re
from lib.munkres import Munkres
from unidecode import unidecode
from lib.beets.autotag import mb
from lib.beets import library, mediafile, plugins
from lib.beets.util import levenshtein, sorted_walk
# Try 5 releases. In the future, this should be more dynamic: let the
# probability of continuing to the next release be inversely
# proportional to how good our current best is and how long we've
# already taken.
MAX_CANDIDATES = 5
# Distance parameters.
# Text distance weights: proportions on the normalized intuitive edit
# distance.
ARTIST_WEIGHT = 3.0
ALBUM_WEIGHT = 3.0
# The weight of the entire distance calculated for a given track.
TRACK_WEIGHT = 1.0
# These distances are components of the track distance (that is, they
# compete against each other but not ARTIST_WEIGHT and ALBUM_WEIGHT;
# the overall TRACK_WEIGHT does that).
TRACK_TITLE_WEIGHT = 3.0
# Used instead of a global artist penalty for various-artist matches.
TRACK_ARTIST_WEIGHT = 2.0
# Added when the indices of tracks don't match.
TRACK_INDEX_WEIGHT = 1.0
# Track length weights: no penalty before GRACE, maximum (WEIGHT)
# penalty at GRACE+MAX discrepancy.
TRACK_LENGTH_GRACE = 10
TRACK_LENGTH_MAX = 30
TRACK_LENGTH_WEIGHT = 2.0
# MusicBrainz track ID matches.
TRACK_ID_WEIGHT = 5.0
# Recommendation constants.
RECOMMEND_STRONG = 'RECOMMEND_STRONG'
RECOMMEND_MEDIUM = 'RECOMMEND_MEDIUM'
RECOMMEND_NONE = 'RECOMMEND_NONE'
# Thresholds for recommendations.
STRONG_REC_THRESH = 0.04
MEDIUM_REC_THRESH = 0.25
REC_GAP_THRESH = 0.25
# Parameters for string distance function.
# Words that can be moved to the end of a string using a comma.
SD_END_WORDS = ['the', 'a', 'an']
# Reduced weights for certain portions of the string.
SD_PATTERNS = [
(r'^the ', 0.1),
(r'[\[\(]?(ep|single)[\]\)]?', 0.0),
(r'[\[\(]?(featuring|feat|ft)[\. :].+', 0.1),
(r'\(.*?\)', 0.3),
(r'\[.*?\]', 0.3),
(r'(, )?(pt\.|part) .+', 0.2),
]
# Replacements to use before testing distance.
SD_REPLACE = [
(r'&', 'and'),
]
# Artist signals that indicate "various artists".
VA_ARTISTS = (u'', u'various artists', u'va', u'unknown')
# Autotagging exceptions.
class AutotagError(Exception):
    """Exception type reserved for autotagging failures."""
# Global logger.
log = logging.getLogger('beets')
def albums_in_dir(path):
    """Yield `(directory, items)` pairs for every directory under `path`
    that contains at least one readable media file.

    The tree is traversed with sorted_walk(). Every file is offered to
    library.Item.from_path(); files of an unsupported type are silently
    ignored and unreadable files are logged and ignored. A directory
    with at least one resulting Item is treated as an album.
    """
    for root, dirs, files in sorted_walk(path):
        found = []
        for name in files:
            full_path = os.path.join(root, name)
            try:
                item = library.Item.from_path(full_path)
            except mediafile.FileTypeError:
                # Not a media file we understand; nothing to report.
                continue
            except mediafile.UnreadableFileError:
                log.warn('unreadable file: ' + name)
                continue
            found.append(item)

        # Only directories that actually hold media are reported.
        if found:
            yield root, found
def _string_dist_basic(str1, str2):
    """Return the normalized edit distance between two strings.

    Both strings are transliterated with unidecode(), lowercased and
    stripped of every character outside [a-z0-9] before the Levenshtein
    distance is taken. The result is divided by the length of the longer
    cleaned string; two strings that are both empty after cleaning have
    distance 0.0.
    """
    def _clean(text):
        return re.sub(r'[^a-z0-9]', '', unidecode(text).lower())

    left = _clean(str1)
    right = _clean(str2)
    if not (left or right):
        # Nothing left to compare (and nothing to divide by).
        return 0.0
    longest = max(len(left), len(right))
    return levenshtein(left, right) / float(longest)
def string_dist(str1, str2):
    """Return an "intuitive" normalized edit distance between two strings.

    On top of _string_dist_basic() this applies three adjustments:
    a trailing ", the" / ", a" / ", an" (SD_END_WORDS) is moved back to
    the front, the substitutions in SD_REPLACE are applied, and portions
    matched by SD_PATTERNS contribute to the distance only with their
    reduced weight.
    """
    str1 = str1.lower()
    str2 = str2.lower()

    # "something, the" is treated as "the something".
    for word in SD_END_WORDS:
        suffix = ', %s' % word
        if str1.endswith(suffix):
            str1 = '%s %s' % (word, str1[:-len(suffix)])
        if str2.endswith(suffix):
            str2 = '%s %s' % (word, str2[:-len(suffix)])

    # Basic normalizing substitutions.
    for pat, repl in SD_REPLACE:
        str1 = re.sub(pat, repl, str1)
        str2 = re.sub(pat, repl, str2)

    # For each pattern, see how much of the current distance is due to
    # the matched portion. That share is removed from the baseline and
    # re-added scaled by the pattern's weight.
    base_dist = _string_dist_basic(str1, str2)
    penalty = 0.0
    for pat, weight in SD_PATTERNS:
        stripped1 = re.sub(pat, '', str1)
        stripped2 = re.sub(pat, '', str2)
        if stripped1 == str1 and stripped2 == str2:
            # Pattern not present in either string.
            continue

        stripped_dist = _string_dist_basic(stripped1, stripped2)
        gain = max(0.0, base_dist - stripped_dist)
        if gain == 0.0:
            # Dropping the portion did not help; keep the strings as-is.
            continue

        # Continue from the stripped strings so the same portion is not
        # matched again by a later pattern.
        str1, str2 = stripped1, stripped2
        base_dist = stripped_dist
        penalty += weight * gain

    return base_dist + penalty
def _plurality(objs):
"""Given a sequence of comparable objects, returns the object that
is most common in the set and if it is the only object is the set.
"""
# Calculate frequencies.
freqs = defaultdict(int)
for obj in objs:
freqs[obj] += 1
# Find object with maximum frequency.
max_freq = 0
res = None
for obj, freq in freqs.items():
if freq > max_freq:
max_freq = freq
res = obj
return res, len(freqs) <= 1
def current_metadata(items):
    """Return `(artist, album, artist_consensus)` for a set of Items.

    Artist and album are the plurality values of the items' `artist`
    and `album` attributes; `artist_consensus` tells whether all items
    agree on the artist.
    """
    artist, artist_consensus = _plurality([item.artist for item in items])
    album, _ = _plurality([item.album for item in items])
    return artist, album, artist_consensus
def order_items(items, trackinfo):
    """Reorder `items` so that they line up with the canonical `trackinfo`.

    Returns None when the two sequences differ in length. Otherwise a
    cost matrix of track_distance() values is built (rows: items,
    columns: canonical tracks, with 1-based track indices) and a
    minimum-cost assignment is computed with Munkres; the result lists
    the items in canonical track order.
    """
    if len(items) != len(trackinfo):
        return None

    costs = [
        [track_distance(item, canon, pos + 1)
         for pos, canon in enumerate(trackinfo)]
        for item in items
    ]

    # Each pair is (row, column) == (item index, canonical index).
    assignment = Munkres().compute(costs)

    ordered = [None] * len(items)
    for item_idx, canon_idx in assignment:
        ordered[canon_idx] = items[item_idx]
    return ordered
def track_distance(item, track_data, track_index=None, incl_artist=False):
    """Return the normalized distance between `item` and `track_data`.

    The result is the accumulated penalty divided by the maximum penalty
    of the components that took part. Components: track length, title,
    track artist (only when `incl_artist` is set and `track_data` has an
    'artist'), track index (only when both `track_index` and
    `item.track` are set), MusicBrainz track ID (only when the item has
    one), and whatever plugins.track_distance() contributes.
    """
    penalty = 0.0
    penalty_max = 0.0

    # Length: free within the grace period, then linear up to the cap.
    # A candidate without a length is charged the full weight.
    if 'length' in track_data:
        excess = abs(item.length - track_data['length'])
        excess = max(excess - TRACK_LENGTH_GRACE, 0.0)
        excess = min(excess, TRACK_LENGTH_MAX)
        penalty += (excess / TRACK_LENGTH_MAX) * TRACK_LENGTH_WEIGHT
    else:
        penalty += TRACK_LENGTH_WEIGHT
    penalty_max += TRACK_LENGTH_WEIGHT

    # Title.
    penalty += string_dist(item.title, track_data['title']) * TRACK_TITLE_WEIGHT
    penalty_max += TRACK_TITLE_WEIGHT

    # Track artist. The candidate data does not always carry an artist,
    # so the component is skipped when it is absent.
    if incl_artist and 'artist' in track_data:
        artist_dist = string_dist(item.artist, track_data['artist'])
        penalty += artist_dist * TRACK_ARTIST_WEIGHT
        penalty_max += TRACK_ARTIST_WEIGHT

    # Track index.
    if track_index and item.track:
        if track_index != item.track:
            penalty += TRACK_INDEX_WEIGHT
        penalty_max += TRACK_INDEX_WEIGHT

    # MusicBrainz track ID.
    if item.mb_trackid:
        if item.mb_trackid != track_data['id']:
            penalty += TRACK_ID_WEIGHT
        penalty_max += TRACK_ID_WEIGHT

    # Plugin contributions.
    plugin_penalty, plugin_max = plugins.track_distance(item, track_data)
    penalty += plugin_penalty
    penalty_max += plugin_max

    return penalty / penalty_max
def distance(items, info):
    """Return the normalized distance between an ordered list of items
    and the candidate album described by `info`.

    Combines the album-artist distance (skipped for various-artists
    candidates), the album-title distance, the per-track distances and
    the plugin album distance, then divides by the maximum attainable
    total. Returns 0.0 when no component contributed.
    """
    cur_artist, cur_album, _ = current_metadata(items)
    cur_artist = cur_artist or ''
    cur_album = cur_album or ''
    is_va = info['va']

    total = 0.0
    total_max = 0.0

    # Album-level artist and title.
    if not is_va:
        total += string_dist(cur_artist, info['artist']) * ARTIST_WEIGHT
        total_max += ARTIST_WEIGHT
    total += string_dist(cur_album, info['album']) * ALBUM_WEIGHT
    total_max += ALBUM_WEIGHT

    # Tracks, paired positionally; the track artist only counts for
    # various-artists candidates.
    for index, (item, track_data) in enumerate(zip(items, info['tracks'])):
        track_dist = track_distance(item, track_data, index + 1, is_va)
        total += track_dist * TRACK_WEIGHT
        total_max += TRACK_WEIGHT

    # Plugins.
    plugin_dist, plugin_max = plugins.album_distance(items, info)
    total += plugin_dist
    total_max += plugin_max

    if total_max == 0.0:
        # Avoid dividing by zero.
        return 0.0
    return total / total_max
def apply_item_metadata(item, track_data):
    """Copy artist, title and MusicBrainz track ID from `track_data` onto
    `item`; the MusicBrainz artist ID is copied only when present.

    Other fields of the item (album, track number, ...) are left as
    they are.
    """
    for attr, key in (('artist', 'artist'),
                      ('title', 'title'),
                      ('mb_trackid', 'id')):
        setattr(item, attr, track_data[key])

    if 'artist_id' in track_data:
        item.mb_artistid = track_data['artist_id']
def apply_metadata(items, info):
    """Write the album metadata in `info` onto the ordered `items`.

    Items and info['tracks'] are paired positionally. Track-level artist
    name and artist ID win over the album-level ones when the track
    data provides them; year, month and day are only set when present
    in `info`.
    """
    album_artist = info['artist']
    album_artist_id = info['artist_id']

    for position, (item, track_data) in enumerate(zip(items, info['tracks'])):
        # Artist, album and track count.
        item.artist = track_data['artist'] if 'artist' in track_data \
            else album_artist
        item.albumartist = album_artist
        item.album = info['album']
        item.tracktotal = len(items)

        # Release date: whichever parts are known.
        for part in ('year', 'month', 'day'):
            if part in info:
                setattr(item, part, info[part])

        # Title and 1-based track number.
        item.title = track_data['title']
        item.track = position + 1

        # MusicBrainz identifiers.
        item.mb_trackid = track_data['id']
        item.mb_albumid = info['album_id']
        item.mb_artistid = track_data['artist_id'] \
            if 'artist_id' in track_data else album_artist_id
        item.mb_albumartistid = album_artist_id
        item.albumtype = info['albumtype']

        # Compilation flag.
        item.comp = info['va']
def match_by_id(items):
    """Look up the album the items are already tagged with, if any.

    When at least one item carries a MusicBrainz album ID and all such
    IDs are identical, returns the result of mb.album_for_id() for that
    ID. Returns None when there is no ID or the IDs disagree.
    """
    albumids = [item.mb_albumid for item in items if item.mb_albumid]
    if not albumids:
        log.debug('No album IDs found.')
        return None

    # Consensus means every tagged item names the same album.
    albumid = albumids[0]
    if not all(other == albumid for other in albumids):
        log.debug('No album ID consensus.')
        return None

    log.debug('Searching for discovered album ID: ' + albumid)
    return mb.album_for_id(albumid)
    # TODO: at the expense of performance, other IDs (track, artist)
    # could be used when the album tag is missing, but that case seems
    # very unlikely.
def recommendation(results):
    """Return a recommendation flag for a sorted list of result tuples.

    Only the distances (first element of each tuple) are inspected. The
    outcome is RECOMMEND_STRONG, RECOMMEND_MEDIUM or RECOMMEND_NONE.
    """
    if not results:
        # No candidates: no recommendation.
        return RECOMMEND_NONE

    best = results[0][0]
    if best < STRONG_REC_THRESH:
        return RECOMMEND_STRONG
    if len(results) == 1:
        # A lone candidate is at least a medium recommendation.
        return RECOMMEND_MEDIUM
    if best <= MEDIUM_REC_THRESH:
        return RECOMMEND_MEDIUM
    if results[1][0] - best >= REC_GAP_THRESH:
        # The best candidate is far ahead of the runner-up.
        return RECOMMEND_MEDIUM
    return RECOMMEND_NONE
def validate_candidate(items, tuple_dict, info):
    """Try to add the candidate `info` to `tuple_dict`.

    The candidate is dropped when its album ID is already present, when
    its track count differs from the number of items, or when
    order_items() yields nothing. Otherwise
    `tuple_dict[album_id] = (distance, ordered_items, info)` is stored.
    """
    log.debug('Candidate: %s - %s' % (info['artist'], info['album']))
    album_id = info['album_id']

    if album_id in tuple_dict:
        log.debug('Duplicate.')
        return

    tracks = info['tracks']
    if len(items) != len(tracks):
        log.debug('Track count mismatch.')
        return

    ordered = order_items(items, tracks)
    if not ordered:
        log.debug('Not orderable.')
        return

    dist = distance(ordered, info)
    log.debug('Success. Distance: %f' % dist)
    tuple_dict[album_id] = dist, ordered, info
def tag_album(items, timid=False, search_artist=None, search_album=None,
              search_id=None):
    """Infer album tags for `items`.

    Returns a 4-tuple:
        - the current artist,
        - the current album,
        - a list of (distance, items, info) tuples, where `items` is a
          reordered copy of the input and `info` the candidate's tag
          dictionary, sorted best match first,
        - a recommendation (RECOMMEND_STRONG, RECOMMEND_MEDIUM or
          RECOMMEND_NONE) for the first candidate.

    `search_artist` together with `search_album`, or `search_id`,
    replace the current metadata as search terms. When `search_id` is
    given, no metadata-based search is performed.

    NOTE(review): the previous docstring said an AutotagError may be
    raised; nothing in this function raises it directly -- confirm
    against the helpers it calls.
    """
    cur_artist, cur_album, artist_consensus = current_metadata(items)
    log.debug('Tagging %s - %s' % (cur_artist, cur_album))

    # Valid candidates, keyed by MusicBrainz album ID.
    found = {}

    # Stage 1: lookup by MusicBrainz album ID (explicit or from tags).
    if search_id:
        log.debug('Searching for album ID: ' + search_id)
        id_info = mb.album_for_id(search_id)
    else:
        id_info = match_by_id(items)

    if id_info:
        validate_candidate(items, found, id_info)
        rec = recommendation(found.values())
        log.debug('Album ID match recommendation is ' + str(rec))
        # A very good ID match ends the search right away (unless
        # timid); a weaker one competes with the metadata matches.
        if found and not timid and rec == RECOMMEND_STRONG:
            log.debug('ID match.')
            return cur_artist, cur_album, found.values(), rec

    # An explicit ID search never falls through to a metadata search.
    if search_id is not None:
        if not found:
            return cur_artist, cur_album, [], RECOMMEND_NONE
        return cur_artist, cur_album, found.values(), rec

    # Stage 2: search by artist/album, defaulting to the current tags.
    if not (search_artist and search_album):
        search_artist, search_album = cur_artist, cur_album
    log.debug(u'Search terms: %s - %s' % (search_artist, search_album))

    candidates = []
    if search_artist and search_album:
        candidates = list(mb.match_album(search_artist, search_album,
                                         len(items), MAX_CANDIDATES))

    # Possibly add a "various artists" search.
    if search_album and ((not artist_consensus) or
                         (search_artist.lower() in VA_ARTISTS) or
                         any(item.comp for item in items)):
        log.debug(u'Possibly Various Artists; adding matches.')
        candidates.extend(mb.match_album(None, search_album, len(items),
                                         MAX_CANDIDATES))

    # Candidates supplied by plugins.
    candidates.extend(plugins.candidates(items))

    log.debug(u'Evaluating %i candidates.' % len(candidates))
    for info in candidates:
        validate_candidate(items, found, info)

    # Best (smallest distance) first.
    ranked = sorted(found.values())
    rec = recommendation(ranked)
    return cur_artist, cur_album, ranked, rec
def tag_item(item, timid=False, search_artist=None, search_title=None,
             search_id=None):
    """Find candidate metadata for a single track.

    Returns `(candidates, recommendation)` where `candidates` is a list
    of `(distance, track_info)` pairs sorted by distance.
    `search_artist` together with `search_title`, or `search_id`,
    replace the item's own metadata as search terms. When `search_id`
    is given, no metadata-based search is performed.
    """
    def _scored(track_info):
        return track_distance(item, track_info, incl_artist=True), track_info

    candidates = []

    # Stage 1: lookup by MusicBrainz track ID (explicit or from tags).
    trackid = search_id or item.mb_trackid
    if trackid:
        log.debug('Searching for track ID: ' + trackid)
        track_info = mb.track_for_id(trackid)
        if track_info:
            candidates.append(_scored(track_info))
            # A good enough match ends the search right away.
            rec = recommendation(candidates)
            if rec == RECOMMEND_STRONG and not timid:
                log.debug('Track ID match.')
                return candidates, rec

    # An explicit ID search never falls through to a metadata search.
    if search_id is not None:
        if not candidates:
            return [], RECOMMEND_NONE
        return candidates, rec

    # Stage 2: search by artist/title, defaulting to the item's tags.
    if not (search_artist and search_title):
        search_artist, search_title = item.artist, item.title
    log.debug(u'Item search terms: %s - %s' % (search_artist, search_title))

    for track_info in mb.match_track(search_artist, search_title):
        candidates.append(_scored(track_info))

    # Candidates supplied by plugins.
    for track_info in plugins.item_candidates(item):
        candidates.append(_scored(track_info))

    log.debug('Found %i candidates.' % len(candidates))
    candidates.sort()
    rec = recommendation(candidates)
    return candidates, rec

77
lib/beets/autotag/art.py Normal file
View File

@@ -0,0 +1,77 @@
# This file is part of beets.
# Copyright 2010, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Finding album art for tagged albums."""
import urllib
import sys
import logging
from lib.beets.autotag.mb import album_for_id
# The common logger.
log = logging.getLogger('beets')
# Art from Amazon.
AMAZON_URL = 'http://images.amazon.com/images/P/%s.%02i.LZZZZZZZ.jpg'
AMAZON_INDICES = (1,2)
AMAZON_CONTENT_TYPE = 'image/jpeg'
def art_for_asin(asin):
    """Download cover art for an Amazon ID (ASIN) string.

    Each index in AMAZON_INDICES is tried in turn. Returns the local
    path of the first download whose content type is
    AMAZON_CONTENT_TYPE, or None when no attempt produces one.
    """
    for index in AMAZON_INDICES:
        url = AMAZON_URL % (asin, index)
        try:
            log.debug('Downloading art: %s' % url)
            path, headers = urllib.urlretrieve(url)
        except IOError:
            log.debug('error fetching art at URL %s' % url)
        else:
            # Make sure what came back is actually an image.
            if headers.gettype() == AMAZON_CONTENT_TYPE:
                log.debug('Downloaded art to: %s' % path)
                return path
    return None
# Main interface.
def art_for_album(album):
    """Return a path to downloaded art for an album info dictionary, or
    None when the album has no ASIN or no art could be fetched.
    """
    asin = album['asin']
    if not asin:
        log.debug('No ASIN available: no art found.')
        return None

    log.debug('Fetching album art for ASIN %s.' % asin)
    return art_for_asin(asin)
# Smoke test.
if __name__ == '__main__':
    # Smoke test: fetch art for the album ID given on the command line
    # and print the downloaded path and its size in KiB.
    aid = sys.argv[1]
    album = album_for_id(aid)
    if album:
        fn = art_for_album(album)
        if fn:
            print(fn)
            print(len(open(fn).read()) // 1024)
        else:
            print('no art found')
    else:
        print('album not found')

333
lib/beets/autotag/mb.py Normal file
View File

@@ -0,0 +1,333 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Searches for albums in the MusicBrainz database.
This is a thin layer over the official `python-musicbrainz2` module. It
abstracts away that module's object model, the server's Lucene query
syntax, and other uninteresting parts of using musicbrainz2. The
principal interface is the function `match_album`.
"""
from __future__ import with_statement # for Python 2.5
import re
import time
import logging
import lib.musicbrainz2.webservice as mbws
from lib.musicbrainz2.model import Release
from threading import Lock
from lib.musicbrainz2.model import VARIOUS_ARTISTS_ID
SEARCH_LIMIT = 10
VARIOUS_ARTISTS_ID = VARIOUS_ARTISTS_ID.rsplit('/', 1)[1]
class ServerBusyError(Exception):
    """Raised by _query_wrap() when every retry of a query has failed."""


class BadResponseError(Exception):
    """Raised by _query_wrap() when the server's response is malformed."""
log = logging.getLogger('beets')
# We hard-code IDs for artists that can't easily be searched for.
SPECIAL_CASE_ARTISTS = {
'!!!': 'f26c72d3-e52c-467b-b651-679c73d8e1a7',
}
RELEASE_TYPES = [
Release.TYPE_ALBUM,
Release.TYPE_SINGLE,
Release.TYPE_EP,
Release.TYPE_COMPILATION,
Release.TYPE_SOUNDTRACK,
Release.TYPE_SPOKENWORD,
Release.TYPE_INTERVIEW,
Release.TYPE_AUDIOBOOK,
Release.TYPE_LIVE,
Release.TYPE_REMIX,
Release.TYPE_OTHER
]
RELEASE_INCLUDES = mbws.ReleaseIncludes(artist=True, tracks=True,
releaseEvents=True, labels=True,
releaseGroup=True)
TRACK_INCLUDES = mbws.TrackIncludes(artist=True)
# MusicBrainz requires that a client does not query the server more
# than once a second. This function enforces that limit using a
# module-global variable to keep track of the last time a query was
# sent.
MAX_QUERY_RETRY = 8
QUERY_WAIT_TIME = 1.0
last_query_time = 0.0
mb_lock = Lock()
def _query_wrap(fun, *args, **kwargs):
    """Call `fun(*args, **kwargs)` under the MusicBrainz rate limit.

    While holding `mb_lock`, waits until at least QUERY_WAIT_TIME
    seconds have passed since the previous query and then calls `fun`.
    The call is retried (up to MAX_QUERY_RETRY attempts) after a
    connection error or a "server busy" (503/504) web-service error.

    Returns whatever `fun` returns.

    Raises:
        BadResponseError: the server sent a malformed response.
        ServerBusyError: every attempt failed.
        mbws.WebServiceError: any other web-service error, re-raised
            unchanged.
    """
    # Local import: used only to fetch the active exception in a way
    # that parses on both old (no `except ... as`) and new interpreters.
    import sys

    global last_query_time

    with mb_lock:
        for _ in range(MAX_QUERY_RETRY):
            since_last_query = time.time() - last_query_time
            if since_last_query < QUERY_WAIT_TIME:
                time.sleep(QUERY_WAIT_TIME - since_last_query)
            last_query_time = time.time()

            # ConnectionError and ResponseError are subclasses of
            # WebServiceError, so they must be handled first; otherwise
            # the generic handler below would shadow them.
            try:
                res = fun(*args, **kwargs)
            except mbws.ConnectionError:
                # Typically a timeout. Retry.
                pass
            except mbws.ResponseError:
                # Malformed response from server.
                exc = sys.exc_info()[1]
                log.error('Bad response from MusicBrainz: ' + str(exc))
                raise BadResponseError()
            except mbws.WebServiceError:
                message = str(sys.exc_info()[1].reason)
                if not any(('Error %i' % errnum) in message
                           for errnum in (503, 504)):
                    # Not a "server busy" error; let the caller see it.
                    raise
                # Server busy. Retry.
            else:
                return res

        # Gave up.
        # FIXME exponential backoff?
        raise ServerBusyError()
def get_releases(**params):
    """Run a release query and yield one release dict per usable result.

    `params` are passed on to mbws.ReleaseFilter. An 'artistName' that
    appears in SPECIAL_CASE_ARTISTS is replaced by the corresponding
    'artistId'. A malformed server response yields no results. Each
    result's tracks are fetched with release_info(); results for which
    that lookup returns None are skipped.
    """
    # Replace special cases.
    if 'artistName' in params:
        artist = params['artistName']
        if artist in SPECIAL_CASE_ARTISTS:
            del params['artistName']
            params['artistId'] = SPECIAL_CASE_ARTISTS[artist]

    # Issue query.
    filt = mbws.ReleaseFilter(**params)
    try:
        results = _query_wrap(mbws.Query().getReleases, filter=filt)
    except BadResponseError:
        results = ()

    # Construct results.
    for result in results:
        release = result.release
        info = release_info(release.id)
        if info is None:
            # release_info() reports a failed lookup as None; unpacking
            # that would raise TypeError and abort the whole search.
            continue
        tracks, _ = info
        yield release_dict(release, tracks)
def release_info(release_id):
    """Fetch `(tracks, release_group_id)` for a MusicBrainz release ID.

    Returns None when the server response is malformed or the lookup
    yields nothing.
    """
    try:
        release = _query_wrap(mbws.Query().getReleaseById, release_id,
                              RELEASE_INCLUDES)
    except BadResponseError:
        return None

    if not release:
        return None
    return release.getTracks(), release.getReleaseGroup().getId()
def _lucene_escape(text):
"""Escapes a string so it may be used verbatim in a Lucene query
string.
"""
# Regex stolen from MusicBrainz Picard.
out = re.sub(r'([+\-&|!(){}\[\]\^"~*?:\\])', r'\\\1', text)
return out.replace('\x00', '')
def _lucene_query(criteria):
    """Build a MusicBrainz search query string from a dictionary that
    maps field names to search terms.

    Each term is escaped, stripped and lowercased; fields whose term
    ends up empty are left out. The remaining `field:(term)` clauses
    are joined with spaces.
    """
    clauses = []
    for field, term in criteria.items():
        term = _lucene_escape(term).strip().lower()
        if not term:
            continue
        clauses.append(u'%s:(%s)' % (field, term))
    return u' '.join(clauses)
def find_releases(criteria, limit=SEARCH_LIMIT):
    """Return an iterable of release dictionaries from the MusicBrainz
    database that match `criteria`.

    `criteria` maps MusicBrainz field names to search terms. The field
    names are from MusicBrainz's Lucene query syntax, which is detailed
    here:
        http://wiki.musicbrainz.org/Text_Search_Syntax

    An 'artist' found in SPECIAL_CASE_ARTISTS is searched by its
    hard-coded ID ('arid') instead. When every search term is empty no
    query is sent and an empty list is returned. The caller's
    dictionary is not modified.
    """
    # Work on a copy so the special-case rewrite below does not leak
    # into the caller's dictionary.
    criteria = dict(criteria)

    # Replace special cases.
    if 'artist' in criteria:
        artist = criteria['artist']
        if artist in SPECIAL_CASE_ARTISTS:
            del criteria['artist']
            criteria['arid'] = SPECIAL_CASE_ARTISTS[artist]

    if not any(criteria.values()):
        # Nothing to search for. Return an empty iterable rather than
        # None so callers can iterate the result unconditionally.
        return []

    # Build the filter and send the query.
    query = _lucene_query(criteria)
    log.debug('album query: %s' % query)
    return get_releases(limit=limit, query=query)
def find_tracks(criteria, limit=SEARCH_LIMIT):
    """Yield track dictionaries from MusicBrainz that match `criteria`,
    a field-name-to-search-term dictionary like the one taken by
    `find_releases`.

    Nothing is yielded when every search term is empty or when the
    server response is malformed.
    """
    if not any(criteria.values()):
        return

    query = _lucene_query(criteria)
    log.debug('track query: %s' % query)
    track_filter = mbws.TrackFilter(limit=limit, query=query)
    try:
        results = _query_wrap(mbws.Query().getTracks, filter=track_filter)
    except BadResponseError:
        results = ()

    for result in results:
        yield track_dict(result.track)
def track_dict(track):
    """Summarize a MusicBrainz `Track` object as a dictionary.

    Always contains 'title' and 'id' (the last path component of
    `track.id`). 'artist' and 'artist_id' are added when the track has
    an artist, 'length' (duration divided by 1000.0) when it has a
    duration.
    """
    summary = {
        'title': track.title,
        'id': track.id.rsplit('/', 1)[1],
    }

    # Track artists are only present for releases with multiple artists.
    artist = track.artist
    if artist is not None:
        summary['artist'] = artist.name
        summary['artist_id'] = artist.id.rsplit('/', 1)[1]

    # The duration is not always present.
    duration = track.duration
    if duration is not None:
        summary['length'] = duration/(1000.0)

    return summary
def release_dict(release, tracks=None):
    """Return a dictionary with the interesting data of a MusicBrainz
    `Release` object.

    Always present: 'album', 'album_id', 'artist', 'artist_id', 'asin',
    'albumtype' ('' when no known type is set) and 'va' (whether the
    artist is the Various Artists artist). 'year', 'month', 'day' and
    'label' are added when the earliest release event provides them.
    When `tracks` (a list of `Track` objects) is given, 'tracks' holds
    the list of their track_dict() summaries.
    """
    # Basic info.
    out = {'album':     release.title,
           'album_id':  release.id.rsplit('/', 1)[1],
           'artist':    release.artist.name,
           'artist_id': release.artist.id.rsplit('/', 1)[1],
           'asin':      release.asin,
           'albumtype': '',
           }
    out['va'] = out['artist_id'] == VARIOUS_ARTISTS_ID

    # Release type not always populated; the first known one wins.
    for releasetype in release.types:
        if releasetype in RELEASE_TYPES:
            out['albumtype'] = releasetype.split('#')[1].lower()
            break

    # Release date and label.
    try:
        event = release.getEarliestReleaseEvent()
    except Exception:
        # The python-musicbrainz2 module has a bug that will raise an
        # exception when there is no release date to be found. In this
        # case, we just skip adding a release date to the dict.
        # `Exception` (rather than a bare except) keeps
        # KeyboardInterrupt/SystemExit from being swallowed.
        event = None

    if event:
        # Release date: "YYYY[-MM[-DD]]", so fill in as many parts as
        # are present.
        date_str = event.getDate()
        if date_str:
            date_parts = date_str.split('-')
            for key, part in zip(('year', 'month', 'day'), date_parts):
                out[key] = int(part)

        # Label name.
        label = event.getLabel()
        if label:
            out['label'] = label.getName()

    # Tracks. Built as a real list because callers take its length.
    if tracks is not None:
        out['tracks'] = [track_dict(track) for track in tracks]

    return out
def match_album(artist, album, tracks=None, limit=SEARCH_LIMIT):
    """Search for a single album ("release" in MusicBrainz parlance).

    The query is built from an artist name, an album name and,
    optionally, the number of tracks on the album. When `artist` is
    None the search is restricted to the Various Artists ID instead.
    Returns whatever `find_releases` returns for those criteria.
    """
    if artist is None:
        # Various Artists search.
        criteria = {'release': album, 'arid': VARIOUS_ARTISTS_ID}
    else:
        criteria = {'release': album, 'artist': artist}

    if tracks is not None:
        criteria['tracks'] = str(tracks)

    # NOTE(review): `limit` is accepted but never forwarded to
    # find_releases -- confirm whether that is intentional.
    return find_releases(criteria)
def match_track(artist, title):
    """Search for a single track by artist name and title.

    Returns the result of `find_tracks` for those two criteria.
    """
    criteria = {'artist': artist, 'track': title}
    return find_tracks(criteria)
def album_for_id(albumid):
"""Fetches an album by its MusicBrainz ID and returns an
information dictionary. If no match is found, returns None.
"""
query = mbws.Query()
try:
album = _query_wrap(query.getReleaseById, albumid, RELEASE_INCLUDES)
except BadResponseError:
return None
except (mbws.ResourceNotFoundError, mbws.RequestError), exc:
log.debug('Album ID match failed: ' + str(exc))
return None
return release_dict(album, album.tracks)
def track_for_id(trackid):
"""Fetches a track by its MusicBrainz ID. Returns a track info
dictionary or None if no track is found.
"""
query = mbws.Query()
try:
track = _query_wrap(query.getTrackById, trackid, TRACK_INCLUDES)
except BadResponseError:
return None
except (mbws.ResourceNotFoundError, mbws.RequestError), exc:
log.debug('Track ID match failed: ' + str(exc))
return None
return track_dict(track)

651
lib/beets/importer.py Normal file
View File

@@ -0,0 +1,651 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Provides the basic, interface-agnostic workflow for importing and
autotagging music files.
"""
from __future__ import with_statement # Python 2.5
import os
import logging
import pickle
from lib.beets import autotag
from lib.beets import library
import lib.beets.autotag.art
from lib.beets import plugins
from lib.beets.util import pipeline
from lib.beets.util import syspath, normpath
from lib.beets.util.enumeration import enum
# Choice codes recorded on an ImportTask (see ImportTask.set_choice).
action = enum(
'SKIP', 'ASIS', 'TRACKS', 'MANUAL', 'APPLY', 'MANUAL_ID',
name='action'
)
# Passed to Pipeline.run_parallel() by run_import() for threaded imports.
QUEUE_SIZE = 128
# Pickle file in which progress_set()/progress_get() keep import progress.
STATE_FILE = os.path.expanduser('~/.beetsstate')
# Global logger.
log = logging.getLogger('beets')
class ImportAbort(Exception):
    """Signals that the user aborted the tagging operation."""
# Utilities.
def tag_log(logfile, status, path):
    """Write a "<status> <path>" line about an album to `logfile`.

    `status` should reflect the reason the album couldn't be tagged.
    Nothing is written when `logfile` is false (e.g. None).
    """
    if not logfile:
        return
    print >>logfile, '%s %s' % (status, path)
def log_choice(config, task):
    """Log the task's current choice to the import log file if it is
    one that gets logged (ASIS or SKIP); other choices are ignored.
    """
    # Album tasks log the album path; single-item tasks the item's.
    if task.is_album:
        path = task.path
    else:
        path = task.item.path

    flag = task.choice_flag
    if flag is action.ASIS:
        status = 'asis'
    elif flag is action.SKIP:
        status = 'skip'
    else:
        return
    tag_log(config.logfile, status, path)
def _reopen_lib(lib):
    """Return a fresh `Library` equivalent to `lib` for use in the
    calling thread.

    Because of limitations in SQLite, a given Library is bound to the
    thread in which it was created; pipeline stages call this so that
    each one gets its own instance. Anything that is not a
    `library.Library` is returned unchanged.
    """
    if not isinstance(lib, library.Library):
        return lib
    return library.Library(
        lib.path,
        lib.directory,
        lib.path_formats,
        lib.art_filename,
    )
def _duplicate_check(lib, artist, album, recent=None):
    """Return True if the album already exists, False otherwise.

    `recent` should be a set of (artist, album) pairs; it is consulted
    first and then extended with the pair being checked, so that it
    builds up with every call. After that the library itself is
    searched for an album by `artist` whose title equals `album`.
    """
    if artist is None:
        # As-is import with no artist. Skip check.
        return False

    # Try the recent albums.
    if recent is not None:
        key = (artist, album)
        if key in recent:
            return True
        recent.add(key)

    # Look in the library.
    return any(cand.album == album for cand in lib.albums(artist=artist))
def _item_duplicate_check(lib, artist, title, recent=None):
    """Return True if an item with this artist and title already exists.

    `recent`, when given, is a set of (artist, title) pairs that is
    checked first and then extended with the pair being checked.
    """
    # Try recent items.
    if recent is not None:
        key = (artist, title)
        if key in recent:
            return True
        recent.add(key)

    # Check the library: one result is enough to call it a duplicate.
    matches = lib.items(artist=artist, title=title)
    try:
        try:
            matches.next()
        except StopIteration:
            found = False
        else:
            found = True
    finally:
        # Always release the query iterator.
        matches.close()
    return found
# Utilities for reading and writing the beets progress file, which
# allows long tagging tasks to be resumed when they pause (or crash).
PROGRESS_KEY = 'tagprogress'
def progress_set(toppath, path):
    """Record that tagging for the given `toppath` was successful up to
    `path`. If path is None, then clear the progress value (indicating
    that the tagging completed).

    The state is read from and written back to STATE_FILE as a pickle.
    A state file that is missing, empty, truncated or otherwise
    unreadable is treated as containing no progress, so that a damaged
    file (e.g. left behind by a crash in mid-write) does not break
    every later import.
    """
    try:
        with open(STATE_FILE) as f:
            state = pickle.load(f)
    except (IOError, EOFError, pickle.UnpicklingError):
        state = {}

    # Tolerate a state dictionary that lacks the progress table.
    progress = state.setdefault(PROGRESS_KEY, {})

    if path is None:
        # Remove progress from file.
        progress.pop(toppath, None)
    else:
        progress[toppath] = path

    with open(STATE_FILE, 'w') as f:
        pickle.dump(state, f)
def progress_get(toppath):
    """Get the last successfully tagged subpath of toppath. If toppath
    has no progress information, returns None.

    None is also returned when STATE_FILE is missing, empty, truncated
    or otherwise unreadable, or when it holds no progress table.
    """
    try:
        with open(STATE_FILE) as f:
            state = pickle.load(f)
    except (IOError, EOFError, pickle.UnpicklingError):
        return None
    return state.get(PROGRESS_KEY, {}).get(toppath)
# The configuration structure.
class ImportConfig(object):
    """Holds every setting used during one import session.

    Intended to be used in a "write-once" way: everything is set up in
    the constructor and then never touched again. Each name in
    `_fields` must be supplied as a keyword argument (a missing one
    raises KeyError) and becomes an attribute of the same name.
    """
    _fields = ['lib', 'paths', 'resume', 'logfile', 'color', 'quiet',
               'quiet_fallback', 'copy', 'write', 'art', 'delete',
               'choose_match_func', 'should_resume_func', 'threaded',
               'autot', 'singletons', 'timid', 'choose_item_func']

    def __init__(self, **kwargs):
        for name in self._fields:
            setattr(self, name, kwargs[name])

        # Normalize the paths.
        if self.paths:
            self.paths = [normpath(path) for path in self.paths]
# The importer task class.
class ImportTask(object):
    """One unit of work travelling through the import pipeline, along
    with its intermediate state.

    A task represents an album (a set of items), a single item, or a
    "sentinel" marking that a directory has been finished.
    """
    def __init__(self, toppath=None, path=None, items=None):
        self.toppath = toppath
        self.path = path
        self.items = items
        self.sentinel = False

    @classmethod
    def done_sentinel(cls, toppath):
        """Create a sentinel marking the end of a top-level directory
        import.
        """
        task = cls(toppath)
        task.sentinel = True
        return task

    @classmethod
    def progress_sentinel(cls, toppath, path):
        """Create a sentinel marking that a single directory in a
        larger import has finished. Only singleton imports need this;
        for album imports progress is implied by the album tasks.
        """
        task = cls(toppath, path)
        task.sentinel = True
        return task

    @classmethod
    def item_task(cls, item):
        """Create a task for a single item."""
        task = cls()
        task.item = item
        task.is_album = False
        return task

    def set_match(self, cur_artist, cur_album, candidates, rec):
        """Store the candidates matched for this album by
        `autotag.tag_album` and mark the task as an album task.
        """
        assert not self.sentinel
        self.is_album = True
        self.cur_artist = cur_artist
        self.cur_album = cur_album
        self.candidates = candidates
        self.rec = rec

    def set_null_match(self):
        """Record that no album match was found."""
        self.set_match(None, None, None, None)

    def set_item_match(self, candidates, rec):
        """Store the match for a single-item task."""
        assert not self.is_album
        assert self.item is not None
        self.item_match = (candidates, rec)

    def set_null_item_match(self):
        """Record, for a single-item task, that the item has no
        matches.
        """
        assert not self.is_album
        assert self.item is not None
        self.item_match = None

    def set_choice(self, choice):
        """Record the action selected by the user (or automatically).

        `choice` is either one of the SKIP/ASIS/TRACKS action constants
        or the chosen match: an (info, items) tuple for album tasks or
        just the info for single-item tasks. A match implies the APPLY
        action.
        """
        assert not self.sentinel
        # Not part of the task structure:
        assert choice not in (action.MANUAL, action.MANUAL_ID)
        assert choice != action.APPLY # Only used internally.

        if choice in (action.SKIP, action.ASIS, action.TRACKS):
            self.choice_flag = choice
            self.info = None
            return

        assert not isinstance(choice, action)
        if self.is_album:
            # The items list comes back reordered to fit the match.
            self.info, self.items = choice
        else:
            self.info = choice
        self.choice_flag = action.APPLY # Implicit choice.

    def save_progress(self):
        """Update the progress state to reflect that this task is
        finished.
        """
        # A "done" sentinel has no path, so this clears the progress;
        # a "directory progress" sentinel or a real album task records
        # its path. Single-item tasks record nothing.
        if self.sentinel or self.is_album:
            progress_set(self.toppath, self.path)

    # Logical decisions.

    def should_write_tags(self):
        """Should new info be written to the files' metadata?"""
        flag = self.choice_flag
        if flag == action.APPLY:
            return True
        elif flag in (action.ASIS, action.TRACKS, action.SKIP):
            return False
        else:
            assert False

    def should_fetch_art(self):
        """Should album art be downloaded for this album?"""
        return self.should_write_tags() and self.is_album

    def should_infer_aa(self):
        """When creating an album structure, should the album artist
        field be inferred from the plurality of track artists?
        """
        assert self.is_album
        flag = self.choice_flag
        if flag == action.APPLY:
            # Album artist comes from the info dictionary.
            return False
        elif flag == action.ASIS:
            # As-is imports likely don't have an album artist.
            return True
        else:
            assert False

    def should_skip(self):
        """After a choice has been made, tell whether later stages
        should ignore this task: true for sentinels and for tasks
        marked SKIP.
        """
        return self.sentinel or self.choice_flag == action.SKIP
# Full-album pipeline stages.
def read_tasks(config):
    """Generate an ImportTask for every album found under the
    user-specified paths, followed by a "done" sentinel per path. For a
    singleton import, single-item tasks (plus a progress sentinel per
    directory) are produced instead.

    Unless `config.resume` is False, saved progress is looked up first
    and directories up to and including the last finished one are
    skipped.
    """
    # Look for saved progress.
    track_progress = config.resume is not False
    resume_dirs = {}
    if track_progress:
        for path in config.paths:
            resume_dir = progress_get(path)
            if not resume_dir:
                continue

            # Either accept immediately or prompt for input to decide.
            if config.resume:
                do_resume = True
                log.warn('Resuming interrupted import of %s' % path)
            else:
                do_resume = config.should_resume_func(config, path)

            if do_resume:
                resume_dirs[path] = resume_dir
            else:
                # Clear progress; we're starting from the top.
                progress_set(path, None)

    for toppath in config.paths:
        # Check whether the path is to a file.
        if config.singletons and not os.path.isdir(syspath(toppath)):
            yield ImportTask.item_task(library.Item.from_path(toppath))
            continue

        # Produce paths under this directory. `skip_until` is the
        # directory to fast-forward past; it is None when nothing is
        # being resumed (resume_dirs stays empty without progress).
        skip_until = resume_dirs.get(toppath)
        for path, items in autotag.albums_in_dir(toppath):
            if skip_until:
                # We're fast-forwarding to resume a previous tagging.
                if path == skip_until:
                    # We've hit the last good path! Turn off the
                    # fast-forwarding.
                    skip_until = None
                continue

            # Yield all the necessary tasks.
            if config.singletons:
                for item in items:
                    yield ImportTask.item_task(item)
                yield ImportTask.progress_sentinel(toppath, path)
            else:
                yield ImportTask(toppath, path, items)

        # Indicate the directory is finished.
        yield ImportTask.done_sentinel(toppath)
def initial_lookup(config):
    """A coroutine performing the initial lookup for an album.

    It receives ImportTask objects, stores the result of
    `autotag.tag_album` on each one (or a null match when the autotagger
    raises AutotagError) and yields the task back. Sentinels pass
    through untouched.
    """
    task = None
    while True:
        task = yield task
        if task.sentinel:
            continue

        log.debug('Looking up: %s' % task.path)
        try:
            match = autotag.tag_album(task.items, config.timid)
        except autotag.AutotagError:
            task.set_null_match()
        else:
            task.set_match(*match)
def user_query(config):
    """A coroutine for interfacing with the user about the tagging
    process. It accepts and yields ImportTask objects.

    For each non-sentinel task the choice returned by
    `config.choose_match_func` is recorded and logged. A TRACKS choice
    runs the task's items through a small singleton pipeline and
    yields the resulting item tasks instead. ASIS choices and chosen
    matches are checked for duplicates and turned into SKIP when the
    album is already known.
    """
    lib = _reopen_lib(config.lib)
    recent = set()
    task = None
    while True:
        task = yield task
        if task.sentinel:
            continue

        # Ask the user for a choice.
        choice = config.choose_match_func(task, config)
        task.set_choice(choice)
        log_choice(config, task)

        # As-tracks: transition to singleton workflow.
        if choice is action.TRACKS:
            # Set up a little pipeline for dealing with the singletons.
            item_tasks = []

            def emit_items():
                for item in task.items:
                    yield ImportTask.item_task(item)
                yield ImportTask.progress_sentinel(task.toppath, task.path)

            def collect_items():
                while True:
                    item_task = yield
                    item_tasks.append(item_task)

            stages = (emit_items(), item_lookup(config),
                      item_query(config), collect_items())
            pipeline.Pipeline(stages).run_sequential()
            task = pipeline.multiple(item_tasks)

        # Check for duplicates if we have a match (or ASIS).
        if choice is action.ASIS:
            artist, album = task.cur_artist, task.cur_album
        elif isinstance(choice, tuple):
            artist, album = task.info['artist'], task.info['album']
        else:
            continue

        if _duplicate_check(lib, artist, album, recent):
            tag_log(config.logfile, 'duplicate', task.path)
            log.warn("This album is already in the library!")
            task.set_choice(action.SKIP)
def show_progress(config):
    """Stand-in for the initial_lookup and user_query stages when the
    importer runs without autotagging. It logs each album's path as the
    files are added and marks the task as an as-is import.
    """
    task = None
    while True:
        task = yield task
        if not task.sentinel:
            log.info(task.path)

            # Behave as if ASIS were selected.
            task.set_null_match()
            task.set_choice(action.ASIS)
def apply_choices(config):
    """A coroutine that applies the chosen changes during the autotag
    process: it updates metadata, moves/copies and writes files as
    configured, and adds the album or items to the library. Skipped
    tasks pass through untouched.
    """
    lib = _reopen_lib(config.lib)
    task = None
    while True:
        task = yield task
        if task.should_skip():
            continue

        write_tags = task.should_write_tags()
        if task.is_album:
            items = task.items
        else:
            items = [task.item]

        # Change metadata, move, and copy.
        if write_tags:
            if task.is_album:
                autotag.apply_metadata(task.items, task.info)
            else:
                autotag.apply_item_metadata(task.item, task.info)

        # Remember where the files were so that finalize() can delete
        # the originals afterwards.
        if config.copy and config.delete:
            task.old_paths = [os.path.realpath(syspath(item.path))
                              for item in items]

        for item in items:
            if config.copy:
                item.move(lib, True, task.is_album)
            if config.write and write_tags:
                item.write()

        # Add items to library. We consolidate this at the end to avoid
        # locking while we do the copying and tag updates.
        try:
            if task.is_album:
                # Add an album.
                album = lib.add_album(task.items,
                                      infer_aa = task.should_infer_aa())
                task.album_id = album.id
            else:
                # Add tracks.
                for item in items:
                    lib.add(item)
        finally:
            lib.save()
def fetch_art(config):
    """A coroutine that fetches and applies album art for albums where
    appropriate.

    Tasks that should be skipped, or for which `should_fetch_art()` is
    false, pass through untouched. Otherwise the art found by the
    `autotag.art` module for the task's match info is attached to the
    album that was added for the task.
    """
    lib = _reopen_lib(config.lib)
    task = None
    while True:
        task = yield task
        if task.should_skip():
            continue

        if task.should_fetch_art():
            # The art module is reached through the imported `autotag`
            # package; no top-level name `beets` is bound in this file.
            artpath = autotag.art.art_for_album(task.info)

            # Save the art if any was found.
            if artpath:
                try:
                    album = lib.get_album(task.album_id)
                    album.set_art(artpath)
                finally:
                    lib.save(False)
def finalize(config):
    """A coroutine that finishes up importer tasks. In particular, the
    coroutine sends plugin events, deletes old files, and saves
    progress. This is a "terminal" coroutine (it yields None).
    """
    lib = _reopen_lib(config.lib)
    while True:
        task = yield
        track_progress = config.resume is not False

        if task.should_skip():
            if track_progress:
                task.save_progress()
            continue

        # Announce that we've added an album (or its items).
        if task.is_album:
            items = task.items
            album = lib.get_album(task.album_id)
            plugins.send('album_imported', lib=lib, album=album)
        else:
            items = [task.item]
            for item in items:
                plugins.send('item_imported', lib=lib, item=item)

        # Finally, delete old files.
        if config.copy and config.delete:
            # NOTE(review): apply_choices() builds task.old_paths from
            # syspath(item.path) while these paths skip syspath --
            # confirm the two forms always compare equal.
            new_paths = [os.path.realpath(item.path) for item in items]
            for old_path in task.old_paths:
                # Only delete files that were actually moved.
                if old_path not in new_paths:
                    os.remove(syspath(old_path))

        # Update progress.
        if track_progress:
            task.save_progress()
# Singleton pipeline stages.
def item_lookup(config):
    """A coroutine performing the initial lookup for an item task: the
    result of `autotag.tag_item` is stored on every non-sentinel task
    before it is yielded back.
    """
    task = None
    while True:
        task = yield task
        if not task.sentinel:
            match = autotag.tag_item(task.item, config.timid)
            task.set_item_match(*match)
def item_query(config):
    """A coroutine that queries the user for input on single-item
    lookups.

    The choice returned by `config.choose_item_func` is recorded and
    logged; ASIS and APPLY choices are then checked for duplicates and
    turned into SKIP when the item is already known.
    """
    lib = _reopen_lib(config.lib)
    recent = set()
    task = None
    while True:
        task = yield task
        if task.sentinel:
            continue

        choice = config.choose_item_func(task, config)
        task.set_choice(choice)
        log_choice(config, task)

        # Duplicate check.
        if task.choice_flag not in (action.ASIS, action.APPLY):
            continue
        if choice is action.ASIS:
            # As-is: compare using the item's existing tags.
            artist, title = task.item.artist, task.item.title
        else:
            # Applied match: compare using the chosen info.
            artist, title = task.info['artist'], task.info['title']

        if _item_duplicate_check(lib, artist, title, recent):
            tag_log(config.logfile, 'duplicate', task.item.path)
            log.warn("This item is already in the library!")
            task.set_choice(action.SKIP)
def item_progress(config):
    """Stand-in for the lookup and query stages in a non-autotagged
    singleton import: it just logs each item's path and marks the task
    as an as-is import.
    """
    task = None
    log.info('Importing items:')
    while True:
        task = yield task
        if not task.sentinel:
            log.info(task.item.path)
            task.set_null_item_match()
            task.set_choice(action.ASIS)
# Main driver.
def run_import(**kwargs):
    """Run an import. The keyword arguments are the same as those to
    ImportConfig.

    The pipeline is assembled from the stages matching the
    configuration (singleton or whole-album, autotagged or not, with or
    without art) and then run threaded or sequentially. An ImportAbort
    raised while it runs ends the import silently.
    """
    config = ImportConfig(**kwargs)

    # Set up the pipeline.
    stages = [read_tasks(config)]
    if config.singletons:
        # Singleton importer.
        if config.autot:
            stages.append(item_lookup(config))
            stages.append(item_query(config))
        else:
            stages.append(item_progress(config))
    else:
        # Whole-album importer.
        if config.autot:
            # Only look up and query the user when autotagging.
            stages.append(initial_lookup(config))
            stages.append(user_query(config))
        else:
            # When not autotagging, just display progress.
            stages.append(show_progress(config))
    stages.append(apply_choices(config))
    if config.art:
        stages.append(fetch_art(config))
    stages.append(finalize(config))
    pl = pipeline.Pipeline(stages)

    # Run the pipeline.
    try:
        if config.threaded:
            pl.run_parallel(QUEUE_SIZE)
        else:
            pl.run_sequential()
    except ImportAbort:
        # User aborted operation. Silently stop.
        pass

1202
lib/beets/library.py Normal file

File diff suppressed because it is too large Load Diff

859
lib/beets/mediafile.py Normal file
View File

@@ -0,0 +1,859 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Handles low-level interfacing for files' tags. Wraps Mutagen to
automatically detect file types and provide a unified interface for a
useful subset of music files' tags.
Usage:
>>> f = MediaFile('Lucy.mp3')
>>> f.title
u'Lucy in the Sky with Diamonds'
>>> f.artist = 'The Beatles'
>>> f.save()
A field will always return a reasonable value of the correct type, even
if no tag is present. If no value is available, the value will be false
(e.g., zero or the empty string).
"""
import lib.mutagen
import lib.mutagen.mp3
import lib.mutagen.oggvorbis
import lib.mutagen.mp4
import lib.mutagen.flac
import lib.mutagen.monkeysaudio
import datetime
import re
import base64
import imghdr
import os
from lib.beets.util.enumeration import enum
__all__ = ['UnreadableFileError', 'FileTypeError', 'MediaFile']
# Exceptions.
class UnreadableFileError(IOError):
    """Raised for any file MediaFile can't read."""
class FileTypeError(UnreadableFileError):
    """Raised for files that don't seem to have a type MediaFile
    supports.
    """
# Constants.
# Human-readable type names, keyed by the short type code (the 'mp3'
# and 'mp4' codes are the ones MediaField and ImageField compare
# against an object's `type` attribute).
TYPES = {
'mp3': 'MP3',
'mp4': 'AAC',
'ogg': 'OGG',
'flac': 'FLAC',
'ape': 'APE',
'wv': 'WavPack',
'mpc': 'Musepack',
}
# Utility.
def _safe_cast(out_type, val):
    """Tries to convert val to out_type but will never raise an
    exception. If the value can't be converted, then a sensible
    default value is returned. out_type should be bool, int, or
    unicode; otherwise, the value is just passed through.

    The defaults are 0 for int, False for bool and the empty string
    for unicode. For int, non-numeric values are converted to text and
    the run of digits at the front (after stripping whitespace) is
    used.
    """
    if out_type == int:
        if val is None:
            return 0
        if isinstance(val, (int, float)):
            # Just a number.
            try:
                return int(val)
            except (ValueError, OverflowError):
                # NaN and infinities have no integer value.
                return 0

        # Process any other type as a string.
        if not isinstance(val, basestring):
            val = unicode(val)
        # Get a number from the front of the string.
        digits = re.match('[0-9]*', val.strip()).group(0)
        if not digits:
            return 0
        return int(digits)

    elif out_type == bool:
        if val is None:
            return False
        try:
            # Should work for strings, bools, ints:
            return bool(int(val))
        except (ValueError, TypeError):
            # Unparseable text, or a type int() does not accept at all.
            return False

    elif out_type == unicode:
        if val is None:
            return u''
        return unicode(val)

    else:
        return val
# Flags for encoding field behavior.
# Determine style of packing, if any. These values are what
# StorageStyle.packing holds and what Packed dispatches on.
packing = enum('SLASHED', # pair delimited by /
'TUPLE', # a python tuple of 2 items
'DATE', # YYYY-MM-DD
name='packing')
class StorageStyle(object):
    """Describes how a single field is stored in one tag format.

    - key: The Mutagen key used to access the field's data.
    - list_elem: Whether the value is stored as the first element of a
      list (True) or as a single object (False).
    - as_type: Which type the value is stored as (unicode, int, bool,
      or str).
    - packing: If the value lives in a multiple-value storage unit,
      which kind of packing (from the packing enum); otherwise None.
      (Makes as_type irrelevant.)
    - pack_pos: If the value is packed, the position it is stored in.
    - id3_desc: ID3 storage only: also match frames against this
      'desc' field, not just the key.
    - id3_frame_field: ID3 storage only: name of the frame attribute
      that holds the value.
    """
    def __init__(self, key, list_elem = True, as_type = unicode,
                 packing = None, pack_pos = 0, id3_desc = None,
                 id3_frame_field = 'text'):
        # Where the value lives.
        self.key = key
        self.id3_desc = id3_desc
        self.id3_frame_field = id3_frame_field

        # How the value is shaped.
        self.list_elem = list_elem
        self.as_type = as_type
        self.packing = packing
        self.pack_pos = pack_pos
# Dealing with packings.
class Packed(object):
    """Makes a packed list of values subscriptable. To access the packed
    output after making changes, use packed_thing.items.
    """
    def __init__(self, items, packstyle, none_val=0, out_type=int):
        """Create a Packed object for subscripting the packed values in
        items. The items are packed using packstyle, which is a value
        from the packing enum. none_val is returned from a request when
        no suitable value is found in the items. Values are converted to
        out_type before they are returned.
        """
        self.items = items
        self.packstyle = packstyle
        self.none_val = none_val
        self.out_type = out_type

    def __getitem__(self, index):
        """Return the value at position `index` of the packed data,
        cast to out_type. A missing or empty entry yields none_val.
        Raises TypeError when `index` is not an integer.
        """
        if not isinstance(index, int):
            raise TypeError('index must be an integer')

        if self.items is None:
            return self.none_val

        items = self.items
        if self.packstyle == packing.DATE:
            # Remove time information from dates. Usually delimited by
            # a "T" or a space.
            items = re.sub(r'[Tt ].*$', '', unicode(items))

        # transform from a string packing into a list we can index into
        if self.packstyle == packing.SLASHED:
            seq = unicode(items).split('/')
        elif self.packstyle == packing.DATE:
            seq = unicode(items).split('-')
        elif self.packstyle == packing.TUPLE:
            seq = items # tuple: items is already indexable

        try:
            out = seq[index]
        except Exception:
            # Best effort: any failure to index (too few entries, data
            # that is not indexable, unknown packing style) counts as
            # "no value". `Exception` keeps KeyboardInterrupt and
            # SystemExit from being swallowed, unlike a bare except.
            out = None

        if out is None or out == self.none_val or out == '':
            return _safe_cast(self.out_type, self.none_val)
        else:
            return _safe_cast(self.out_type, out)

    def __setitem__(self, index, value):
        """Store `value` at position `index` and re-pack the result
        into self.items, keeping the values at the other positions.
        """
        if self.packstyle in (packing.SLASHED, packing.TUPLE):
            # SLASHED and TUPLE are always two-item packings
            length = 2
        else:
            # DATE can have up to three fields
            length = 3

        # make a list of the items we'll pack
        new_items = []
        for i in range(length):
            if i == index:
                next_item = value
            else:
                next_item = self[i]
            new_items.append(next_item)

        if self.packstyle == packing.DATE:
            # Truncate the items wherever we reach an invalid (none)
            # entry. This prevents dates like 2008-00-05.
            for i, item in enumerate(new_items):
                if item == self.none_val or item is None:
                    del(new_items[i:]) # truncate
                    break

        if self.packstyle == packing.SLASHED:
            self.items = '/'.join(map(unicode, new_items))
        elif self.packstyle == packing.DATE:
            field_lengths = [4, 2, 2] # YYYY-MM-DD
            elems = []
            for i, item in enumerate(new_items):
                elems.append( ('%0' + str(field_lengths[i]) + 'i') % item )
            self.items = '-'.join(elems)
        elif self.packstyle == packing.TUPLE:
            self.items = new_items
# The field itself.
class MediaField(object):
    """A descriptor providing access to a particular (abstract) metadata
    field. out_type is the type that users of MediaFile should see and
    can be unicode, int, or bool. The mp3, mp4, and etc keyword
    arguments are StorageStyle instances (or lists of them)
    parameterizing the field's storage for each kind of file.
    """
    def __init__(self, out_type = unicode, **kwargs):
        """Creates a new MediaField.
        - out_type: The field's semantic (exterior) type.
        - kwargs: A hash whose keys are 'mp3', 'mp4', and 'etc'
          and whose values are StorageStyle instances
          parameterizing the field's storage for each type.
        Raises TypeError unless exactly those three keys are given.
        """
        self.out_type = out_type
        if not set(['mp3', 'mp4', 'etc']) == set(kwargs):
            raise TypeError('MediaField constructor must have keyword '
                            'arguments mp3, mp4, and etc')
        self.styles = kwargs

    def _fetchdata(self, obj, style):
        """Get the value associated with this descriptor's field stored
        with the given StorageStyle. Unwraps from a list if necessary.
        Returns None when the file has no matching tag.
        """
        # fetch the value, which may be a scalar or a list
        if obj.type == 'mp3':
            if style.id3_desc is not None: # also match on 'desc' field
                frames = obj.mgfile.tags.getall(style.key)
                entry = None
                for frame in frames:
                    if frame.desc == style.id3_desc:
                        entry = getattr(frame, style.id3_frame_field)
                        break
                if entry is None: # no desc match
                    return None
            else:
                # Get the metadata frame object.
                try:
                    frame = obj.mgfile[style.key]
                except KeyError:
                    return None
                entry = getattr(frame, style.id3_frame_field)

        else: # Not MP3.
            try:
                entry = obj.mgfile[style.key]
            except KeyError:
                return None

        # possibly index the list
        if style.list_elem:
            if entry: # List must have at least one value.
                return entry[0]
            else:
                return None
        else:
            return entry

    def _storedata(self, obj, val, style):
        """Store val for this descriptor's field in the tag dictionary
        according to the provided StorageStyle. Store it as a
        single-item list if necessary.
        """
        # wrap as a list if necessary
        if style.list_elem:
            out = [val]
        else:
            out = val

        if obj.type == 'mp3':
            # Try to match on "desc" field.
            if style.id3_desc is not None:
                frames = obj.mgfile.tags.getall(style.key)

                # try modifying in place
                found = False
                for frame in frames:
                    if frame.desc == style.id3_desc:
                        setattr(frame, style.id3_frame_field, out)
                        found = True
                        break

                # need to make a new frame?
                if not found:
                    assert isinstance(style.id3_frame_field, str) # Keyword.
                    frame = lib.mutagen.id3.Frames[style.key](
                        encoding=3,
                        desc=style.id3_desc,
                        **{style.id3_frame_field: val}
                    )
                    obj.mgfile.tags.add(frame)

            # Try to match on "owner" field.
            elif style.key.startswith('UFID:'):
                owner = style.key.split(':', 1)[1]
                frames = obj.mgfile.tags.getall(style.key)

                for frame in frames:
                    # Replace existing frame data.
                    if frame.owner == owner:
                        setattr(frame, style.id3_frame_field, val)
                        # Stop here so that the for/else below only
                        # creates a frame when none was updated;
                        # without the break it ran every time and
                        # replaced all UFID frames.
                        break
                else:
                    # New frame.
                    assert isinstance(style.id3_frame_field, str) # Keyword.
                    frame = lib.mutagen.id3.UFID(owner=owner,
                        **{style.id3_frame_field: val})
                    obj.mgfile.tags.setall('UFID', [frame])

            # Just replace based on key.
            else:
                assert isinstance(style.id3_frame_field, str) # Keyword.
                frame = lib.mutagen.id3.Frames[style.key](encoding = 3,
                    **{style.id3_frame_field: val})
                obj.mgfile.tags.setall(style.key, [frame])

        else: # Not MP3.
            obj.mgfile[style.key] = out

    def _styles(self, obj):
        """Return the list of StorageStyles that apply to obj's file
        type.
        """
        if obj.type in ('mp3', 'mp4'):
            styles = self.styles[obj.type]
        else:
            styles = self.styles['etc'] # sane styles

        # Make sure we always return a list of styles, even when given
        # a single style for convenience.
        if isinstance(styles, StorageStyle):
            return [styles]
        else:
            return styles

    def __get__(self, obj, owner):
        """Retrieve the value of this metadata field.
        """
        # Fetch the data using the various StorageStyles.
        styles = self._styles(obj)
        for style in styles:
            # Use the first style that returns a reasonable value.
            out = self._fetchdata(obj, style)
            if out:
                break

        # `style` is the style that produced the value (or the last one
        # tried when none did).
        if style.packing:
            out = Packed(out, style.packing)[style.pack_pos]

        return _safe_cast(self.out_type, out)

    def __set__(self, obj, val):
        """Set the value of this metadata field.
        """
        # Store using every StorageStyle available.
        styles = self._styles(obj)
        for style in styles:

            if style.packing:
                p = Packed(self._fetchdata(obj, style), style.packing)
                p[style.pack_pos] = val
                out = p.items

            else: # unicode, integer, or boolean scalar
                out = val

                # deal with Nones according to abstract type if present
                if out is None:
                    if self.out_type == int:
                        out = 0
                    elif self.out_type == bool:
                        out = False
                    elif self.out_type == unicode:
                        out = u''
                    # We trust that packed values are handled above.

                # convert to correct storage type (irrelevant for
                # packed values)
                if style.as_type == unicode:
                    if out is None:
                        out = u''
                    else:
                        if self.out_type == bool:
                            # store bools as 1,0 instead of True,False
                            out = unicode(int(out))
                        else:
                            out = unicode(out)
                elif style.as_type == int:
                    if out is None:
                        out = 0
                    else:
                        out = int(out)
                elif style.as_type in (bool, str):
                    out = style.as_type(out)

            # store the data
            self._storedata(obj, out, style)
class CompositeDateField(object):
    """A MediaFile field exposing the year, month, and day fields
    together as one datetime.date object. Supports both getting and
    setting of the component fields.
    """
    def __init__(self, year_field, month_field, day_field):
        """Build the date field from the MediaFields that hold the
        individual components.
        """
        self.year_field = year_field
        self.month_field = month_field
        self.day_field = day_field

    def __get__(self, obj, owner):
        """Return the smallest valid datetime.date whose components
        are at least as large as the three component fields (that is,
        if year == 1999, month == 0, and day == 0, then the result is
        datetime.date(1999, 1, 1)). If the components indicate an
        invalid date (e.g., if month == 47), datetime.date.min is
        returned.
        """
        try:
            year = max(self.year_field.__get__(obj, owner), datetime.MINYEAR)
            month = max(self.month_field.__get__(obj, owner), 1)
            day = max(self.day_field.__get__(obj, owner), 1)
            return datetime.date(year, month, day)
        except ValueError: # Out of range values.
            return datetime.date.min

    def __set__(self, obj, val):
        """Copy the year, month, and day of the given datetime.date
        object into the component fields.
        """
        for field, part in ((self.year_field, val.year),
                            (self.month_field, val.month),
                            (self.day_field, val.day)):
            field.__set__(obj, part)
class ImageField(object):
"""A descriptor providing access to a file's embedded album art.
Holds a bytestring reflecting the image data. The image should
either be a JPEG or a PNG for cross-format compatibility. It's
probably a bad idea to use anything but these two formats.
"""
@classmethod
def _mime(cls, data):
"""Return the MIME type (either image/png or image/jpeg) of the
image data (a bytestring).
"""
kind = imghdr.what(None, h=data)
if kind == 'png':
return 'image/png'
else:
# Currently just fall back to JPEG.
return 'image/jpeg'
@classmethod
def _mp4kind(cls, data):
    """Return the MPEG-4 image type code for the image data. Anything
    that is not recognized as PNG is assumed to be JPEG.
    """
    cover_cls = lib.mutagen.mp4.MP4Cover
    if imghdr.what(None, h=data) == 'png':
        return cover_cls.FORMAT_PNG
    return cover_cls.FORMAT_JPEG
def __get__(self, obj, owner):
if obj.type == 'mp3':
# Look for APIC frames.
for frame in obj.mgfile.tags.values():
if frame.FrameID == 'APIC':
picframe = frame
break
else:
# No APIC frame.
return None
return picframe.data
elif obj.type == 'mp4':
if 'covr' in obj.mgfile:
covers = obj.mgfile['covr']
if covers:
cover = covers[0]
# cover is an MP4Cover, which is a subclass of str.
return cover
# No cover found.
return None
else:
# Here we're assuming everything but MP3 and MPEG-4 uses
# the Xiph/Vorbis Comments standard. This may not be valid.
# http://wiki.xiph.org/VorbisComment#Cover_art
if 'metadata_block_picture' not in obj.mgfile:
# Try legacy COVERART tags.
if 'coverart' in obj.mgfile and obj.mgfile['coverart']:
return base64.b64decode(obj.mgfile['coverart'][0])
return None
for data in obj.mgfile["metadata_block_picture"]:
try:
pic = lib.mutagen.flac.Picture(base64.b64decode(data))
break
except TypeError:
pass
else:
return None
return pic.data
def __set__(self, obj, val):
if val is not None:
if not isinstance(val, str):
raise ValueError('value must be a byte string or None')
if obj.type == 'mp3':
# Clear all APIC frames.
obj.mgfile.tags.delall('APIC')
if val is None:
# If we're clearing the image, we're done.
return
picframe = lib.mutagen.id3.APIC(
encoding = 3,
mime = self._mime(val),
type = 3, # front cover
desc = u'',
data = val,
)
obj.mgfile['APIC'] = picframe
elif obj.type == 'mp4':
if val is None:
if 'covr' in obj.mgfile:
del obj.mgfile['covr']
else:
cover = lib.mutagen.mp4.MP4Cover(val, self._mp4kind(val))
obj.mgfile['covr'] = [cover]
else:
# Again, assuming Vorbis Comments standard.
# Strip all art, including legacy COVERART.
if 'metadata_block_picture' in obj.mgfile:
if 'metadata_block_picture' in obj.mgfile:
del obj.mgfile['metadata_block_picture']
if 'coverart' in obj.mgfile:
del obj.mgfile['coverart']
if 'coverartmime' in obj.mgfile:
del obj.mgfile['coverartmime']
# Add new art if provided.
if val is not None:
pic = lib.mutagen.flac.Picture()
pic.data = val
pic.mime = self._mime(val)
obj.mgfile['metadata_block_picture'] = [
base64.b64encode(pic.write())
]
# The file (a collection of fields).
class MediaFile(object):
    """A multimedia file on disk together with descriptor-based access
    to its metadata tags.
    """
    def __init__(self, path):
        """Open the file at `path` with Mutagen and work out which tag
        family it uses. Raises UnreadableFileError when the file cannot
        be read and FileTypeError when its type is not supported.
        """
        self.path = path

        unreadable_exc = (
            lib.mutagen.mp3.HeaderNotFoundError,
            lib.mutagen.flac.FLACNoHeaderError,
            lib.mutagen.monkeysaudio.MonkeysAudioHeaderError,
            lib.mutagen.mp4.MP4StreamInfoError,
            lib.mutagen.oggvorbis.OggVorbisHeaderError,
        )
        try:
            self.mgfile = lib.mutagen.File(path)
        except unreadable_exc:
            raise UnreadableFileError('Mutagen could not read file')
        except IOError:
            raise UnreadableFileError('could not read file')

        if self.mgfile is None:  # Mutagen couldn't guess the type
            raise FileTypeError('file type unsupported by Mutagen')

        # Map the name of the Mutagen file class onto our type code.
        type_codes = {
            'M4A': 'mp4',
            'MP4': 'mp4',
            'ID3': 'mp3',
            'MP3': 'mp3',
            'FLAC': 'flac',
            'OggVorbis': 'ogg',
            'MonkeysAudio': 'ape',
            'WavPack': 'wv',
            'Musepack': 'mpc',
        }
        mutagen_name = type(self.mgfile).__name__
        if mutagen_name not in type_codes:
            raise FileTypeError('file type %s unsupported by MediaFile' %
                                mutagen_name)
        self.type = type_codes[mutagen_name]

        # add a set of tags if it's missing
        if self.mgfile.tags is None:
            self.mgfile.add_tags()

    def save(self):
        """Write the tags back through the underlying Mutagen object."""
        self.mgfile.save()


    #### field definitions ####

    title = MediaField(
        mp3=StorageStyle('TIT2'),
        mp4=StorageStyle("\xa9nam"),
        etc=StorageStyle('title'),
    )
    artist = MediaField(
        mp3=StorageStyle('TPE1'),
        mp4=StorageStyle("\xa9ART"),
        etc=StorageStyle('artist'),
    )
    album = MediaField(
        mp3=StorageStyle('TALB'),
        mp4=StorageStyle("\xa9alb"),
        etc=StorageStyle('album'),
    )
    genre = MediaField(
        mp3=StorageStyle('TCON'),
        mp4=StorageStyle("\xa9gen"),
        etc=StorageStyle('genre'),
    )
    composer = MediaField(
        mp3=StorageStyle('TCOM'),
        mp4=StorageStyle("\xa9wrt"),
        etc=StorageStyle('composer'),
    )
    grouping = MediaField(
        mp3=StorageStyle('TIT1'),
        mp4=StorageStyle("\xa9grp"),
        etc=StorageStyle('grouping'),
    )

    # The three date components are unpacked from one packed date tag.
    year = MediaField(
        out_type=int,
        mp3=StorageStyle('TDRC', packing=packing.DATE, pack_pos=0),
        mp4=StorageStyle("\xa9day", packing=packing.DATE, pack_pos=0),
        etc=[StorageStyle('date', packing=packing.DATE, pack_pos=0),
             StorageStyle('year')],
    )
    month = MediaField(
        out_type=int,
        mp3=StorageStyle('TDRC', packing=packing.DATE, pack_pos=1),
        mp4=StorageStyle("\xa9day", packing=packing.DATE, pack_pos=1),
        etc=StorageStyle('date', packing=packing.DATE, pack_pos=1),
    )
    day = MediaField(
        out_type=int,
        mp3=StorageStyle('TDRC', packing=packing.DATE, pack_pos=2),
        mp4=StorageStyle("\xa9day", packing=packing.DATE, pack_pos=2),
        etc=StorageStyle('date', packing=packing.DATE, pack_pos=2),
    )
    date = CompositeDateField(year, month, day)

    track = MediaField(
        out_type=int,
        mp3=StorageStyle('TRCK', packing=packing.SLASHED, pack_pos=0),
        mp4=StorageStyle('trkn', packing=packing.TUPLE, pack_pos=0),
        etc=[StorageStyle('track'),
             StorageStyle('tracknumber')],
    )
    tracktotal = MediaField(
        out_type=int,
        mp3=StorageStyle('TRCK', packing=packing.SLASHED, pack_pos=1),
        mp4=StorageStyle('trkn', packing=packing.TUPLE, pack_pos=1),
        etc=[StorageStyle('tracktotal'),
             StorageStyle('trackc'),
             StorageStyle('totaltracks')],
    )
    disc = MediaField(
        out_type=int,
        mp3=StorageStyle('TPOS', packing=packing.SLASHED, pack_pos=0),
        mp4=StorageStyle('disk', packing=packing.TUPLE, pack_pos=0),
        etc=[StorageStyle('disc'),
             StorageStyle('discnumber')],
    )
    disctotal = MediaField(
        out_type=int,
        mp3=StorageStyle('TPOS', packing=packing.SLASHED, pack_pos=1),
        mp4=StorageStyle('disk', packing=packing.TUPLE, pack_pos=1),
        etc=[StorageStyle('disctotal'),
             StorageStyle('discc'),
             StorageStyle('totaldiscs')],
    )
    lyrics = MediaField(
        mp3=StorageStyle('USLT', list_elem=False, id3_desc=u''),
        mp4=StorageStyle("\xa9lyr"),
        etc=StorageStyle('lyrics'),
    )
    comments = MediaField(
        mp3=StorageStyle('COMM', id3_desc=u''),
        mp4=StorageStyle("\xa9cmt"),
        etc=[StorageStyle('description'),
             StorageStyle('comment')],
    )
    bpm = MediaField(
        out_type=int,
        mp3=StorageStyle('TBPM'),
        mp4=StorageStyle('tmpo', as_type=int),
        etc=StorageStyle('bpm'),
    )
    comp = MediaField(
        out_type=bool,
        mp3=StorageStyle('TCMP'),
        mp4=StorageStyle('cpil', list_elem=False, as_type=bool),
        etc=StorageStyle('compilation'),
    )
    albumartist = MediaField(
        mp3=StorageStyle('TXXX', id3_desc=u'Album Artist'),
        mp4=StorageStyle('----:com.apple.iTunes:Album Artist',
                         as_type=str),
        etc=[StorageStyle('album artist'),
             StorageStyle('albumartist')],
    )
    albumtype = MediaField(
        mp3=StorageStyle('TXXX', id3_desc=u'MusicBrainz Album Type'),
        mp4=StorageStyle('----:com.apple.iTunes:MusicBrainz Album Type',
                         as_type=str),
        etc=StorageStyle('musicbrainz_albumtype'),
    )

    # Album art.
    art = ImageField()

    # MusicBrainz IDs.
    mb_trackid = MediaField(
        mp3=StorageStyle('UFID:http://musicbrainz.org',
                         list_elem=False,
                         id3_frame_field='data'),
        mp4=StorageStyle('----:com.apple.iTunes:MusicBrainz Track Id',
                         as_type=str),
        etc=StorageStyle('musicbrainz_trackid'),
    )
    mb_albumid = MediaField(
        mp3=StorageStyle('TXXX', id3_desc=u'MusicBrainz Album Id'),
        mp4=StorageStyle('----:com.apple.iTunes:MusicBrainz Album Id',
                         as_type=str),
        etc=StorageStyle('musicbrainz_albumid'),
    )
    mb_artistid = MediaField(
        mp3=StorageStyle('TXXX', id3_desc=u'MusicBrainz Artist Id'),
        mp4=StorageStyle('----:com.apple.iTunes:MusicBrainz Artist Id',
                         as_type=str),
        etc=StorageStyle('musicbrainz_artistid'),
    )
    mb_albumartistid = MediaField(
        mp3=StorageStyle('TXXX',
                         id3_desc=u'MusicBrainz Album Artist Id'),
        mp4=StorageStyle(
            '----:com.apple.iTunes:MusicBrainz Album Artist Id',
            as_type=str),
        etc=StorageStyle('musicbrainz_albumartistid'),
    )

    @property
    def length(self):
        """The length reported by Mutagen's stream info."""
        return self.mgfile.info.length

    @property
    def bitrate(self):
        """The bitrate reported by Mutagen, or one derived from the
        file size and length when Mutagen does not provide it.
        """
        if hasattr(self.mgfile.info, 'bitrate'):
            # Many formats provide it explicitly.
            return self.mgfile.info.bitrate
        # Otherwise, we calculate bitrate from the file size. (This
        # is the case for all of the lossless formats.)
        size_in_bytes = os.path.getsize(self.path)
        return int(size_in_bytes * 8 / self.length)

    @property
    def format(self):
        """The TYPES entry for this file's type code."""
        return TYPES[self.type]

223
lib/beets/plugins.py Executable file
View File

@@ -0,0 +1,223 @@
# This file is part of beets.
# Copyright 2010, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Support for beets plugins."""
import logging
import itertools
import traceback
from collections import defaultdict
PLUGIN_NAMESPACE = 'beetsplug'
DEFAULT_PLUGINS = []
# Global logger.
log = logging.getLogger('beets')
# Managing the plugins themselves.
class BeetsPlugin(object):
    """Base class for beets plugins. A plugin subclasses BeetsPlugin
    and overrides whichever of the hook methods below it needs; the
    defaults contribute nothing.
    """
    # Event name -> list of handler functions. Created lazily on the
    # class the first time a listener is registered.
    listeners = None

    def commands(self):
        """Should return a list of beets.ui.Subcommand objects for
        commands that should be added to beets' CLI.
        """
        return ()

    def track_distance(self, item, info):
        """Should return a (distance, distance_max) pair to be added
        to the distance value for every track comparison.
        """
        return 0.0, 0.0

    def album_distance(self, items, info):
        """Should return a (distance, distance_max) pair to be added
        to the distance value for every album-level comparison.
        """
        return 0.0, 0.0

    def candidates(self, items):
        """Should return a sequence of MusicBrainz info dictionaries
        that match the album whose items are provided.
        """
        return ()

    def item_candidates(self, item):
        """Should return a sequence of MusicBrainz track info
        dictionaries that match the item provided.
        """
        return ()

    def configure(self, config):
        """This method is called with the ConfigParser object after
        the CLI starts up.
        """
        pass

    @classmethod
    def register_listener(cls, event, func):
        """Add `func` as a listener for `event`. (An imperative
        alternative to the @listen decorator.)
        """
        registry = cls.listeners
        if registry is None:
            registry = cls.listeners = defaultdict(list)
        registry[event].append(func)

    @classmethod
    def listen(cls, event):
        """Decorator that adds a function as an event handler for the
        specified event (as a string). The parameters passed to the
        function vary with the event, so handlers should accept named
        parameters; function(**kwargs) traps all of them in a
        dictionary.

        Example:

            >>> @MyPlugin.listen("imported")
            >>> def importListener(**kwargs):
            >>>     pass
        """
        def helper(func):
            if cls.listeners is None:
                cls.listeners = defaultdict(list)
            cls.listeners[event].append(func)
            return func
        return helper
def load_plugins(names=()):
"""Imports the modules for a sequence of plugin names. Each name
must be the name of a Python module under the "beetsplug" namespace
package in sys.path; the module indicated should contain the
BeetsPlugin subclasses desired. A default set of plugins is also
loaded.
"""
for name in itertools.chain(names, DEFAULT_PLUGINS):
modname = '%s.%s' % (PLUGIN_NAMESPACE, name)
try:
try:
__import__(modname, None, None)
except ImportError, exc:
# Again, this is hacky:
if exc.args[0].endswith(' ' + name):
log.warn('** plugin %s not found' % name)
else:
raise
except:
log.warn('** error loading plugin %s' % name)
log.warn(traceback.format_exc())
# Plugin class -> its single instance.
_instances = {}

def find_plugins():
    """Return one instance of every BeetsPlugin subclass that is
    currently loaded, after first loading the default plugin set.
    Instances are cached so each class is instantiated only once.
    """
    load_plugins()
    found = []
    for plugin_cls in BeetsPlugin.__subclasses__():
        try:
            instance = _instances[plugin_cls]
        except KeyError:
            # First time this class is seen; create and cache it.
            instance = _instances[plugin_cls] = plugin_cls()
        found.append(instance)
    return found
# Communication with plugins.
def commands():
    """Return a list with the Subcommand objects contributed by all
    loaded plugins.
    """
    collected = []
    for plugin in find_plugins():
        collected.extend(plugin.commands())
    return collected
def track_distance(item, info):
    """Add up the track distances reported by every loaded plugin.
    Returns a (distance, distance_max) pair.
    """
    pairs = [plugin.track_distance(item, info)
             for plugin in find_plugins()]
    total = sum((dist for dist, _ in pairs), 0.0)
    total_max = sum((dist_max for _, dist_max in pairs), 0.0)
    return total, total_max
def album_distance(items, info):
    """Add up the album distances reported by every loaded plugin.
    Returns a (distance, distance_max) pair.
    """
    total, total_max = 0.0, 0.0
    for plugin in find_plugins():
        contribution, contribution_max = plugin.album_distance(items, info)
        total = total + contribution
        total_max = total_max + contribution_max
    return total, total_max
def candidates(items):
    """Return a list of the MusicBrainz album candidates that the
    loaded plugins propose for `items`.
    """
    matches = []
    for plugin in find_plugins():
        for candidate in plugin.candidates(items):
            matches.append(candidate)
    return matches
def item_candidates(item):
    """Return a list of the MusicBrainz track candidates that the
    loaded plugins propose for `item`.
    """
    matches = []
    for plugin in find_plugins():
        for candidate in plugin.item_candidates(item):
            matches.append(candidate)
    return matches
def configure(config):
    """Hand the configuration object to the `configure` hook of every
    loaded plugin.
    """
    loaded = find_plugins()
    for plugin in loaded:
        plugin.configure(config)
# Event dispatch.
# All the handlers for the event system.
# Each key of the dictionary should contain a list of functions to be
# called for any event. Functions will be called in the order they were
# added.
_event_handlers = defaultdict(list)

def load_listeners():
    """Append the event handlers registered on every loaded plugin to
    the module-wide handler table.
    """
    for plugin in find_plugins():
        registered = plugin.listeners
        if not registered:
            continue
        for event, handlers in registered.items():
            _event_handlers[event].extend(handlers)
def send(event, **arguments):
    """Call every handler registered for `event`, in registration
    order, passing `arguments` through as keyword arguments.

    Returns the number of handlers called.
    """
    log.debug('Sending event: %s' % event)
    registered = _event_handlers[event]
    for callback in registered:
        callback(**arguments)
    return len(registered)

616
lib/beets/ui/__init__.py Normal file
View File

@@ -0,0 +1,616 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""This module contains all of the core logic for beets' command-line
interface. To invoke the CLI, just call beets.ui.main(). The actual
CLI commands are implemented in the ui.commands module.
"""
import os
import locale
import optparse
import textwrap
import ConfigParser
import sys
from difflib import SequenceMatcher
import logging
import sqlite3
import errno
from lib.beets import library
from lib.beets import plugins
from lib.beets import util
# Constants.
CONFIG_PATH_VAR = 'BEETSCONFIG'
DEFAULT_CONFIG_FILE = os.path.expanduser('~/.beetsconfig')
DEFAULT_LIBRARY = '~/.beetsmusic.blb'
DEFAULT_DIRECTORY = '~/Music'
DEFAULT_PATH_FORMATS = {
'default': '$albumartist/$album/$track $title',
'comp': 'Compilations/$album/$track $title',
'singleton': 'Non-Album/$artist/$title',
}
DEFAULT_ART_FILENAME = 'cover'
# UI exception. Commands should throw this in order to display
# nonrecoverable errors to the user.
class UserError(Exception):
    """Raised by commands to report a nonrecoverable error to the
    user.
    """
# Utilities.
def print_(*strings):
"""Like print, but rather than raising an error when a character
is not in the terminal's encoding's character set, just silently
replaces it.
"""
if strings:
if isinstance(strings[0], unicode):
txt = u' '.join(strings)
else:
txt = ' '.join(strings)
else:
txt = u''
if isinstance(txt, unicode):
try:
encoding = locale.getdefaultlocale()[1] or 'utf8'
except ValueError:
# Invalid locale environment variable setting. To avoid
# failing entirely for no good reason, assume UTF-8.
encoding = 'utf8'
txt = txt.encode(encoding, 'replace')
print txt
def input_options(options, require=False, prompt=None, fallback_prompt=None,
numrange=None, default=None, color=False, max_width=72):
"""Prompts a user for input. The sequence of `options` defines the
choices the user has. A single-letter shortcut is inferred for each
option; the user's choice is returned as that single, lower-case
letter. The options should be provided as lower-case strings unless
a particular shortcut is desired; in that case, only that letter
should be capitalized.
By default, the first option is the default. If `require` is
provided, then there is no default. `default` can be provided to
override this. The prompt and fallback prompt are also inferred but
can be overridden.
If numrange is provided, it is a pair of `(high, low)` (both ints)
indicating that, in addition to `options`, the user may enter an
integer in that inclusive range.
`max_width` specifies the maximum number of columns in the
automatically generated prompt string.
"""
# Assign single letters to each option. Also capitalize the options
# to indicate the letter.
letters = {}
display_letters = []
capitalized = []
first = True
for option in options:
# Is a letter already capitalized?
for letter in option:
if letter.isalpha() and letter.upper() == letter:
found_letter = letter
break
else:
# Infer a letter.
for letter in option:
if not letter.isalpha():
continue # Don't use punctuation.
if letter not in letters:
found_letter = letter
break
else:
raise ValueError('no unambiguous lettering found')
letters[found_letter.lower()] = option
index = option.index(found_letter)
# Mark the option's shortcut letter for display.
if (default is None and not numrange and first) \
or (isinstance(default, basestring) and
found_letter.lower() == default.lower()):
# The first option is the default; mark it.
show_letter = '[%s]' % found_letter.upper()
is_default = True
else:
show_letter = found_letter.upper()
is_default = False
# Possibly colorize the letter shortcut.
if color:
color = 'turquoise' if is_default else 'blue'
show_letter = colorize(color, show_letter)
# Insert the highlighted letter back into the word.
capitalized.append(
option[:index] + show_letter + option[index+1:]
)
display_letters.append(found_letter.upper())
first = False
# The default is just the first option if unspecified.
if default is None:
if require:
default = None
elif numrange:
default = numrange[0]
else:
default = display_letters[0].lower()
# Make a prompt if one is not provided.
if not prompt:
prompt_parts = []
prompt_part_lengths = []
if numrange:
if isinstance(default, int):
default_name = str(default)
if color:
default_name = colorize('turquoise', default_name)
tmpl = '# selection (default %s)'
prompt_parts.append(tmpl % default_name)
prompt_part_lengths.append(len(tmpl % str(default)))
else:
prompt_parts.append('# selection')
prompt_part_lengths.append(prompt_parts[-1])
prompt_parts += capitalized
prompt_part_lengths += [len(s) for s in options]
# Wrap the query text.
prompt = ''
line_length = 0
for i, (part, length) in enumerate(zip(prompt_parts,
prompt_part_lengths)):
# Add punctuation.
if i == len(prompt_parts) - 1:
part += '?'
else:
part += ','
length += 1
# Choose either the current line or the beginning of the next.
if line_length + length + 1 > max_width:
prompt += '\n'
line_length = 0
if line_length != 0:
# Not the beginning of the line; need a space.
part = ' ' + part
length += 1
prompt += part
line_length += length
# Make a fallback prompt too. This is displayed if the user enters
# something that is not recognized.
if not fallback_prompt:
fallback_prompt = 'Enter one of '
if numrange:
fallback_prompt += '%i-%i, ' % numrange
fallback_prompt += ', '.join(display_letters) + ':'
# (raw_input(prompt) was causing problems with colors.)
print prompt,
resp = raw_input()
while True:
resp = resp.strip().lower()
# Try default option.
if default is not None and not resp:
resp = default
# Try an integer input if available.
if numrange:
try:
resp = int(resp)
except ValueError:
pass
else:
low, high = numrange
if low <= resp <= high:
return resp
else:
resp = None
# Try a normal letter input.
if resp:
resp = resp[0]
if resp in letters:
return resp
# Prompt for new input.
print fallback_prompt,
resp = raw_input()
def input_yn(prompt, require=False, color=False):
    """Ask the user a yes/no question and return True for "yes".
    "Yes" is the default answer unless `require` is True, in which
    case there is no default.
    """
    answer = input_options(
        ('y', 'n'),
        require=require,
        prompt=prompt,
        fallback_prompt='Enter Y or N:',
        color=color,
    )
    return answer == 'y'
def make_query(criteria):
    """Join the criteria with spaces into one query string; return
    None when the result is empty after stripping whitespace.
    """
    query = ' '.join(criteria).strip()
    if query:
        return query
    return None
def config_val(config, section, name, default, vtype=None):
    """Look up option `name` in `section` of the configuration and
    return `default` when the option is absent. The section is added
    to the configuration if it does not exist yet. `vtype` optionally
    selects the return type (only bool is supported for now).
    """
    if not config.has_section(section):
        config.add_section(section)

    reader = config.getboolean if vtype is bool else config.get
    try:
        return reader(section, name)
    except ConfigParser.NoOptionError:
        return default
def human_bytes(size):
    """Format `size`, a number of bytes, as a string with one decimal
    and the largest unit that keeps the value below 1024. Sizes beyond
    the last known unit are reported as "big".
    """
    remaining = size
    for unit in ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB', 'HB'):
        if remaining < 1024:
            return "%3.1f %s" % (remaining, unit)
        remaining = remaining / 1024.0
    return "big"
def human_seconds(interval):
    """Format `interval`, a number of seconds, as a human-readable
    time interval with one decimal, using the largest unit whose count
    stays below the size of the next unit.
    """
    unit_names = ('second', 'minute', 'hour', 'day', 'week', 'year',
                  'decade')
    # How many of the previous unit make up each unit.
    unit_sizes = (1, 60, 60, 24, 7, 52, 10)

    amount = interval
    last = len(unit_names) - 1
    for position in range(len(unit_names)):
        amount = amount / float(unit_sizes[position])
        suffix = unit_names[position]
        # Stop as soon as the amount fits below the next unit; the
        # last unit always ends the conversion.
        if position < last and amount < unit_sizes[position + 1]:
            break

    return "%3.1f %ss" % (amount, suffix)
# ANSI terminal colorization code heavily inspired by pygments:
# http://dev.pocoo.org/hg/pygments-main/file/b2deea5b5030/pygments/console.py
# (pygments is by Tim Hatch, Armin Ronacher, et al.)
COLOR_ESCAPE = "\x1b["
DARK_COLORS = ["black", "darkred", "darkgreen", "brown", "darkblue",
               "purple", "teal", "lightgray"]
LIGHT_COLORS = ["darkgray", "red", "green", "yellow", "blue",
                "fuchsia", "turquoise", "white"]
RESET_COLOR = COLOR_ESCAPE + "39;49;00m"

def colorize(color, text):
    """Returns a string that prints the given text in the given color
    in a terminal that is ANSI color-aware. The color must be something
    in DARK_COLORS or LIGHT_COLORS; any other name raises ValueError.
    """
    # A color's position in its list selects the ANSI foreground code
    # (30 + index); light colors add the ";01" attribute.
    if color in DARK_COLORS:
        escape = COLOR_ESCAPE + "%im" % (DARK_COLORS.index(color) + 30)
    elif color in LIGHT_COLORS:
        escape = COLOR_ESCAPE + "%i;01m" % (LIGHT_COLORS.index(color) + 30)
    else:
        # Interpolate the name into the message; passing it as a second
        # argument would leave the "%s" placeholder unformatted.
        raise ValueError('no such color %s' % color)
    return escape + text + RESET_COLOR
def colordiff(a, b, highlight='red'):
    """Return the pair of strings `a` and `b` with the parts in which
    they differ wrapped in the `highlight` color.
    """
    left, right = [], []
    opcodes = SequenceMatcher(lambda x: False, a, b).get_opcodes()
    for op, a_lo, a_hi, b_lo, b_hi in opcodes:
        if op == 'equal':
            # In both strings.
            left.append(a[a_lo:a_hi])
            right.append(b[b_lo:b_hi])
        elif op == 'insert':
            # Right only.
            right.append(colorize(highlight, b[b_lo:b_hi]))
        elif op == 'delete':
            # Left only.
            left.append(colorize(highlight, a[a_lo:a_hi]))
        elif op == 'replace':
            # Right and left differ.
            left.append(colorize(highlight, a[a_lo:a_hi]))
            right.append(colorize(highlight, b[b_lo:b_hi]))
        else:
            assert(False)
    return ''.join(left), ''.join(right)
# Subcommand parsing infrastructure.
# This is a fairly generic subcommand parser for optparse. It is
# maintained externally here:
# http://gist.github.com/462717
# There you will also find a better description of the code and a more
# succinct example program.
class Subcommand(object):
    """A subcommand of a root command-line application that may be
    invoked by a SubcommandOptionParser.
    """
    def __init__(self, name, parser=None, help='', aliases=()):
        """Creates a new subcommand. name is the primary way to invoke
        the subcommand; aliases are alternate names. parser is an
        OptionParser responsible for parsing the subcommand's options.
        help is a short description of the command. If no parser is
        given, it defaults to a new, empty OptionParser.
        """
        self.name = name
        self.parser = parser or optparse.OptionParser()
        self.aliases = aliases
        self.help = help

class SubcommandsOptionParser(optparse.OptionParser):
    """A variant of OptionParser that parses subcommands and their
    arguments.
    """
    # A singleton command used to give help on other subcommands.
    _HelpSubcommand = Subcommand('help', optparse.OptionParser(),
        help='give detailed help on a specific sub-command',
        aliases=('?',))

    def __init__(self, *args, **kwargs):
        """Create a new subcommand-aware option parser. All of the
        options to OptionParser.__init__ are supported in addition
        to subcommands, a sequence of Subcommand objects.
        """
        # The subcommand array, with the help command included.
        self.subcommands = list(kwargs.pop('subcommands', []))
        self.subcommands.append(self._HelpSubcommand)

        # A more helpful default usage.
        if 'usage' not in kwargs:
            kwargs['usage'] = """
%prog COMMAND [ARGS...]
%prog help COMMAND"""

        # Super constructor.
        optparse.OptionParser.__init__(self, *args, **kwargs)

        # Adjust the help-visible name of each subcommand.
        for subcommand in self.subcommands:
            subcommand.parser.prog = '%s %s' % \
                    (self.get_prog_name(), subcommand.name)

        # Our root parser needs to stop on the first unrecognized argument.
        self.disable_interspersed_args()

    def add_subcommand(self, cmd):
        """Adds a Subcommand object to the parser's list of commands.
        """
        self.subcommands.append(cmd)

    # Add the list of subcommands to the help message.
    def format_help(self, formatter=None):
        """Return the regular OptionParser help text followed by a
        "Commands" section listing every subcommand (with its aliases)
        and its help string.
        """
        # Get the original help message, to which we will append.
        out = optparse.OptionParser.format_help(self, formatter)
        if formatter is None:
            formatter = self.formatter

        # Subcommands header.
        result = ["\n"]
        result.append(formatter.format_heading('Commands'))
        formatter.indent()

        # Generate the display names (including aliases).
        # Also determine the help position.
        disp_names = []
        help_position = 0
        for subcommand in self.subcommands:
            name = subcommand.name
            if subcommand.aliases:
                name += ' (%s)' % ', '.join(subcommand.aliases)
            disp_names.append(name)

            # Set the help position based on the max width.
            proposed_help_position = len(name) + formatter.current_indent + 2
            if proposed_help_position <= formatter.max_help_position:
                help_position = max(help_position, proposed_help_position)

        # Add each subcommand to the output.
        for subcommand, name in zip(self.subcommands, disp_names):
            # Lifted directly from optparse.py.
            name_width = help_position - formatter.current_indent - 2
            if len(name) > name_width:
                name = "%*s%s\n" % (formatter.current_indent, "", name)
                indent_first = help_position
            else:
                # The name column is followed by two spaces, matching
                # the "- 2" used to compute name_width above.
                name = "%*s%-*s  " % (formatter.current_indent, "",
                                      name_width, name)
                indent_first = 0
            result.append(name)
            help_width = formatter.width - help_position
            # textwrap.wrap returns an empty list for empty text, which
            # is the default help of a Subcommand; fall back to a
            # single empty line instead of indexing into nothing.
            help_lines = textwrap.wrap(subcommand.help, help_width) or ['']
            result.append("%*s%s\n" % (indent_first, "", help_lines[0]))
            result.extend(["%*s%s\n" % (help_position, "", line)
                           for line in help_lines[1:]])
        formatter.dedent()

        # Concatenate the original help message with the subcommand
        # list.
        return out + "".join(result)

    def _subcommand_for_name(self, name):
        """Return the subcommand in self.subcommands matching the
        given name. The name may either be the name of a subcommand or
        an alias. If no subcommand matches, returns None.
        """
        for subcommand in self.subcommands:
            if name == subcommand.name or \
               name in subcommand.aliases:
                return subcommand
        return None

    def parse_args(self, a=None, v=None):
        """Like OptionParser.parse_args, but returns these four items:
        - options: the options passed to the root parser
        - subcommand: the Subcommand object that was invoked
        - suboptions: the options passed to the subcommand parser
        - subargs: the positional arguments passed to the subcommand

        Prints help and exits when no command is given or when the
        help subcommand is invoked; reports an error and exits when a
        command name is unknown.
        """
        options, args = optparse.OptionParser.parse_args(self, a, v)

        if not args:
            # No command given.
            self.print_help()
            self.exit()
        else:
            cmdname = args.pop(0)
            subcommand = self._subcommand_for_name(cmdname)
            if not subcommand:
                self.error('unknown command ' + cmdname)

        suboptions, subargs = subcommand.parser.parse_args(args)

        if subcommand is self._HelpSubcommand:
            if subargs:
                # particular
                cmdname = subargs[0]
                helpcommand = self._subcommand_for_name(cmdname)
                if not helpcommand:
                    # Report the bad name the same way as an unknown
                    # top-level command rather than failing on None.
                    self.error('unknown command ' + cmdname)
                helpcommand.parser.print_help()
                self.exit()
            else:
                # general
                self.print_help()
                self.exit()

        return options, subcommand, suboptions, subargs
# The root parser and its main function.
def main(args=None, configfh=None):
"""Run the main command-line interface for beets."""
# Get the default subcommands.
from beets.ui.commands import default_commands
# Read defaults from config file.
config = ConfigParser.SafeConfigParser()
if configfh:
configpath = None
elif CONFIG_PATH_VAR in os.environ:
configpath = os.path.expanduser(os.environ[CONFIG_PATH_VAR])
else:
configpath = DEFAULT_CONFIG_FILE
if configpath:
configpath = util.syspath(configpath)
if os.path.exists(util.syspath(configpath)):
configfh = open(configpath)
else:
configfh = None
if configfh:
config.readfp(configfh)
# Add plugin paths.
plugpaths = config_val(config, 'beets', 'pluginpath', '')
for plugpath in plugpaths.split(':'):
sys.path.append(os.path.expanduser(plugpath))
# Load requested plugins.
plugnames = config_val(config, 'beets', 'plugins', '')
plugins.load_plugins(plugnames.split())
plugins.load_listeners()
plugins.send("pluginload")
plugins.configure(config)
# Construct the root parser.
commands = list(default_commands)
commands += plugins.commands()
parser = SubcommandsOptionParser(subcommands=commands)
parser.add_option('-l', '--library', dest='libpath',
help='library database file to use')
parser.add_option('-d', '--directory', dest='directory',
help="destination music directory")
parser.add_option('-p', '--pathformat', dest='path_format',
help="destination path format string")
parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
help='print debugging information')
# Parse the command-line!
options, subcommand, suboptions, subargs = parser.parse_args(args)
# Open library file.
libpath = options.libpath or \
config_val(config, 'beets', 'library', DEFAULT_LIBRARY)
directory = options.directory or \
config_val(config, 'beets', 'directory', DEFAULT_DIRECTORY)
legacy_path_format = config_val(config, 'beets', 'path_format', None)
if options.path_format:
# If given, -p overrides all path format settings
path_formats = {'default': options.path_format}
else:
if legacy_path_format:
# Old path formats override the default values.
path_formats = {'default': legacy_path_format}
else:
# If no legacy path format, use the defaults instead.
path_formats = DEFAULT_PATH_FORMATS
if config.has_section('paths'):
path_formats.update(config.items('paths'))
art_filename = \
config_val(config, 'beets', 'art_filename', DEFAULT_ART_FILENAME)
db_path = os.path.expanduser(libpath)
try:
lib = library.Library(db_path,
directory,
path_formats,
art_filename)
except sqlite3.OperationalError:
raise UserError("database file %s could not be opened" % db_path)
# Configure the logger.
log = logging.getLogger('beets')
if options.verbose:
log.setLevel(logging.DEBUG)
else:
log.setLevel(logging.INFO)
# Invoke the subcommand.
try:
subcommand.func(lib, config, suboptions, subargs)
except UserError, exc:
message = exc.args[0] if exc.args else None
subcommand.parser.error(message)
except IOError, exc:
if exc.errno == errno.EPIPE:
# "Broken pipe". End silently.
pass
else:
raise

722
lib/beets/ui/commands.py Executable file
View File

@@ -0,0 +1,722 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""This module provides the default commands for beets' command-line
interface.
"""
from __future__ import with_statement # Python 2.5

import logging
import sys
import os
import time

from lib.beets import ui
from lib.beets.ui import print_
from lib.beets import autotag
import lib.beets.autotag.art
from lib.beets import plugins
from lib.beets import importer
from lib.beets.util import syspath, normpath
# Global logger.
log = logging.getLogger('beets')
# The list of default subcommands. This is populated with Subcommand
# objects that can be fed to a SubcommandsOptionParser.
default_commands = []
# import: Autotagger and importer.
# The DEFAULT_IMPORT_* values below are the fallbacks import_func uses
# when neither a command-line flag nor a config entry supplies a value.
DEFAULT_IMPORT_COPY = True
DEFAULT_IMPORT_WRITE = True
DEFAULT_IMPORT_DELETE = False
DEFAULT_IMPORT_AUTOT = True
DEFAULT_IMPORT_TIMID = False
DEFAULT_IMPORT_ART = True
DEFAULT_IMPORT_QUIET = False
# Compared against the string 'asis' in import_func; any other value
# selects the SKIP action.
DEFAULT_IMPORT_QUIET_FALLBACK = 'skip'
# Tri-state: True, False, or None meaning "ask the user".
DEFAULT_IMPORT_RESUME = None # "ask"
DEFAULT_THREADED = True
DEFAULT_COLOR = True
# Marker string used by show_change to detect Various Artists releases.
VARIOUS_ARTISTS = u'Various Artists'
# Importer utilities and support.
def dist_string(dist, color):
    """Render a distance (a float) as a similarity percentage.

    The percentage is ``(1 - dist) * 100`` with one decimal place. When
    `color` is true the text is wrapped with ``ui.colorize``: green up
    to the strong-recommendation threshold, yellow up to the medium
    threshold, red beyond that.
    """
    percent = '%.1f%%' % ((1 - dist) * 100)
    if not color:
        return percent

    # Pick the hue first, then colorize exactly once.
    if dist <= autotag.STRONG_REC_THRESH:
        hue = 'green'
    elif dist <= autotag.MEDIUM_REC_THRESH:
        hue = 'yellow'
    else:
        hue = 'red'
    return ui.colorize(hue, percent)
def show_change(cur_artist, cur_album, items, info, dist, color=True):
    """Print the changes that retagging an album would make.

    The current tags are (cur_artist, cur_album, items); `info` is the
    proposed album metadata (a dict with 'artist', 'album' and 'tracks'
    keys) and `dist` its distance. Output is colorized when `color` is
    true.
    """
    def show_album(artist, album):
        # One indented line naming the album, with whatever is known.
        if artist:
            label = ' %s - %s' % (artist, album)
        elif album:
            label = ' %s' % album
        else:
            label = ' (unknown album)'
        print_(label)

    # Identify the album in question.
    unchanged = cur_artist == info['artist'] and \
        (cur_album == info['album'] or info['album'] == VARIOUS_ARTISTS)
    if unchanged:
        print_("Tagging: %s - %s" % (info['artist'], info['album']))
    else:
        old_artist, new_artist = cur_artist or '', info['artist']
        old_album, new_album = cur_album or '', info['album']
        if new_artist == VARIOUS_ARTISTS:
            # Hide artists for VA releases.
            old_artist, new_artist = u'', u''

        if color:
            old_artist, new_artist = ui.colordiff(old_artist, new_artist)
            old_album, new_album = ui.colordiff(old_album, new_album)

        print_("Correcting tags from:")
        show_album(old_artist, old_album)
        print_("To:")
        show_album(new_artist, new_album)

    # Distance/similarity.
    print_('(Similarity: %s)' % dist_string(dist, color))

    # Tracks: the proposed track number is the position in the match.
    for index, (item, track_data) in enumerate(zip(items, info['tracks'])):
        old_track = str(item.track)
        new_track = str(index + 1)
        old_title = item.title
        new_title = track_data['title']

        # Possibly colorize changes.
        if color:
            old_title, new_title = ui.colordiff(old_title, new_title)
            if old_track != new_track:
                old_track = ui.colorize('red', old_track)
                new_track = ui.colorize('red', new_track)

        title_changed = old_title != new_title
        track_changed = old_track != new_track
        if title_changed and track_changed:
            print_(" * %s (%s) -> %s (%s)" % (
                old_title, old_track, new_title, new_track
            ))
        elif title_changed:
            print_(" * %s -> %s" % (old_title, new_title))
        elif track_changed:
            print_(" * %s (%s -> %s)" % (item.title, old_track, new_track))
def show_item_change(item, info, dist, color):
    """Print what tagging the single track `item` with the metadata in
    `info` would change, followed by the similarity for `dist`.
    """
    old_artist, old_title = item.artist, item.title
    new_artist, new_title = info['artist'], info['title']

    if old_artist == new_artist and old_title == new_title:
        # Nothing differs; just announce the track.
        print_("Tagging track: %s - %s" % (old_artist, old_title))
    else:
        if color:
            old_artist, new_artist = ui.colordiff(old_artist, new_artist)
            old_title, new_title = ui.colordiff(old_title, new_title)

        print_("Correcting track tags from:")
        print_(" %s - %s" % (old_artist, old_title))
        print_("To:")
        print_(" %s - %s" % (new_artist, new_title))

    print_('(Similarity: %s)' % dist_string(dist, color))
def should_resume(config, path):
    """Ask the user whether the interrupted import of `path` should be
    resumed and return the answer given by ``ui.input_yn``. `config` is
    accepted but not used.
    """
    prompt = ("Import of the directory:\n%s"
              "\nwas interrupted. Resume (Y/n)?") % path
    return ui.input_yn(prompt)
def _quiet_fall_back(config):
    """Announce and return the quiet-mode fallback action.

    Used when running quietly and the recommendation is not strong:
    prints which default (skip or as-is) is being applied and returns
    ``config.quiet_fallback``.
    """
    fallback = config.quiet_fallback
    if fallback == importer.action.SKIP:
        print_('Skipping.')
    elif fallback == importer.action.ASIS:
        print_('Importing as-is.')
    else:
        # Only SKIP and ASIS are meaningful fallbacks.
        assert(False)
    return fallback
def choose_candidate(candidates, singleton, rec, color, timid,
                     cur_artist=None, cur_album=None, item=None):
    """Interactively pick one of the sorted `candidates`.

    Works for both full albums and singletons (tracks). For albums the
    candidates are `(dist, items, info)` triples and `cur_artist` and
    `cur_album` must be provided. For singletons the candidates are
    `(dist, info)` pairs and `item` must be provided.

    Returns SKIP, ASIS, TRACKS, MANUAL or MANUAL_ID from
    ``importer.action``, or the chosen candidate: an `(info, items)`
    pair for albums, just the `info` dictionary for singletons. Raises
    ``importer.ImportAbort`` when the user chooses to abort.
    """
    def menu_action(key):
        # Translate a menu letter shared by all prompts into its
        # importer action. Anything else (a candidate number, 'a',
        # 'm') yields None.
        if key == 's':
            return importer.action.SKIP
        if key == 'u':
            return importer.action.ASIS
        if key == 'e':
            return importer.action.MANUAL
        if key == 'i':
            return importer.action.MANUAL_ID
        if key == 't':
            assert not singleton
            return importer.action.TRACKS
        if key == 'b':
            raise importer.ImportAbort()
        return None

    # Sanity check.
    if singleton:
        assert item is not None
    else:
        assert cur_artist is not None
        assert cur_album is not None

    # Zero candidates.
    if not candidates:
        print_("No match found.")
        if singleton:
            opts = ('Use as-is', 'Skip', 'Enter search', 'enter Id',
                    'aBort')
        else:
            opts = ('Use as-is', 'as Tracks', 'Skip', 'Enter search',
                    'enter Id', 'aBort')
        sel = ui.input_options(opts, color=color)
        action = menu_action(sel)
        if action is not None:
            return action
        assert False

    # Is the change good enough? With any recommendation at all, the
    # top candidate is proposed first and the listing is skipped once.
    skip_listing = rec != autotag.RECOMMEND_NONE
    if skip_listing:
        if singleton:
            dist, info = candidates[0]
        else:
            dist, items, info = candidates[0]

    while True:
        # Display and choose from candidates.
        if not skip_listing:
            # Display list of candidates.
            if singleton:
                print_('Finding tags for track "%s - %s".' %
                       (item.artist, item.title))
                print_('Candidates:')
                for num, (c_dist, c_info) in enumerate(candidates):
                    print_('%i. %s - %s (%s)' % (num+1, c_info['artist'],
                           c_info['title'], dist_string(c_dist, color)))
            else:
                print_('Finding tags for album "%s - %s".' %
                       (cur_artist, cur_album))
                print_('Candidates:')
                for num, (c_dist, _, c_info) in enumerate(candidates):
                    print_('%i. %s - %s (%s)' % (num+1, c_info['artist'],
                           c_info['album'], dist_string(c_dist, color)))

            # Ask the user for a choice.
            if singleton:
                opts = ('Skip', 'Use as-is', 'Enter search', 'enter Id',
                        'aBort')
            else:
                opts = ('Skip', 'Use as-is', 'as Tracks', 'Enter search',
                        'enter Id', 'aBort')
            sel = ui.input_options(opts, numrange=(1, len(candidates)),
                                   color=color)
            action = menu_action(sel)
            if action is not None:
                return action

            # Numerical selection.
            if singleton:
                dist, info = candidates[sel-1]
            else:
                dist, items, info = candidates[sel-1]
        skip_listing = False

        # Show what we're about to do.
        if singleton:
            show_item_change(item, info, dist, color)
        else:
            show_change(cur_artist, cur_album, items, info, dist, color)

        # Exact match => tag automatically if we're not in timid mode.
        if rec == autotag.RECOMMEND_STRONG and not timid:
            if singleton:
                return info
            return info, items

        # Ask for confirmation.
        if singleton:
            opts = ('Apply', 'More candidates', 'Skip', 'Use as-is',
                    'Enter search', 'enter Id', 'aBort')
        else:
            opts = ('Apply', 'More candidates', 'Skip', 'Use as-is',
                    'as Tracks', 'Enter search', 'enter Id', 'aBort')
        sel = ui.input_options(opts, color=color)
        if sel == 'a':
            if singleton:
                return info
            return info, items
        if sel != 'm':
            # 'm' simply loops back to the candidate listing.
            action = menu_action(sel)
            if action is not None:
                return action
def manual_search(singleton):
    """Prompt for manual search terms.

    Asks for an artist and then either a track name (when `singleton`
    is true) or an album name. Returns an ``(artist, name)`` pair of
    decoded strings with surrounding whitespace stripped.
    """
    # sys.stdin.encoding is None when stdin is not a terminal (for
    # example when input is piped); decoding with None would raise a
    # TypeError, so fall back to UTF-8.
    encoding = sys.stdin.encoding or 'utf8'
    artist = raw_input('Artist: ').decode(encoding)
    name = raw_input('Track: ' if singleton else 'Album: ').decode(encoding)
    return artist.strip(), name.strip()
def manual_id(singleton):
    """Prompt for a MusicBrainz ID, for a track when `singleton` is
    true and for an album otherwise. Returns the decoded, stripped
    input.
    """
    prompt = 'Enter MusicBrainz %s ID: ' % ('track' if singleton else 'album')
    # sys.stdin.encoding is None when stdin is not a terminal; fall back
    # to UTF-8 rather than passing None to decode().
    encoding = sys.stdin.encoding or 'utf8'
    return raw_input(prompt).decode(encoding).strip()
def choose_match(task, config):
    """Settle on album metadata for `task`, interactively unless
    ``config.quiet`` is set.

    Returns an (info, items) pair, or one of the ASIS, SKIP or TRACKS
    actions.
    """
    def retag(*terms, **kwterms):
        # Re-run the album autotagger with manual search terms; a
        # failed search leaves no candidates and no recommendation.
        try:
            _, _, found, found_rec = \
                autotag.tag_album(task.items, config.timid,
                                  *terms, **kwterms)
        except autotag.AutotagError:
            return None, None
        return found, found_rec

    # Show what we're tagging.
    print_()
    print_(task.path)

    if config.quiet:
        # No input; just make a decision.
        if task.rec != autotag.RECOMMEND_STRONG:
            return _quiet_fall_back(config)
        dist, items, info = task.candidates[0]
        show_change(task.cur_artist, task.cur_album, items, info, dist,
                    config.color)
        return info, items

    # Loop until we have a choice.
    candidates, rec = task.candidates, task.rec
    passthrough = (importer.action.SKIP, importer.action.ASIS,
                   importer.action.TRACKS)
    while True:
        # Ask for a choice from the user.
        choice = choose_candidate(candidates, False, rec, config.color,
                                  config.timid, task.cur_artist,
                                  task.cur_album)

        if choice in passthrough:
            # Pass selection to main control flow.
            return choice

        if choice is importer.action.MANUAL:
            # Try again with manual search terms.
            search_artist, search_album = manual_search(False)
            candidates, rec = retag(search_artist, search_album)
        elif choice is importer.action.MANUAL_ID:
            # Try a manually-entered ID.
            search_id = manual_id(False)
            candidates, rec = retag(search_id=search_id)
        else:
            # We have a candidate! Finish tagging. Here, choice is
            # an (info, items) pair as desired.
            assert not isinstance(choice, importer.action)
            return choice
def choose_item(task, config):
    """Settle on metadata for the single track in `task`, interactively
    unless ``config.quiet`` is set. Returns either an action constant
    or a track info dictionary.
    """
    print_()
    print_(task.item.path)
    candidates, rec = task.item_match

    if config.quiet:
        # Quiet mode; make a decision.
        if rec != autotag.RECOMMEND_STRONG:
            return _quiet_fall_back(config)
        dist, track_info = candidates[0]
        show_item_change(task.item, track_info, dist, config.color)
        return track_info

    while True:
        # Ask for a choice.
        picked = choose_candidate(candidates, True, rec, config.color,
                                  config.timid, item=task.item)

        if picked in (importer.action.SKIP, importer.action.ASIS):
            return picked
        elif picked == importer.action.TRACKS:
            assert False # TRACKS is only legal for albums.
        elif picked == importer.action.MANUAL:
            # Continue in the loop with a new set of candidates.
            search_artist, search_title = manual_search(True)
            candidates, rec = autotag.tag_item(task.item, config.timid,
                                               search_artist, search_title)
        elif picked == importer.action.MANUAL_ID:
            # Ask for a track ID.
            search_id = manual_id(True)
            candidates, rec = autotag.tag_item(task.item, config.timid,
                                               search_id=search_id)
        else:
            # Chose a candidate.
            assert not isinstance(picked, importer.action)
            return picked
# The import command.
def import_files(lib, paths, copy, write, autot, logpath, art, threaded,
                 color, delete, quiet, resume, quiet_fallback, singletons,
                 timid):
    """Import the files in the given list of paths, tagging each leaf
    directory as an album. If copy, then the files are copied into
    the library folder. If write, then new metadata is written to the
    files themselves. If not autot, then just import the files
    without attempting to tag. If logpath is provided, then untaggable
    albums will be logged there. If art, then attempt to download
    cover art for each album. If threaded, then accelerate autotagging
    imports by running them in multiple threads. If color, then
    ANSI-colorize some terminal output. If delete, then old files are
    deleted when they are copied. If quiet, then the user is
    never prompted for input; instead, the tagger just skips anything
    it is not confident about. resume indicates whether interrupted
    imports can be resumed and is either a boolean or None.
    quiet_fallback should be either ASIS or SKIP and indicates what
    should happen in quiet mode when the recommendation is not strong.
    If singletons, the paths are imported as individual tracks and only
    need to exist; otherwise each path must be a directory. timid is
    passed through to the importer and may not be combined with quiet.

    Raises ui.UserError for a path that fails those checks or when both
    quiet and timid are set.
    """
    # Check the user-specified directories.
    for path in paths:
        if not singletons and not os.path.isdir(syspath(path)):
            raise ui.UserError('not a directory: ' + path)
        elif singletons and not os.path.exists(syspath(path)):
            raise ui.UserError('no such file: ' + path)

    # Check parameter consistency.
    if quiet and timid:
        raise ui.UserError("can't be both quiet and timid")

    # Open the log.
    if logpath:
        logpath = normpath(logpath)
        logfile = open(syspath(logpath), 'a')
    else:
        logfile = None

    # Everything that runs while the log is open sits inside the try so
    # the handle is closed even when the import raises (for instance
    # when the user aborts).
    try:
        if logfile:
            logfile.write('import started %s\n' % time.asctime())

        # Never ask for input in quiet mode.
        if resume is None and quiet:
            resume = False

        # Perform the import.
        importer.run_import(
            lib = lib,
            paths = paths,
            resume = resume,
            logfile = logfile,
            color = color,
            quiet = quiet,
            quiet_fallback = quiet_fallback,
            copy = copy,
            write = write,
            art = art,
            delete = delete,
            threaded = threaded,
            autot = autot,
            choose_match_func = choose_match,
            should_resume_func = should_resume,
            singletons = singletons,
            timid = timid,
            choose_item_func = choose_item,
        )
    finally:
        # If we were logging, terminate the entry and close the file.
        if logfile:
            logfile.write('\n')
            logfile.close()

    # Emit event.
    plugins.send('import', lib=lib, paths=paths)
# The "import" subcommand and its command-line options. The paired
# options (-c/-C, -w/-W, ...) share a dest; options declared with
# default=None let import_func tell "not given" apart from an explicit
# choice.
import_cmd = ui.Subcommand('import', help='import new music',
    aliases=('imp', 'im'))
import_cmd.parser.add_option('-c', '--copy', action='store_true',
    default=None, help="copy tracks into library directory (default)")
import_cmd.parser.add_option('-C', '--nocopy', action='store_false',
    dest='copy', help="don't copy tracks (opposite of -c)")
import_cmd.parser.add_option('-w', '--write', action='store_true',
    default=None, help="write new metadata to files' tags (default)")
import_cmd.parser.add_option('-W', '--nowrite', action='store_false',
    dest='write', help="don't write metadata (opposite of -w)")
import_cmd.parser.add_option('-a', '--autotag', action='store_true',
    dest='autotag', help="infer tags for imported files (default)")
import_cmd.parser.add_option('-A', '--noautotag', action='store_false',
    dest='autotag',
    help="don't infer tags for imported files (opposite of -a)")
import_cmd.parser.add_option('-p', '--resume', action='store_true',
    default=None, help="resume importing if interrupted")
import_cmd.parser.add_option('-P', '--noresume', action='store_false',
    dest='resume', help="do not try to resume importing")
import_cmd.parser.add_option('-r', '--art', action='store_true',
    default=None, help="try to download album art")
import_cmd.parser.add_option('-R', '--noart', action='store_false',
    dest='art', help="don't download album art (opposite of -r)")
import_cmd.parser.add_option('-q', '--quiet', action='store_true',
    dest='quiet', help="never prompt for input: skip albums instead")
import_cmd.parser.add_option('-l', '--log', dest='logpath',
    help='file to log untaggable albums for later review')
import_cmd.parser.add_option('-s', '--singletons', action='store_true',
    help='import individual tracks instead of full albums')
import_cmd.parser.add_option('-t', '--timid', dest='timid',
    action='store_true', help='always confirm all actions')
def import_func(lib, config, opts, args):
    """Entry point of the ``import`` subcommand.

    Resolves every import setting (an explicit command-line option
    wins, then the 'beets' config section, then the module default)
    and hands the result to import_files with `args` as the paths.
    """
    copy = opts.copy
    if copy is None:
        copy = ui.config_val(config, 'beets', 'import_copy',
                             DEFAULT_IMPORT_COPY, bool)

    write = opts.write
    if write is None:
        write = ui.config_val(config, 'beets', 'import_write',
                              DEFAULT_IMPORT_WRITE, bool)

    delete = ui.config_val(config, 'beets', 'import_delete',
                           DEFAULT_IMPORT_DELETE, bool)

    # Autotagging has no config entry, only the flag and the default.
    autot = opts.autotag
    if autot is None:
        autot = DEFAULT_IMPORT_AUTOT

    art = opts.art
    if art is None:
        art = ui.config_val(config, 'beets', 'import_art',
                            DEFAULT_IMPORT_ART, bool)

    threaded = ui.config_val(config, 'beets', 'threaded',
                             DEFAULT_THREADED, bool)
    color = ui.config_val(config, 'beets', 'color', DEFAULT_COLOR, bool)

    quiet = opts.quiet
    if quiet is None:
        quiet = DEFAULT_IMPORT_QUIET
    quiet_fallback_str = ui.config_val(config, 'beets',
                                       'import_quiet_fallback',
                                       DEFAULT_IMPORT_QUIET_FALLBACK)

    singletons = opts.singletons

    timid = opts.timid
    if timid is None:
        timid = ui.config_val(config, 'beets', 'import_timid',
                              DEFAULT_IMPORT_TIMID, bool)

    logpath = opts.logpath
    if logpath is None:
        logpath = ui.config_val(config, 'beets', 'import_log', None)

    # Resume has three options: yes, no, and "ask" (None).
    resume = opts.resume
    if resume is None:
        resume = ui.config_val(config, 'beets', 'import_resume',
                               DEFAULT_IMPORT_RESUME)
    if isinstance(resume, basestring):
        # A value read from the config is text; map it onto the
        # tri-state, treating anything unrecognized as "ask".
        lowered = resume.lower()
        if lowered in ('yes', 'true', 't', 'y', '1'):
            resume = True
        elif lowered in ('no', 'false', 'f', 'n', '0'):
            resume = False
        else:
            resume = None

    if quiet_fallback_str == 'asis':
        quiet_fallback = importer.action.ASIS
    else:
        quiet_fallback = importer.action.SKIP

    import_files(lib, args, copy, write, autot, logpath, art, threaded,
                 color, delete, quiet, resume, quiet_fallback, singletons,
                 timid)
import_cmd.func = import_func
default_commands.append(import_cmd)
# list: Query and show library contents.
def list_items(lib, query, album, path):
    """Print the entries of `lib` that match `query`.

    With `album` set, albums are listed instead of individual items.
    With `path` set, each match is printed as its path (the item
    directory for an album) rather than as a human-readable line.
    """
    if album:
        for match in lib.albums(query):
            if path:
                print_(match.item_dir())
            else:
                print_(match.albumartist + u' - ' + match.album)
        return

    for match in lib.items(query):
        if path:
            print_(match.path)
        else:
            print_(match.artist + u' - ' + match.album + u' - ' + match.title)
# The "list" subcommand (alias "ls") and its two boolean flags, which
# list_func forwards to list_items.
list_cmd = ui.Subcommand('list', help='query the library', aliases=('ls',))
list_cmd.parser.add_option('-a', '--album', action='store_true',
    help='show matching albums instead of tracks')
list_cmd.parser.add_option('-p', '--path', action='store_true',
    help='print paths for matched items or albums')
def list_func(lib, config, opts, args):
    """Entry point of the ``list`` subcommand: build a query from
    `args` and print the matches according to the flags in `opts`.
    """
    query = ui.make_query(args)
    list_items(lib, query, opts.album, opts.path)
list_cmd.func = list_func
default_commands.append(list_cmd)
# remove: Remove items from library, delete files.
def remove_items(lib, query, album, delete=False):
    """Remove the entries matching `query` from `lib` after asking the
    user to confirm.

    With `album` set, whole albums are matched and removed. With
    `delete` set, the files are removed from disk as well. Nothing
    happens when no item matches or the user declines.
    """
    # Get the matching items.
    if album:
        albums = list(lib.albums(query))
        items = []
        for matched_album in albums:
            items.extend(matched_album.items())
    else:
        items = list(lib.items(query))

    if not items:
        print_('No matching items found.')
        return

    # Show all the items.
    for item in items:
        print_(item.artist + ' - ' + item.album + ' - ' + item.title)

    # Confirm with user.
    print_()
    count = len(items)
    if delete:
        prompt = 'Really DELETE %i files (y/n)?' % count
    else:
        prompt = 'Really remove %i items from the library (y/n)?' % count
    confirmed = ui.input_yn(prompt, True)
    if not confirmed:
        return

    # Remove (and possibly delete) items.
    if album:
        for matched_album in albums:
            matched_album.remove(delete)
    else:
        for item in items:
            lib.remove(item, delete)

    lib.save()
# The "remove" subcommand (alias "rm") and its two boolean flags, which
# remove_func forwards to remove_items.
remove_cmd = ui.Subcommand('remove',
    help='remove matching items from the library', aliases=('rm',))
remove_cmd.parser.add_option("-d", "--delete", action="store_true",
    help="also remove files from disk")
remove_cmd.parser.add_option('-a', '--album', action='store_true',
    help='match albums instead of tracks')
def remove_func(lib, config, opts, args):
    """Entry point of the ``remove`` subcommand: build a query from
    `args` and remove the matches according to the flags in `opts`.
    """
    query = ui.make_query(args)
    remove_items(lib, query, opts.album, opts.delete)
remove_cmd.func = remove_func
default_commands.append(remove_cmd)
# stats: Show library/query statistics.
def show_stats(lib, query):
    """Print summary statistics for the items in `lib` matching
    `query`: track count, total time, estimated total size and the
    number of distinct artist and album names.
    """
    track_count = 0
    seconds = 0.0
    size_bytes = 0
    artist_names = set()
    album_names = set()

    for item in lib.items(query):
        #fixme This is approximate, so people might complain that
        # this total size doesn't match "du -sh". Could fix this
        # by putting total file size in the database.
        size_bytes += int(item.length * item.bitrate / 8)
        seconds += item.length
        track_count += 1
        artist_names.add(item.artist)
        album_names.add(item.album)

    report = """Tracks: %i
Total time: %s
Total size: %s
Artists: %i
Albums: %i""" % (
        track_count,
        ui.human_seconds(seconds),
        ui.human_bytes(size_bytes),
        len(artist_names), len(album_names)
    )
    print_(report)
# The "stats" subcommand; it takes no options of its own.
stats_cmd = ui.Subcommand('stats',
    help='show statistics about the library or a query')
def stats_func(lib, config, opts, args):
    """Entry point of the ``stats`` subcommand: build a query from
    `args` and print statistics for the matching items.
    """
    query = ui.make_query(args)
    show_stats(lib, query)
stats_cmd.func = stats_func
default_commands.append(stats_cmd)
# version: Show current beets version.
def show_version(lib, config, opts, args):
    """Entry point of the ``version`` subcommand: print the beets
    version followed by the names of the loaded plugins. None of the
    arguments are used.
    """
    # None of this module's imports bind the name ``beets``, so reading
    # ``beets.__version__`` directly raises NameError. Import the
    # vendored package here so the function works on its own.
    from lib import beets

    print('beets version %s' % beets.__version__)

    # Show plugins, identified by the last component of their module
    # name.
    names = []
    for plugin in plugins.find_plugins():
        modname = plugin.__module__
        names.append(modname.split('.')[-1])
    if names:
        print('plugins: ' + ', '.join(names))
    else:
        print('no plugins loaded')
# The "version" subcommand; show_version is used directly as its
# handler and the command is added to the default command list.
version_cmd = ui.Subcommand('version',
    help='output version information')
version_cmd.func = show_version
default_commands.append(version_cmd)

255
lib/beets/util/__init__.py Normal file
View File

@@ -0,0 +1,255 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Miscellaneous utility functions."""
import os
import sys
import re
# Maximum length of a single path component; sanitize_path truncates
# every component to this many characters.
MAX_FILENAME_LENGTH = 200
def normpath(path):
    """Return the canonical form of `path` for storing in the database:
    the home-directory shorthand expanded, made absolute, and
    normalized.
    """
    expanded = os.path.expanduser(path)
    absolute = os.path.abspath(expanded)
    return os.path.normpath(absolute)
def ancestry(path, pathmod=None):
    """Return the directories that contain `path`, outermost first:
    its parent, grandparent, and so on. For instance:
    >>> ancestry('/a/b/c')
    ['/', '/a', '/a/b']

    `pathmod` selects the path module to use and defaults to os.path.
    """
    pathmod = pathmod or os.path
    parents = []
    previous = None
    while path:
        path = pathmod.dirname(path)
        if path == previous:
            # dirname() stopped making progress: we reached the root.
            break
        previous = path
        if path: # don't record ''
            parents.append(path)
    # Collected innermost-first; callers expect outermost-first.
    parents.reverse()
    return parents
def sorted_walk(path):
    """Like os.walk, but with a deterministic, sorted order.

    Yields a ``(path, dirs, files)`` tuple for `path` and then, for
    each subdirectory in sorted order, everything beneath it before
    moving on to the next subdirectory. The very list object yielded as
    ``dirs`` is the one iterated afterwards, so a consumer that edits
    it in place changes which subdirectories are visited.
    """
    # Make sure the path isn't a Unicode string.
    path = bytestring_path(path)

    # Sort the entries at this level into directories and files.
    dirs, files = [], []
    for base in os.listdir(path):
        cur = os.path.join(path, base)
        bucket = dirs if os.path.isdir(syspath(cur)) else files
        bucket.append(base)

    # Sort lists and yield the current level.
    dirs.sort()
    files.sort()
    yield (path, dirs, files)

    # Recurse into directories.
    for base in dirs:
        for res in sorted_walk(os.path.join(path, base)):
            yield res
def mkdirall(path):
    """Create every missing directory that encloses `path` (like
    mkdir -p on the parent). `path` itself is not created.
    """
    for ancestor in ancestry(path):
        native = syspath(ancestor)
        if os.path.isdir(native):
            continue
        os.mkdir(native)
def prune_dirs(path, root):
    """Remove `path` if it is an empty directory, then keep removing
    empty parents, stopping below `root` (which is never removed). If
    `path` is not contained in `root`, nothing is removed.
    """
    path = normpath(path)
    root = normpath(root)

    ancestors = ancestry(path)
    if root not in ancestors:
        return

    # Only remove directories below the root, deepest first.
    doomed = ancestors[ancestors.index(root)+1:]
    doomed.append(path)
    for directory in reversed(doomed):
        try:
            os.rmdir(syspath(directory))
        except OSError:
            # Could not be removed (os.rmdir raised); stop climbing.
            break
def components(path, pathmod=None):
    """Return a list of the path components in `path`. An ancestor
    that has no basename (the root) is kept as-is. For instance:
    >>> components('/a/b/c')
    ['/', 'a', 'b', 'c']

    `pathmod` selects the path module to use and defaults to os.path.
    """
    pathmod = pathmod or os.path
    comps = []
    for anc in ancestry(path, pathmod):
        # The root has an empty basename; keep the root itself.
        comps.append(pathmod.basename(anc) or anc)

    last = pathmod.basename(path)
    if last:
        comps.append(last)
    return comps
def bytestring_path(path):
    """Return `path` as a str. A str is passed through unchanged;
    anything else is encoded with the filesystem (or default) encoding,
    falling back to UTF-8 when that encoding cannot represent it.
    """
    if not isinstance(path, str):
        fs_encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
        try:
            path = path.encode(fs_encoding)
        except UnicodeError:
            path = path.encode('utf8')
    return path
def syspath(path, pathmod=None):
    """Convert `path` for use by the operating system.

    Unless the path module is ntpath (Windows), the path is returned
    untouched. On Windows it is decoded to unicode if necessary and
    given the magic ``\\\\?\\`` prefix unless it already has one.
    `pathmod` defaults to os.path.
    """
    pathmod = pathmod or os.path

    # Don't do anything if we're not on windows
    if pathmod.__name__ != 'ntpath':
        return path

    if not isinstance(path, unicode):
        # Try to decode with default encodings, but fall back to UTF8.
        fs_encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
        try:
            path = path.decode(fs_encoding, 'replace')
        except UnicodeError:
            path = path.decode('utf8', 'replace')

    # Add the magic prefix if it isn't already there
    magic_prefix = u'\\\\?\\'
    if not path.startswith(magic_prefix):
        path = magic_prefix + path
    return path
def soft_remove(path):
    """Remove the file at `path` if it exists; otherwise do nothing."""
    target = syspath(path)
    if not os.path.exists(target):
        return
    os.remove(target)
# Note: POSIX actually supports \ and : -- I just think they're
# a pain. And ? has caused problems for some.
# (pattern, replacement) pairs that sanitize_path applies, in order, to
# every path component: backslash, slash, question mark and a leading
# dot become '_'; a colon becomes '-'.
CHAR_REPLACE = [
    (re.compile(r'[\\/\?]|^\.'), '_'),
    (re.compile(r':'), '-'),
]
# A single (pattern, replacement) pair that sanitize_path applies in
# addition when the path module is ntpath: double quote, asterisk,
# angle brackets, pipe, a leading or trailing dot, and trailing spaces
# become '_'.
CHAR_REPLACE_WINDOWS = re.compile('["\*<>\|]|^\.|\.$| +$'), '_'
def sanitize_path(path, pathmod=None):
    """Return a copy of `path` in which every component is legal.

    Only works with fragments; won't work reliably on Windows when a
    path begins with a drive letter. Path separators (including altsep!)
    should already be cleaned from the path components. Returns '' when
    the path has no components. `pathmod` defaults to os.path.
    """
    pathmod = pathmod or os.path
    on_windows = pathmod.__name__ == 'ntpath'

    comps = components(path, pathmod)
    if not comps:
        return ''

    cleaned = []
    for comp in comps:
        # Replace special characters.
        for regex, repl in CHAR_REPLACE:
            comp = regex.sub(repl, comp)
        if on_windows:
            win_regex, win_repl = CHAR_REPLACE_WINDOWS
            comp = win_regex.sub(win_repl, comp)

        # Truncate each component.
        cleaned.append(comp[:MAX_FILENAME_LENGTH])

    return pathmod.join(*cleaned)
def sanitize_for_path(value, pathmod, key=None):
    """Turn a field `value` into text suitable for one piece of a path.

    Strings get the separators of `pathmod` replaced with '_'. For
    other values `key` picks the format: track and disc numbers and
    totals are zero-padded to two digits, a bitrate is shown in kbps,
    and anything else is converted with unicode(). This doesn't
    guarantee that the whole path will be valid; you should still call
    sanitize_path on the complete path.
    """
    if isinstance(value, basestring):
        for sep in (pathmod.sep, pathmod.altsep):
            if sep:
                value = value.replace(sep, u'_')
        return value

    if key in ('track', 'tracktotal', 'disc', 'disctotal'):
        # pad with zeros
        return u'%02i' % value
    if key == 'bitrate':
        # Bitrate gets formatted as kbps.
        return u'%ikbps' % (value / 1000)
    return unicode(value)
def str2bool(value):
    """Interpret a human-entered string as a boolean: True for 'yes',
    '1', 'true', 't' or 'y' in any letter case, False for anything
    else.
    """
    return value.lower() in ('yes', '1', 'true', 't', 'y')
def levenshtein(s1, s2):
    """Return the edit distance between `s1` and `s2`.

    A row-by-row dynamic-programming implementation, after Wikibooks:
    http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/
    Levenshtein_distance#Python
    """
    # Keep the longer sequence in the outer loop.
    if len(s1) < len(s2):
        s1, s2 = s2, s1
    if not s1:
        return len(s2)

    prev_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        row = [i + 1]
        for j, c2 in enumerate(s2):
            insert_cost = prev_row[j + 1] + 1
            delete_cost = row[j] + 1
            replace_cost = prev_row[j] + (c1 != c2)
            row.append(min(insert_cost, delete_cost, replace_cost))
        prev_row = row

    return prev_row[-1]

View File

@@ -0,0 +1,178 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""A metaclass for enumerated types that really are types.
You can create enumerations with `enum(values, [name])` and they work
how you would expect them to.
>>> from enumeration import enum
>>> Direction = enum('north east south west', name='Direction')
>>> Direction.west
Direction.west
>>> Direction.west == Direction.west
True
>>> Direction.west == Direction.east
False
>>> isinstance(Direction.west, Direction)
True
>>> Direction[3]
Direction.west
>>> Direction['west']
Direction.west
>>> Direction.west.name
'west'
>>> Direction.north < Direction.west
True
Enumerations are classes; their instances represent the possible values
of the enumeration. Because Python classes must have names, you may
provide a `name` parameter to `enum`; if you don't, a meaningless one
will be chosen for you.
"""
import random
class Enumeration(type):
    """A metaclass whose classes are enumerations.

    The `values` attribute of the class is used to populate the
    enumeration. Values may either be a list of enumerated names or a
    string containing a space-separated list of names. When the class
    is created, it is instantiated once for each name in `values`, with
    the name and its position as the two constructor arguments.

    The items are then reachable as class attributes, by name or
    integer index through subscripting, and by iterating the class.
    Assigning attributes on the class raises TypeError.

    The `Enumerated` class is a good choice for a superclass.
    """
    def __init__(cls, name, bases, dic):
        super(Enumeration, cls).__init__(name, bases, dic)

        if 'values' not in dic:
            # Do nothing if no values are provided (i.e., with
            # Enumerated itself).
            return

        # May be called with a single string, in which case we split on
        # whitespace for convenience.
        values = dic['values']
        if isinstance(values, basestring):
            values = values.split()

        # Create the Enumerated instances for each value. We have to use
        # super's __setattr__ here because we disallow setattr below.
        super(Enumeration, cls).__setattr__('_items_dict', {})
        super(Enumeration, cls).__setattr__('_items_list', [])
        for value in values:
            item = cls(value, len(cls._items_list))
            cls._items_dict[value] = item
            cls._items_list.append(item)

    def __getattr__(cls, key):
        # This hook only runs when normal lookup fails. A class created
        # without `values` has no item storage, so looking the storage
        # up below would re-enter this method and recurse without
        # bound; report the missing attribute instead.
        if key in ('_items_dict', '_items_list'):
            raise AttributeError("enumeration '" + cls.__name__ +
                                 "' has no values")
        try:
            return cls._items_dict[key]
        except KeyError:
            raise AttributeError("enumeration '" + cls.__name__ +
                                 "' has no item '" + key + "'")

    def __setattr__(cls, key, val):
        raise TypeError("enumerations do not support attribute assignment")

    def __getitem__(cls, key):
        # Integers index by position; anything else is looked up as an
        # item name.
        if isinstance(key, int):
            return cls._items_list[key]
        else:
            return getattr(cls, key)

    def __len__(cls):
        return len(cls._items_list)

    def __iter__(cls):
        return iter(cls._items_list)

    def __nonzero__(cls):
        # Ensures that __len__ doesn't get called before __init__ by
        # pydoc.
        return True
class Enumerated(object):
    """A single member of an enumeration.

    Supplies the instance behaviour shared by enumerated objects; the
    metaclass is already set to `Enumeration`, so subclasses only need
    to define `values`.

    Instance attributes:
    name -- The name of the item.
    index -- The index of the item in its enumeration.

    >>> from enumeration import Enumerated
    >>> class Garment(Enumerated):
    ...     values = 'hat glove belt poncho lederhosen suspenders'
    ...     def wear(self):
    ...         print 'now wearing a ' + self.name
    ...
    >>> Garment.poncho.wear()
    now wearing a poncho
    """
    __metaclass__ = Enumeration

    def __init__(self, name, index):
        """Record the item's name and its position in the enumeration."""
        self.name = name
        self.index = index

    def __str__(self):
        """Render the item as ``ClassName.itemname``."""
        owner = type(self).__name__
        return owner + '.' + self.name

    def __repr__(self):
        """Use the same text as `__str__`."""
        return str(self)

    def __cmp__(self, other):
        """Order items of the same enumeration by their index."""
        if type(self) is not type(other):
            return NotImplemented
        # Note that we're assuming that the items are direct
        # instances of the same Enumeration (i.e., no fancy
        # subclassing), which is probably okay.
        return cmp(self.index, other.index)
def enum(*values, **kwargs):
    """Build a new Enumeration class in a single call.

    The enumeration values may be given as a list, as one
    space-delimited string, or simply as the positional arguments. Pass
    the `name` keyword argument to name the class; without it (or with
    None) a name is generated.

    The following are all equivalent:

        enum('pinkie ring middle index thumb')
        enum('pinkie', 'ring', 'middle', 'index', 'thumb')
        enum(['pinkie', 'ring', 'middle', 'index', 'thumb'])
    """
    name = kwargs.get('name')
    if name is None:
        # Create a probably-unique name. It doesn't really have to be
        # unique, but getting distinct names each time helps with
        # identification in debugging.
        name = 'Enumeration' + hex(random.randint(0,0xfffffff))[2:].upper()

    if len(values) == 1:
        # A lone argument may itself be the whole collection of values:
        # either a string to be split or any iterable of names.
        only = values[0]
        if isinstance(only, basestring) or hasattr(only, '__iter__'):
            values = only

    return type(name, (Enumerated,), {'values': values})

442
lib/beets/util/pipeline.py Normal file
View File

@@ -0,0 +1,442 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Simple but robust implementation of generator/coroutine-based
pipelines in Python. The pipelines may be run either sequentially
(single-threaded) or in parallel (one thread per pipeline stage).
This implementation supports pipeline bubbles (indications that the
processing for a certain item should abort). To use them, yield the
BUBBLE constant from any stage coroutine except the last.
In the parallel case, the implementation transparently handles thread
shutdown when the processing is complete and when a stage raises an
exception. KeyboardInterrupts (^C) are also handled.
When running a parallel pipeline, it is also possible to use
multiple coroutines for the same pipeline stage; this lets you speed
up a bottleneck stage by dividing its work among multiple threads.
To do so, pass an iterable of coroutines to the Pipeline constructor
in place of any single coroutine.
"""
from __future__ import with_statement # for Python 2.5
import Queue
from threading import Thread, Lock
import sys
import types
# Sentinel a stage yields to drop the current item; _allmsgs() compares
# against it with `==` and turns it into "no messages".
BUBBLE = '__PIPELINE_BUBBLE__'
# Sentinel returned by an exhausted (poisoned) queue; the pipeline threads
# test for it by identity (`msg is POISON`).
POISON = '__PIPELINE_POISON__'
# Default capacity of each inter-stage queue in Pipeline.run_parallel().
DEFAULT_QUEUE_SIZE = 16
def _invalidate_queue(q, val=None, sync=True):
    """Breaks a Queue such that it never blocks, always has size 1,
    and has no maximum size. get()ing from the queue returns `val`,
    which defaults to None. `sync` controls whether a lock is
    required (because it's not reentrant!).

    Pass ``sync=False`` when the caller already holds ``q.mutex``;
    acquiring it a second time would deadlock.
    """
    # Replacement internals installed on the queue instance below.
    def _qsize(len=len):
        return 1
    def _put(item):
        pass
    def _get():
        return val
    if sync:
        q.mutex.acquire()
    try:
        q.maxsize = 0
        q._qsize = _qsize
        q._put = _put
        q._get = _get
        # Wake every thread currently waiting in get() or put().
        q.not_empty.notifyAll()
        q.not_full.notifyAll()
    finally:
        if sync:
            q.mutex.release()
class CountedQueue(Queue.Queue):
    """A queue that keeps track of the number of threads that are
    still feeding into it. The queue is poisoned when all threads are
    finished with the queue.
    """
    def __init__(self, maxsize=0):
        """Create the queue with no registered producers."""
        Queue.Queue.__init__(self, maxsize)
        self.nthreads = 0       # producers that have acquire()d and not yet release()d
        self.poisoned = False   # set once the last producer has released

    def acquire(self):
        """Indicate that a thread will start putting into this queue.
        Should not be called after the queue is already poisoned.
        """
        with self.mutex:
            assert not self.poisoned
            assert self.nthreads >= 0
            self.nthreads += 1

    def release(self):
        """Indicate that a thread that was putting into this queue has
        exited. If this is the last thread using the queue, the queue
        is poisoned.
        """
        with self.mutex:
            self.nthreads -= 1
            assert self.nthreads >= 0
            if self.nthreads == 0:
                # All threads are done adding to this queue. Poison it
                # when it becomes empty.
                self.poisoned = True

                # Replacement _get invalidates when no items remain.
                _old_get = self._get
                def _get():
                    out = _old_get()
                    if not self.queue:
                        # sync=False: this hook is installed as the
                        # queue's _get, and the mutex is not reentrant.
                        _invalidate_queue(self, POISON, False)
                    return out

                if self.queue:
                    # Items remain.
                    self._get = _get
                else:
                    # No items. Invalidate immediately.
                    # sync=False because self.mutex is held here.
                    _invalidate_queue(self, POISON, False)
class MultiMessage(object):
    """Wrapper a pipeline stage yields to hand several values at once
    to the following stage.
    """

    def __init__(self, messages):
        """Keep the collection of messages to forward."""
        self.messages = messages
def multiple(messages):
    """Wrap `messages` so that yielding the result from a pipeline
    stage, as in ``yield multiple([message, ..])``, sends each value to
    the next stage.
    """
    wrapped = MultiMessage(messages)
    return wrapped
def _allmsgs(obj):
    """Expand whatever a stage yielded into a list of messages.

    A MultiMessage contributes the messages it wraps, BUBBLE contributes
    nothing (an empty list), and any other object is a single message.
    """
    if isinstance(obj, MultiMessage):
        return obj.messages
    if obj == BUBBLE:
        return []
    return [obj]
class PipelineThread(Thread):
    """Abstract base class for the threads that run pipeline stages."""

    def __init__(self, all_threads):
        """Set up abort bookkeeping; `all_threads` is the shared list of
        every thread in the pipeline.
        """
        super(PipelineThread, self).__init__()
        self.all_threads = all_threads
        self.exc_info = None
        self.abort_flag = False
        self.abort_lock = Lock()

    def abort(self):
        """Ask this thread to stop at its next opportunity."""
        with self.abort_lock:
            self.abort_flag = True

            # Make sure the thread cannot stay blocked on a queue read
            # or write: break whichever queues this stage owns.
            for attr in ('in_queue', 'out_queue'):
                if hasattr(self, attr):
                    _invalidate_queue(getattr(self, attr))

    def abort_all(self, exc_info):
        """Record `exc_info` on this thread and abort every thread in
        the pipeline.
        """
        self.exc_info = exc_info
        for member in self.all_threads:
            member.abort()
class FirstPipelineThread(PipelineThread):
    """The thread running the first stage in a parallel pipeline setup.
    The coroutine should just be a generator.
    """
    def __init__(self, coro, out_queue, all_threads):
        """Register this thread as a producer on `out_queue`.

        The abort lock and flag are created by PipelineThread.__init__,
        so they are not re-created here.
        """
        super(FirstPipelineThread, self).__init__(all_threads)
        self.coro = coro
        self.out_queue = out_queue
        self.out_queue.acquire()

    def run(self):
        """Drain the generator into `out_queue` until it is exhausted,
        the thread is aborted, or the generator raises.
        """
        try:
            while True:
                with self.abort_lock:
                    if self.abort_flag:
                        return

                # Get the value from the generator.
                try:
                    msg = self.coro.next()
                except StopIteration:
                    break

                # Send messages to the next stage.
                for out_msg in _allmsgs(msg):
                    with self.abort_lock:
                        if self.abort_flag:
                            return
                    self.out_queue.put(out_msg)

        except:
            # Any failure (including KeyboardInterrupt) stops the whole
            # pipeline; the exception info is kept on this thread.
            self.abort_all(sys.exc_info())
            return

        # Generator finished; shut down the pipeline.
        self.out_queue.release()
class MiddlePipelineThread(PipelineThread):
    """A thread running any stage in the pipeline except the first or
    last.
    """
    def __init__(self, coro, in_queue, out_queue, all_threads):
        """Store the stage coroutine and its queues, and register this
        thread as a producer on `out_queue`.
        """
        super(MiddlePipelineThread, self).__init__(all_threads)
        self.coro = coro
        self.in_queue = in_queue
        self.out_queue = out_queue
        self.out_queue.acquire()

    def run(self):
        """Feed messages from `in_queue` through the coroutine and put
        its results on `out_queue` until POISON arrives, the thread is
        aborted, or the coroutine raises.
        """
        try:
            # Prime the coroutine.
            self.coro.next()

            while True:
                with self.abort_lock:
                    if self.abort_flag:
                        return

                # Get the message from the previous stage.
                msg = self.in_queue.get()
                if msg is POISON:
                    break

                # Check again: get() may have returned only because the
                # queue was invalidated by abort().
                with self.abort_lock:
                    if self.abort_flag:
                        return

                # Invoke the current stage.
                out = self.coro.send(msg)

                # Send messages to next stage.
                for msg in _allmsgs(out):
                    with self.abort_lock:
                        if self.abort_flag:
                            return
                    self.out_queue.put(msg)

        except:
            # Record the failure and stop every thread in the pipeline.
            self.abort_all(sys.exc_info())
            return

        # Pipeline is shutting down normally.
        self.out_queue.release()
class LastPipelineThread(PipelineThread):
    """A thread running the last stage in a pipeline. The coroutine
    should yield nothing.
    """
    def __init__(self, coro, in_queue, all_threads):
        """Store the consumer coroutine and the queue it reads from."""
        super(LastPipelineThread, self).__init__(all_threads)
        self.coro = coro
        self.in_queue = in_queue

    def run(self):
        """Send every message from `in_queue` to the coroutine until
        POISON arrives, the thread is aborted, or the coroutine raises.
        """
        try:
            # Prime the coroutine. This happens inside the try block (as
            # in the middle stage) so that a consumer failing during
            # start-up aborts the other threads instead of leaving them
            # blocked on a queue nobody reads.
            self.coro.next()

            while True:
                with self.abort_lock:
                    if self.abort_flag:
                        return

                # Get the message from the previous stage.
                msg = self.in_queue.get()
                if msg is POISON:
                    break

                with self.abort_lock:
                    if self.abort_flag:
                        return

                # Send to consumer.
                self.coro.send(msg)

        except:
            # Record the failure and stop every thread in the pipeline.
            self.abort_all(sys.exc_info())
            return
class Pipeline(object):
"""Represents a staged pattern of work. Each stage in the pipeline
is a coroutine that receives messages from the previous stage and
yields messages to be sent to the next stage.
"""
def __init__(self, stages):
"""Makes a new pipeline from a list of coroutines. There must
be at least two stages.
"""
if len(stages) < 2:
raise ValueError('pipeline must have at least two stages')
self.stages = []
for stage in stages:
if isinstance(stage, types.GeneratorType):
# Default to one thread per stage.
self.stages.append((stage,))
else:
self.stages.append(stage)
def run_sequential(self):
"""Run the pipeline sequentially in the current thread. The
stages are run one after the other. Only the first coroutine
in each stage is used.
"""
coros = [stage[0] for stage in self.stages]
# "Prime" the coroutines.
for coro in coros[1:]:
coro.next()
# Begin the pipeline.
for out in coros[0]:
msgs = _allmsgs(out)
for coro in coros[1:]:
next_msgs = []
for msg in msgs:
out = coro.send(msg)
next_msgs.extend(_allmsgs(out))
msgs = next_msgs
def run_parallel(self, queue_size=DEFAULT_QUEUE_SIZE):
"""Run the pipeline in parallel using one thread per stage. The
messages between the stages are stored in queues of the given
size.
"""
queues = [CountedQueue(queue_size) for i in range(len(self.stages)-1)]
threads = []
# Set up first stage.
for coro in self.stages[0]:
threads.append(FirstPipelineThread(coro, queues[0], threads))
# Middle stages.
for i in range(1, len(self.stages)-1):
for coro in self.stages[i]:
threads.append(MiddlePipelineThread(
coro, queues[i-1], queues[i], threads
))
# Last stage.
for coro in self.stages[-1]:
threads.append(
LastPipelineThread(coro, queues[-1], threads)
)
# Start threads.
for thread in threads:
thread.start()
# Wait for termination. The final thread lasts the longest.
try:
# Using a timeout allows us to receive KeyboardInterrupt
# exceptions during the join().
while threads[-1].isAlive():
threads[-1].join(1)
except:
# Stop all the threads immediately.
for thread in threads:
thread.abort()
raise
finally:
# Make completely sure that all the threads have finished
# before we return. They should already be either finished,
# in normal operation, or aborted, in case of an exception.
for thread in threads[:-1]:
thread.join()
for thread in threads:
exc_info = thread.exc_info
if exc_info:
# Make the exception appear as it was raised originally.
raise exc_info[0], exc_info[1], exc_info[2]
# Smoke test.
if __name__ == '__main__':
    import time

    # Test a normally-terminating pipeline both in sequence and
    # in parallel.
    def produce():
        # First stage: a plain generator of five integers.
        for i in range(5):
            print 'generating %i' % i
            time.sleep(1)
            yield i
    def work():
        # Middle stage: receives a number, yields its double.
        num = yield
        while True:
            print 'processing %i' % num
            time.sleep(2)
            num = yield num*2
    def consume():
        # Last stage: receives numbers and yields nothing.
        while True:
            num = yield
            time.sleep(1)
            print 'received %i' % num

    # Time the same pipeline run sequentially, in parallel, and in
    # parallel with two coroutines sharing the middle stage.
    ts_start = time.time()
    Pipeline([produce(), work(), consume()]).run_sequential()
    ts_seq = time.time()
    Pipeline([produce(), work(), consume()]).run_parallel()
    ts_par = time.time()
    Pipeline([produce(), (work(), work()), consume()]).run_parallel()
    ts_end = time.time()
    print 'Sequential time:', ts_seq - ts_start
    print 'Parallel time:', ts_par - ts_seq
    print 'Multiply-parallel time:', ts_end - ts_par
    print

    # Test a pipeline that raises an exception.
    def exc_produce():
        for i in range(10):
            print 'generating %i' % i
            time.sleep(1)
            yield i
    def exc_work():
        num = yield
        while True:
            print 'processing %i' % num
            time.sleep(3)
            # Fail part-way through to exercise the abort path.
            if num == 3:
                raise Exception()
            num = yield num * 2
    def exc_consume():
        while True:
            num = yield
            #if num == 4:
            #   raise Exception()
            print 'received %i' % num

    # Queue size 1 is passed as run_parallel's queue_size argument.
    Pipeline([exc_produce(), exc_work(), exc_consume()]).run_parallel(1)

48
lib/beets/vfs.py Normal file
View File

@@ -0,0 +1,48 @@
# This file is part of beets.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""A simple utility for constructing filesystem-like trees from beets
libraries.
"""
from collections import namedtuple
from lib.beets import util
# A virtual directory: `files` and `dirs` are both used as dictionaries.
# `files` maps a file name to an item id and `dirs` maps a directory name
# to a child Node.
Node = namedtuple('Node', ['files', 'dirs'])
def _insert(node, path, itemid):
    """Insert an item into a virtual filesystem node.

    `path` is the sequence of path components; every component but the
    last names a directory (created on demand) and the last one is the
    file name stored with `itemid`.
    """
    current = node
    remaining = path
    # Walk down one directory per leading component.
    while len(remaining) != 1:
        dirname = remaining[0]
        remaining = remaining[1:]
        if dirname not in current.dirs:
            current.dirs[dirname] = Node({}, {})
        current = current.dirs[dirname]

    # Last component. Insert file.
    current.files[remaining[0]] = itemid
def libtree(lib):
    """Build a filesystem-like directory tree for the files contained
    in `lib`.

    Every node is a (files, dirs) named tuple whose two components are
    dictionaries: `files` maps filenames to Item ids and `dirs` maps
    directory names to child nodes. The root node is returned.
    """
    tree = Node({}, {})
    for entry in lib.items():
        # Split the item's destination fragment into path components.
        components = util.components(lib.destination(entry, fragment=True))
        _insert(tree, components, entry.id)
    return tree

791
lib/munkres.py Executable file
View File

@@ -0,0 +1,791 @@
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Documentation is intended to be processed by Epydoc.
"""
Introduction
============
The Munkres module provides an implementation of the Munkres algorithm
(also called the Hungarian algorithm or the Kuhn-Munkres algorithm),
useful for solving the Assignment Problem.
Assignment Problem
==================
Let *C* be an *n*\ x\ *n* matrix representing the costs of each of *n* workers
to perform any of *n* jobs. The assignment problem is to assign jobs to
workers in a way that minimizes the total cost. Since each worker can perform
only one job and each job can be assigned to only one worker the assignments
represent an independent set of the matrix *C*.
One way to generate the optimal set is to create all permutations of
the indexes necessary to traverse the matrix so that no row and column
are used more than once. For instance, given this matrix (expressed in
Python)::
matrix = [[5, 9, 1],
[10, 3, 2],
[8, 7, 4]]
You could use this code to generate the traversal indexes::
def permute(a, results):
if len(a) == 1:
results.insert(len(results), a)
else:
for i in range(0, len(a)):
element = a[i]
a_copy = [a[j] for j in range(0, len(a)) if j != i]
subresults = []
permute(a_copy, subresults)
for subresult in subresults:
result = [element] + subresult
results.insert(len(results), result)
results = []
permute(range(len(matrix)), results) # [0, 1, 2] for a 3x3 matrix
After the call to permute(), the results matrix would look like this::
[[0, 1, 2],
[0, 2, 1],
[1, 0, 2],
[1, 2, 0],
[2, 0, 1],
[2, 1, 0]]
You could then use that index matrix to loop over the original cost matrix
and calculate the smallest cost of the combinations::
minval = sys.maxint
for indexes in results:
    cost = 0
    for row, col in enumerate(indexes):
        cost += matrix[row][col]
    minval = min(cost, minval)
print minval
While this approach works fine for small matrices, it does not scale. It
executes in O(*n*!) time: Calculating the permutations for an *n*\ x\ *n*
matrix requires *n*! operations. For a 12x12 matrix, that's 479,001,600
traversals. Even if you could manage to perform each traversal in just one
millisecond, it would still take more than 133 hours to perform the entire
traversal. A 20x20 matrix would take 2,432,902,008,176,640,000 operations. At
an optimistic millisecond per operation, that's more than 77 million years.
The Munkres algorithm runs in O(*n*\ ^3) time, rather than O(*n*!). This
package provides an implementation of that algorithm.
This version is based on
http://www.public.iastate.edu/~ddoty/HungarianAlgorithm.html.
This version was written for Python by Brian Clapper from the (Ada) algorithm
at the above web site. (The ``Algorithm::Munkres`` Perl version, in CPAN, was
clearly adapted from the same web site.)
Usage
=====
Construct a Munkres object::
from munkres import Munkres
m = Munkres()
Then use it to compute the lowest cost assignment from a cost matrix. Here's
a sample program::
from munkres import Munkres, print_matrix
matrix = [[5, 9, 1],
[10, 3, 2],
[8, 7, 4]]
m = Munkres()
indexes = m.compute(matrix)
print_matrix(matrix, msg='Lowest cost through this matrix:')
total = 0
for row, column in indexes:
value = matrix[row][column]
total += value
print '(%d, %d) -> %d' % (row, column, value)
print 'total cost=%d' % total
Running that program produces::
Lowest cost through this matrix:
[5, 9, 1]
[10, 3, 2]
[8, 7, 4]
(0, 0) -> 5
(1, 1) -> 3
(2, 2) -> 4
total cost=12
The instantiated Munkres object can be used multiple times on different
matrices.
Non-square Cost Matrices
========================
The Munkres algorithm assumes that the cost matrix is square. However, it's
possible to use a rectangular matrix if you first pad it with 0 values to make
it square. This module automatically pads rectangular cost matrices to make
them square.
Notes:
- The module operates on a *copy* of the caller's matrix, so any padding will
not be seen by the caller.
- The cost matrix must be rectangular or square. An irregular matrix will
*not* work.
Calculating Profit, Rather than Cost
====================================
The cost matrix is just that: A cost matrix. The Munkres algorithm finds
the combination of elements (one from each row and column) that results in
the smallest cost. It's also possible to use the algorithm to maximize
profit. To do that, however, you have to convert your profit matrix to a
cost matrix. The simplest way to do that is to subtract all elements from a
large value. For example::
from munkres import Munkres, print_matrix
matrix = [[5, 9, 1],
[10, 3, 2],
[8, 7, 4]]
cost_matrix = []
for row in matrix:
cost_row = []
for col in row:
cost_row += [sys.maxint - col]
cost_matrix += [cost_row]
m = Munkres()
indexes = m.compute(cost_matrix)
print_matrix(matrix, msg='Highest profit through this matrix:')
total = 0
for row, column in indexes:
value = matrix[row][column]
total += value
print '(%d, %d) -> %d' % (row, column, value)
print 'total profit=%d' % total
Running that program produces::
Highest profit through this matrix:
[5, 9, 1]
[10, 3, 2]
[8, 7, 4]
(0, 1) -> 9
(1, 0) -> 10
(2, 2) -> 4
total profit=23
The ``munkres`` module provides a convenience method for creating a cost
matrix from a profit matrix. Since it doesn't know whether the matrix contains
floating point numbers, decimals, or integers, you have to provide the
conversion function; but the convenience method takes care of the actual
creation of the cost matrix::
import munkres
cost_matrix = munkres.make_cost_matrix(matrix,
lambda cost: sys.maxint - cost)
So, the above profit-calculation program can be recast as::
from munkres import Munkres, print_matrix, make_cost_matrix
matrix = [[5, 9, 1],
[10, 3, 2],
[8, 7, 4]]
cost_matrix = make_cost_matrix(matrix, lambda cost: sys.maxint - cost)
m = Munkres()
indexes = m.compute(cost_matrix)
print_matrix(matrix, msg='Highest profit through this matrix:')
total = 0
for row, column in indexes:
value = matrix[row][column]
total += value
print '(%d, %d) -> %d' % (row, column, value)
print 'total profit=%d' % total
References
==========
1. http://www.public.iastate.edu/~ddoty/HungarianAlgorithm.html
2. Harold W. Kuhn. The Hungarian Method for the assignment problem.
*Naval Research Logistics Quarterly*, 2:83-97, 1955.
3. Harold W. Kuhn. Variants of the Hungarian method for assignment
problems. *Naval Research Logistics Quarterly*, 3: 253-258, 1956.
4. Munkres, J. Algorithms for the Assignment and Transportation Problems.
*Journal of the Society of Industrial and Applied Mathematics*,
5(1):32-38, March, 1957.
5. http://en.wikipedia.org/wiki/Hungarian_algorithm
Copyright and License
=====================
This software is released under a BSD license, adapted from
<http://opensource.org/licenses/bsd-license.php>
Copyright (c) 2008 Brian M. Clapper
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name "clapper.org" nor the names of its contributors may be
used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
__docformat__ = 'restructuredtext'
# ---------------------------------------------------------------------------
# Imports
# ---------------------------------------------------------------------------
import sys
# ---------------------------------------------------------------------------
# Exports
# ---------------------------------------------------------------------------
__all__ = ['Munkres', 'make_cost_matrix']
# ---------------------------------------------------------------------------
# Globals
# ---------------------------------------------------------------------------
# Info about the module
__version__ = "1.0.5.4"
__author__ = "Brian Clapper, bmc@clapper.org"
__url__ = "http://software.clapper.org/munkres/"
__copyright__ = "(c) 2008 Brian M. Clapper"
__license__ = "BSD-style license"
# ---------------------------------------------------------------------------
# Classes
# ---------------------------------------------------------------------------
class Munkres:
    """
    Calculate the Munkres solution to the classical assignment problem.
    See the module documentation for usage.
    """

    def __init__(self):
        """Create a new instance"""
        self.C = None            # working (padded) copy of the cost matrix
        self.row_covered = []
        self.col_covered = []
        self.n = 0               # side length of the padded matrix
        self.Z0_r = 0            # row of the primed zero handed to step 5
        self.Z0_c = 0            # column of that primed zero
        self.marked = None       # 0 = unmarked, 1 = starred, 2 = primed
        self.path = None

    def make_cost_matrix(profit_matrix, inversion_function):
        """
        **DEPRECATED**

        Please use the module function ``make_cost_matrix()``.
        """
        # Inside this function body the name refers to the module-level
        # function (class scope is not searched), so this is a plain
        # delegation, not recursion. It avoids re-importing this module
        # by a top-level name it may not be installed under.
        return make_cost_matrix(profit_matrix, inversion_function)

    make_cost_matrix = staticmethod(make_cost_matrix)

    def pad_matrix(self, matrix, pad_value=0):
        """
        Pad a possibly non-square matrix to make it square.

        :Parameters:
            matrix : list of lists
                matrix to pad
            pad_value : int
                value to use to pad the matrix

        :rtype: list of lists
        :return: a new, possibly padded, matrix
        """
        max_columns = 0
        total_rows = len(matrix)

        for row in matrix:
            max_columns = max(max_columns, len(row))

        total_rows = max(max_columns, total_rows)

        new_matrix = []
        for row in matrix:
            row_len = len(row)
            new_row = row[:]
            if total_rows > row_len:
                # Row too short. Pad it.
                new_row += [pad_value] * (total_rows - row_len)
            new_matrix += [new_row]

        while len(new_matrix) < total_rows:
            new_matrix += [[pad_value] * total_rows]

        return new_matrix

    def compute(self, cost_matrix):
        """
        Compute the indexes for the lowest-cost pairings between rows and
        columns in the database. Returns a list of (row, column) tuples
        that can be used to traverse the matrix.

        :Parameters:
            cost_matrix : list of lists
                The cost matrix. If this cost matrix is not square, it
                will be padded with zeros, via a call to ``pad_matrix()``.
                (This method does *not* modify the caller's matrix. It
                operates on a copy of the matrix.)

                **WARNING**: This code handles square and rectangular
                matrices. It does *not* handle irregular matrices.

        :rtype: list
        :return: A list of ``(row, column)`` tuples that describe the lowest
                 cost path through the matrix. An empty matrix yields an
                 empty list.
        """
        self.C = self.pad_matrix(cost_matrix)
        self.n = len(self.C)
        self.original_length = len(cost_matrix)
        # An empty matrix has no first row to measure.
        if len(cost_matrix) > 0:
            self.original_width = len(cost_matrix[0])
        else:
            self.original_width = 0
        self.row_covered = [False for i in range(self.n)]
        self.col_covered = [False for i in range(self.n)]
        self.Z0_r = 0
        self.Z0_c = 0
        self.path = self.__make_matrix(self.n * 2, 0)
        self.marked = self.__make_matrix(self.n, 0)

        step = 1

        steps = { 1 : self.__step1,
                  2 : self.__step2,
                  3 : self.__step3,
                  4 : self.__step4,
                  5 : self.__step5,
                  6 : self.__step6 }

        # Each step returns the number of the next step; a number with no
        # entry in the table (step 3 returns 7) means the algorithm is
        # finished. The lookup is kept apart from the call so that a
        # KeyError raised inside a step is not mistaken for completion.
        while True:
            func = steps.get(step)
            if func is None:
                break
            step = func()

        # Look for the starred columns
        results = []
        for i in range(self.original_length):
            for j in range(self.original_width):
                if self.marked[i][j] == 1:
                    results += [(i, j)]

        return results

    def __copy_matrix(self, matrix):
        """Return an exact copy of the supplied matrix"""
        # Imported here because the module itself only imports `sys`.
        import copy
        return copy.deepcopy(matrix)

    def __make_matrix(self, n, val):
        """Create an *n*x*n* matrix, populating it with the specific value."""
        matrix = []
        for i in range(n):
            matrix += [[val for j in range(n)]]
        return matrix

    def __step1(self):
        """
        For each row of the matrix, find the smallest element and
        subtract it from every element in its row. Go to Step 2.
        """
        n = self.n
        for i in range(n):
            minval = min(self.C[i])
            # Find the minimum value for this row and subtract that minimum
            # from every element in the row.
            for j in range(n):
                self.C[i][j] -= minval

        return 2

    def __step2(self):
        """
        Find a zero (Z) in the resulting matrix. If there is no starred
        zero in its row or column, star Z. Repeat for each element in the
        matrix. Go to Step 3.
        """
        n = self.n
        for i in range(n):
            for j in range(n):
                if (self.C[i][j] == 0) and \
                   (not self.col_covered[j]) and \
                   (not self.row_covered[i]):
                    self.marked[i][j] = 1
                    self.col_covered[j] = True
                    self.row_covered[i] = True

        self.__clear_covers()
        return 3

    def __step3(self):
        """
        Cover each column containing a starred zero. If K columns are
        covered, the starred zeros describe a complete set of unique
        assignments. In this case, Go to DONE, otherwise, Go to Step 4.
        """
        n = self.n
        count = 0
        for i in range(n):
            for j in range(n):
                if self.marked[i][j] == 1:
                    self.col_covered[j] = True
                    count += 1

        if count >= n:
            step = 7 # done
        else:
            step = 4

        return step

    def __step4(self):
        """
        Find a noncovered zero and prime it. If there is no starred zero
        in the row containing this primed zero, Go to Step 5. Otherwise,
        cover this row and uncover the column containing the starred
        zero. Continue in this manner until there are no uncovered zeros
        left. Save the smallest uncovered value and Go to Step 6.
        """
        step = 0
        done = False
        row = -1
        col = -1
        star_col = -1
        while not done:
            (row, col) = self.__find_a_zero()
            if row < 0:
                done = True
                step = 6
            else:
                self.marked[row][col] = 2
                star_col = self.__find_star_in_row(row)
                if star_col >= 0:
                    col = star_col
                    self.row_covered[row] = True
                    self.col_covered[col] = False
                else:
                    done = True
                    self.Z0_r = row
                    self.Z0_c = col
                    step = 5

        return step

    def __step5(self):
        """
        Construct a series of alternating primed and starred zeros as
        follows. Let Z0 represent the uncovered primed zero found in Step 4.
        Let Z1 denote the starred zero in the column of Z0 (if any).
        Let Z2 denote the primed zero in the row of Z1 (there will always
        be one). Continue until the series terminates at a primed zero
        that has no starred zero in its column. Unstar each starred zero
        of the series, star each primed zero of the series, erase all
        primes and uncover every line in the matrix. Return to Step 3
        """
        count = 0
        path = self.path
        path[count][0] = self.Z0_r
        path[count][1] = self.Z0_c
        done = False
        while not done:
            row = self.__find_star_in_col(path[count][1])
            if row >= 0:
                count += 1
                path[count][0] = row
                path[count][1] = path[count-1][1]
            else:
                done = True

            if not done:
                col = self.__find_prime_in_row(path[count][0])
                count += 1
                path[count][0] = path[count-1][0]
                path[count][1] = col

        self.__convert_path(path, count)
        self.__clear_covers()
        self.__erase_primes()
        return 3

    def __step6(self):
        """
        Add the value found in Step 4 to every element of each covered
        row, and subtract it from every element of each uncovered column.
        Return to Step 4 without altering any stars, primes, or covered
        lines.
        """
        minval = self.__find_smallest()
        for i in range(self.n):
            for j in range(self.n):
                if self.row_covered[i]:
                    self.C[i][j] += minval
                if not self.col_covered[j]:
                    self.C[i][j] -= minval
        return 4

    def __find_smallest(self):
        """Find the smallest uncovered value in the matrix."""
        # sys.maxint where the interpreter provides it; sys.maxsize is the
        # fallback for interpreters that do not.
        minval = getattr(sys, 'maxint', sys.maxsize)
        for i in range(self.n):
            for j in range(self.n):
                if (not self.row_covered[i]) and (not self.col_covered[j]):
                    if minval > self.C[i][j]:
                        minval = self.C[i][j]
        return minval

    def __find_a_zero(self):
        """
        Find an uncovered element with value 0.

        Rows are scanned from the top and the search stops after the
        first row that contains an uncovered zero; within that row the
        scan does not stop early, so the right-most uncovered zero is
        the one returned. Returns ``(row, col)``, or ``(-1, -1)`` when
        there is no uncovered zero.
        """
        row = -1
        col = -1
        i = 0
        n = self.n
        done = False

        while not done:
            j = 0
            while True:
                if (self.C[i][j] == 0) and \
                   (not self.row_covered[i]) and \
                   (not self.col_covered[j]):
                    row = i
                    col = j
                    done = True
                j += 1
                if j >= n:
                    break
            i += 1
            if i >= n:
                done = True

        return (row, col)

    def __find_star_in_row(self, row):
        """
        Find the first starred element in the specified row. Returns
        the column index, or -1 if no starred element was found.
        """
        col = -1
        for j in range(self.n):
            if self.marked[row][j] == 1:
                col = j
                break

        return col

    def __find_star_in_col(self, col):
        """
        Find the first starred element in the specified column. Returns
        the row index, or -1 if no starred element was found.
        """
        row = -1
        for i in range(self.n):
            if self.marked[i][col] == 1:
                row = i
                break

        return row

    def __find_prime_in_row(self, row):
        """
        Find the first primed element in the specified row. Returns
        the column index, or -1 if no primed element was found.
        """
        col = -1
        for j in range(self.n):
            if self.marked[row][j] == 2:
                col = j
                break

        return col

    def __convert_path(self, path, count):
        """Flip the marks along the first ``count + 1`` path entries:
        starred cells become unmarked, every other cell becomes starred.
        """
        for i in range(count+1):
            if self.marked[path[i][0]][path[i][1]] == 1:
                self.marked[path[i][0]][path[i][1]] = 0
            else:
                self.marked[path[i][0]][path[i][1]] = 1

    def __clear_covers(self):
        """Clear all covered matrix cells"""
        for i in range(self.n):
            self.row_covered[i] = False
            self.col_covered[i] = False

    def __erase_primes(self):
        """Erase all prime markings"""
        for i in range(self.n):
            for j in range(self.n):
                if self.marked[i][j] == 2:
                    self.marked[i][j] = 0
# ---------------------------------------------------------------------------
# Functions
# ---------------------------------------------------------------------------
def make_cost_matrix(profit_matrix, inversion_function):
    """
    Build a cost matrix from a profit matrix.

    'inversion_function' is applied to every value; it must accept one
    numeric argument and return the number to use as the corresponding
    cost.

    For example:

    .. python::

        cost_matrix = make_cost_matrix(matrix, lambda x : sys.maxint - x)

    :Parameters:
        profit_matrix : list of lists
            The matrix to convert from a profit to a cost matrix

        inversion_function : function
            The function to use to invert each entry in the profit matrix

    :rtype: list of lists
    :return: A new matrix of the same shape holding the inverted values
    """
    return [[inversion_function(value) for value in row]
            for row in profit_matrix]
def print_matrix(matrix, msg=None):
    """
    Convenience function: Displays the contents of a matrix of integers.

    Every value is right-aligned to the width of the widest entry. The
    width is measured on the formatted text, so zero and negative entries
    are handled (taking ``log10`` of them would raise ``ValueError``).

    :Parameters:
        matrix : list of lists
            Matrix to print

        msg : str
            Optional message to print before displaying the matrix
    """
    if msg is not None:
        print(msg)

    # Calculate the appropriate format width from the rendered numbers.
    width = 0
    for row in matrix:
        for val in row:
            width = max(width, len('%d' % val))

    # Make the format string
    cell_format = '%%%dd' % width

    # Print the matrix, one bracketed row per line.
    for row in matrix:
        sep = '['
        for val in row:
            sys.stdout.write(sep + cell_format % val)
            sep = ', '
        sys.stdout.write(']\n')
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    # Self-test: each entry pairs a cost matrix with the total cost the
    # computed assignment is expected to have.
    matrices = [
        # Square
        ([[400, 150, 400],
          [400, 450, 600],
          [300, 225, 300]],
         850  # expected cost
        ),

        # Rectangular variant
        ([[400, 150, 400, 1],
          [400, 450, 600, 2],
          [300, 225, 300, 3]],
         452  # expected cost
        ),

        # Square
        ([[10, 10,  8],
          [ 9,  8,  1],
          [ 9,  7,  4]],
         18
        ),

        # Rectangular variant
        ([[10, 10,  8, 11],
          [ 9,  8,  1, 1],
          [ 9,  7,  4, 10]],
         15
        ),
    ]

    solver = Munkres()
    for cost_matrix, expected_total in matrices:
        print_matrix(cost_matrix, msg='cost matrix')
        total_cost = 0
        for row, col in solver.compute(cost_matrix):
            cost = cost_matrix[row][col]
            total_cost += cost
            print('(%d, %d) -> %d' % (row, col, cost))
        print('lowest cost=%d' % total_cost)
        assert expected_total == total_cost

217
lib/mutagen/__init__.py Normal file
View File

@@ -0,0 +1,217 @@
#! /usr/bin/env python
#
# mutagen aims to be an all purpose media tagging library
# Copyright (C) 2005 Michael Urman
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# $Id: __init__.py 4348 2008-12-02 02:41:15Z piman $
#
"""Mutagen aims to be an all purpose tagging library.
import mutagen.[format]
metadata = mutagen.[format].Open(filename)
metadata acts like a dictionary of tags in the file. Tags are generally a
list of string-like values, but may have additional methods available
depending on tag or format. They may also be entirely different objects
for certain keys, again depending on format.
"""
version = (1, 20)
version_string = ".".join(map(str, version))
import warnings
from lib.mutagen import _util
class Metadata(object):
    """Abstract base for dict-like tag containers.

    Constructing an instance with any arguments forwards them to
    load(); load(), save() and delete() must be provided by subclasses.
    """

    def __init__(self, *args, **kwargs):
        # Only load when the caller actually supplied something.
        if not (args or kwargs):
            return
        self.load(*args, **kwargs)

    def load(self, *args, **kwargs):
        """Read the tag data; not implemented in the base class."""
        raise NotImplementedError

    def save(self, filename=None):
        """Write the tag data; not implemented in the base class."""
        raise NotImplementedError

    def delete(self, filename=None):
        """Remove the tag data; not implemented in the base class."""
        raise NotImplementedError
class FileType(_util.DictMixin):
    """Abstract wrapper around a file's tags and audio stream information.

    Attributes:
    info -- stream information (length, bitrate, sample rate)
    tags -- metadata tags, if any

    The available tags and stream information differ per file format.

    The dict interface and the save, load and delete calls mirror the
    Metadata interface and are forwarded to the tag object.
    """

    info = None
    tags = None
    filename = None
    _mimes = ["application/octet-stream"]

    def __init__(self, filename=None, *args, **kwargs):
        if filename is not None:
            self.load(filename, *args, **kwargs)
        else:
            warnings.warn("FileType constructor requires a filename",
                          DeprecationWarning)

    def load(self, filename, *args, **kwargs):
        """Read the file; must be provided by subclasses."""
        raise NotImplementedError

    def __getitem__(self, key):
        """Look up a metadata tag key.

        A KeyError is raised when the file has no tags at all.
        """
        if self.tags is None:
            raise KeyError(key)
        return self.tags[key]

    def __setitem__(self, key, value):
        """Set a metadata tag.

        When the file has no tags yet, add_tags() is called first
        (nothing is written until save is called).
        """
        if self.tags is None:
            self.add_tags()
        self.tags[key] = value

    def __delitem__(self, key):
        """Delete a metadata tag key.

        A KeyError is raised when the file has no tags at all.
        """
        if self.tags is None:
            raise KeyError(key)
        del self.tags[key]

    def keys(self):
        """Return the keys of the metadata tag.

        An empty list is returned when the file has no tags at all.
        """
        if self.tags is None:
            return []
        return self.tags.keys()

    def delete(self, filename=None):
        """Remove tags from a file (no-op when there are no tags)."""
        if self.tags is None:
            return None
        if filename is None:
            filename = self.filename
        else:
            warnings.warn(
                "delete(filename=...) is deprecated, reload the file",
                DeprecationWarning)
        return self.tags.delete(filename)

    def save(self, filename=None, **kwargs):
        """Save metadata tags; raises ValueError when there are none."""
        if filename is None:
            filename = self.filename
        else:
            warnings.warn(
                "save(filename=...) is deprecated, reload the file",
                DeprecationWarning)
        if self.tags is None:
            raise ValueError("no tags in file")
        return self.tags.save(filename, **kwargs)

    def pprint(self):
        """Print stream information and comment key=value pairs."""
        stream = "%s (%s)" % (self.info.pprint(), self.mime[0])
        try:
            tags = self.tags.pprint()
        except AttributeError:
            # No tag object (or one without pprint): stream line only.
            return stream
        if tags:
            return stream + ("\n" + tags)
        return stream + ""

    def add_tags(self):
        """Attach an empty tag object; must be provided by subclasses."""
        raise NotImplementedError

    def __get_mime(self):
        # Collect the _mimes of every class in the MRO, most specific
        # first, dropping duplicates while keeping order.
        collected = []
        for klass in type(self).__mro__:
            for candidate in getattr(klass, '_mimes', []):
                if candidate not in collected:
                    collected.append(candidate)
        return collected

    mime = property(__get_mime)
def File(filename, options=None, easy=False):
    """Guess the type of the file and try to open it.

    Every candidate class in 'options' is asked to score the file from
    its name, the open file object and the first 128 bytes; the best
    scoring class is instantiated with the filename. When 'options' is
    None the built-in format classes are used, and 'easy' selects the
    Easy* variants of the ID3, MP3, TrueAudio and MP4 classes.

    None is returned when 'options' is empty or no class scores above 0.
    """
    if options is None:
        from lib.mutagen.asf import ASF
        from lib.mutagen.apev2 import APEv2File
        from lib.mutagen.flac import FLAC
        if easy:
            from lib.mutagen.easyid3 import EasyID3FileType as ID3FileType
        else:
            from lib.mutagen.id3 import ID3FileType
        if easy:
            from lib.mutagen.mp3 import EasyMP3 as MP3
        else:
            from lib.mutagen.mp3 import MP3
        from lib.mutagen.oggflac import OggFLAC
        from lib.mutagen.oggspeex import OggSpeex
        from lib.mutagen.oggtheora import OggTheora
        from lib.mutagen.oggvorbis import OggVorbis
        if easy:
            from lib.mutagen.trueaudio import EasyTrueAudio as TrueAudio
        else:
            from lib.mutagen.trueaudio import TrueAudio
        from lib.mutagen.wavpack import WavPack
        if easy:
            from lib.mutagen.easymp4 import EasyMP4 as MP4
        else:
            from lib.mutagen.mp4 import MP4
        from lib.mutagen.musepack import Musepack
        from lib.mutagen.monkeysaudio import MonkeysAudio
        from lib.mutagen.optimfrog import OptimFROG
        options = [MP3, TrueAudio, OggTheora, OggSpeex, OggVorbis, OggFLAC,
                   FLAC, APEv2File, MP4, ID3FileType, WavPack, Musepack,
                   MonkeysAudio, OptimFROG, ASF]

    if not options:
        return None

    fileobj = open(filename, "rb")
    try:
        header = fileobj.read(128)
        # Rank by score, then by class name, so that the import order
        # above cannot decide between classes with equal scores.
        scored = [(Kind.score(filename, fileobj, header), Kind.__name__)
                  for Kind in options]
    finally:
        fileobj.close()

    ranked = sorted(zip(scored, options))
    (score, name), Kind = ranked[-1]
    if score > 0:
        return Kind(filename)
    return None

153
lib/mutagen/_constants.py Normal file
View File

@@ -0,0 +1,153 @@
"""Constants used by Mutagen."""
# Genre names, kept as a flat list of unicode strings.
# NOTE(review): presumably the list position is the numeric ID3v1 genre
# value, so entries must not be reordered, removed or inserted -- confirm
# against the code that indexes this list.
# NOTE(review): spellings such as "Psychadelic" and "Bebob" look like
# they mirror the historical genre table; confirm before "correcting"
# them, since the strings are runtime data.
GENRES = [
u"Blues",
u"Classic Rock",
u"Country",
u"Dance",
u"Disco",
u"Funk",
u"Grunge",
u"Hip-Hop",
u"Jazz",
u"Metal",
u"New Age",
u"Oldies",
u"Other",
u"Pop",
u"R&B",
u"Rap",
u"Reggae",
u"Rock",
u"Techno",
u"Industrial",
u"Alternative",
u"Ska",
u"Death Metal",
u"Pranks",
u"Soundtrack",
u"Euro-Techno",
u"Ambient",
u"Trip-Hop",
u"Vocal",
u"Jazz+Funk",
u"Fusion",
u"Trance",
u"Classical",
u"Instrumental",
u"Acid",
u"House",
u"Game",
u"Sound Clip",
u"Gospel",
u"Noise",
u"Alt. Rock",
u"Bass",
u"Soul",
u"Punk",
u"Space",
u"Meditative",
u"Instrumental Pop",
u"Instrumental Rock",
u"Ethnic",
u"Gothic",
u"Darkwave",
u"Techno-Industrial",
u"Electronic",
u"Pop-Folk",
u"Eurodance",
u"Dream",
u"Southern Rock",
u"Comedy",
u"Cult",
u"Gangsta",
u"Top 40",
u"Christian Rap",
u"Pop/Funk",
u"Jungle",
u"Native American",
u"Cabaret",
u"New Wave",
u"Psychadelic",
u"Rave",
u"Showtunes",
u"Trailer",
u"Lo-Fi",
u"Tribal",
u"Acid Punk",
u"Acid Jazz",
u"Polka",
u"Retro",
u"Musical",
u"Rock & Roll",
u"Hard Rock",
u"Folk",
u"Folk/Rock",
u"National Folk",
u"Swing",
u"Fusion",
u"Bebob",
u"Latin",
u"Revival",
u"Celtic",
u"Bluegrass",
u"Avantgarde",
u"Gothic Rock",
u"Progressive Rock",
u"Psychadelic Rock",
u"Symphonic Rock",
u"Slow Rock",
u"Big Band",
u"Chorus",
u"Easy Listening",
u"Acoustic",
u"Humour",
u"Speech",
u"Chanson",
u"Opera",
u"Chamber Music",
u"Sonata",
u"Symphony",
u"Booty Bass",
u"Primus",
u"Porn Groove",
u"Satire",
u"Slow Jam",
u"Club",
u"Tango",
u"Samba",
u"Folklore",
u"Ballad",
u"Power Ballad",
u"Rhythmic Soul",
u"Freestyle",
u"Duet",
u"Punk Rock",
u"Drum Solo",
u"A Capella",
u"Euro-House",
u"Dance Hall",
u"Goa",
u"Drum & Bass",
u"Club-House",
u"Hardcore",
u"Terror",
u"Indie",
u"BritPop",
u"Negerpunk",
u"Polsk Punk",
u"Beat",
u"Christian Gangsta Rap",
u"Heavy Metal",
u"Black Metal",
u"Crossover",
u"Contemporary Christian",
u"Christian Rock",
u"Merengue",
u"Salsa",
u"Thrash Metal",
u"Anime",
u"Jpop",
u"Synthpop"
]
"""The ID3v1 genre list."""

314
lib/mutagen/_util.py Normal file
View File

@@ -0,0 +1,314 @@
# Copyright 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# $Id: _util.py 4218 2007-12-02 06:11:20Z piman $
"""Utility classes for Mutagen.
You should not rely on the interfaces here being stable. They are
intended for internal use in Mutagen only.
"""
import struct
from fnmatch import fnmatchcase
class DictMixin(object):
    """Implement the dict API using keys() and __*item__ methods.

    Similar to UserDict.DictMixin, this takes a class that defines
    __getitem__, __setitem__, __delitem__, and keys(), and turns it
    into a full dict-like object.

    UserDict.DictMixin is not suitable for this purpose because it's
    an old-style class.

    This class is not optimized for very large dictionaries; many
    functions have linear memory requirements. I recommend you
    override some of these functions if speed is required.
    """

    def __iter__(self):
        return iter(self.keys())

    def has_key(self, key):
        """Return True if looking up 'key' does not raise KeyError."""
        try:
            self[key]
        except KeyError:
            return False
        else:
            return True
    __contains__ = has_key

    def iterkeys(self):
        return iter(self.keys())

    def values(self):
        """Return the values in the same order as keys()."""
        return [self[key] for key in self.keys()]

    def itervalues(self):
        return iter(self.values())

    def items(self):
        """Return a list of (key, value) pairs."""
        return list(zip(self.keys(), self.values()))

    def iteritems(self):
        return iter(self.items())

    def clear(self):
        """Delete every key."""
        # Snapshot the keys first: deleting changes what keys() reports.
        for key in list(self.keys()):
            del self[key]

    def pop(self, key, *args):
        """Remove 'key' and return its value.

        An optional second argument is returned instead of raising
        KeyError when the key is missing.
        """
        if len(args) > 1:
            raise TypeError("pop takes at most two arguments")
        try:
            value = self[key]
        except KeyError:
            if args:
                return args[0]
            else:
                raise
        del self[key]
        return value

    def popitem(self):
        """Remove and return some (key, value) pair.

        Raises KeyError when there is nothing left to pop.
        """
        try:
            key = list(self.keys())[0]
            return key, self.pop(key)
        except IndexError:
            raise KeyError("dictionary is empty")

    def update(self, other=None, **kwargs):
        """Copy items from a mapping or an iterable of pairs, then kwargs.

        As with dict.update, keyword arguments are applied even when a
        positional argument is given (they used to be dropped silently
        in that case).
        """
        if other is not None:
            try:
                pairs = zip(other.keys(), other.values())
            except AttributeError:
                # Not a mapping: treat it as an iterable of (key, value).
                pairs = other
            for key, value in pairs:
                self[key] = value
        for key, value in kwargs.items():
            self[key] = value

    def setdefault(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            self[key] = default
            return default

    def get(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            return default

    def __repr__(self):
        return repr(dict(self.items()))

    def __cmp__(self, other):
        # Python 2 comparison hook: compare as a plain dict; anything
        # sorts after None.
        if other is None:
            return 1
        else:
            return cmp(dict(self.items()), other)

    def __len__(self):
        return len(self.keys())
class DictProxy(DictMixin):
    """DictMixin implementation that stores its items in a private dict."""

    def __init__(self, *args, **kwargs):
        self.__dict = {}
        super(DictProxy, self).__init__(*args, **kwargs)

    def __getitem__(self, name):
        return self.__dict[name]

    def __setitem__(self, name, item):
        self.__dict[name] = item

    def __delitem__(self, name):
        del self.__dict[name]

    def keys(self):
        return self.__dict.keys()
def _unpacker(fmt):
    """Return a static method that decodes one value of struct format 'fmt'."""
    return staticmethod(lambda data: struct.unpack(fmt, data)[0])


def _packer(fmt):
    """Return a static method that encodes one value with struct format 'fmt'."""
    return staticmethod(lambda data: struct.pack(fmt, data))


class cdata(object):
    """C character buffer to Python numeric type conversions.

    The plain names decode a byte string into a number, the to_* names
    encode a number into a byte string; the suffix selects little (_le)
    or big (_be) endian byte order.
    """

    from struct import error

    short_le = _unpacker('<h')
    ushort_le = _unpacker('<H')

    short_be = _unpacker('>h')
    ushort_be = _unpacker('>H')

    int_le = _unpacker('<i')
    uint_le = _unpacker('<I')

    int_be = _unpacker('>i')
    uint_be = _unpacker('>I')

    longlong_le = _unpacker('<q')
    ulonglong_le = _unpacker('<Q')

    longlong_be = _unpacker('>q')
    ulonglong_be = _unpacker('>Q')

    to_short_le = _packer('<h')
    to_ushort_le = _packer('<H')

    to_short_be = _packer('>h')
    to_ushort_be = _packer('>H')

    to_int_le = _packer('<i')
    to_uint_le = _packer('<I')

    to_int_be = _packer('>i')
    to_uint_be = _packer('>I')

    to_longlong_le = _packer('<q')
    to_ulonglong_le = _packer('<Q')

    to_longlong_be = _packer('>q')
    to_ulonglong_be = _packer('>Q')

    # 256-character table: position v holds the character whose code is
    # v with its eight bits in reversed order. Generator expressions are
    # used so no loop variable is left behind in the class namespace.
    bitswap = ''.join(
        chr(sum(((val >> i) & 1) << (7 - i) for i in range(8)))
        for val in range(256))

    test_bit = staticmethod(lambda value, n: bool((value >> n) & 1))
def lock(fileobj):
    """Lock a file object 'safely'.

    A platform without fcntl, or an IOError while taking the lock, is
    reported as False rather than raised. The call blocks while waiting
    for the exclusive lock.

    Returns True if the lock was taken and False otherwise; other
    exceptions from the locking call propagate to the caller.
    """
    try:
        import fcntl
    except ImportError:
        return False
    try:
        fcntl.lockf(fileobj, fcntl.LOCK_EX)
    except IOError:
        # FIXME: There's possibly a lot of complicated
        # logic that needs to go here in case the IOError
        # is EACCES or EAGAIN.
        return False
    return True
def unlock(fileobj):
    """Unlock a file object.

    Only call this after lock() returned true for the same file object.
    """
    # A failure here means lock/unlock calls are mismatched, so errors
    # are deliberately allowed to propagate.
    from fcntl import lockf, LOCK_UN
    lockf(fileobj, LOCK_UN)
def _insert_bytes_slow(fobj, size, filesize, movesize, BUFFER_SIZE):
    """Open a 'size' byte gap with plain seek/read/write calls (no mmap)."""
    fobj.truncate(filesize)

    fobj.seek(0, 2)
    pad_left = size
    # Pad in bounded chunks so a multi-megabyte gap does not require
    # building one enormous string.
    while pad_left:
        chunk = min(BUFFER_SIZE, pad_left)
        fobj.write("\x00" * chunk)
        pad_left -= chunk

    fobj.seek(filesize, 0)
    while movesize:
        # The file position is at the end of the data still to be
        # moved, which is 'movesize' bytes long.
        thismove = min(BUFFER_SIZE, movesize)
        # Step back over the piece handled in this pass.
        fobj.seek(-thismove, 1)
        nextpos = fobj.tell()
        # Reading it puts the position back at its end.
        data = fobj.read(thismove)
        # Go to the destination, 'size' bytes further along, and write.
        fobj.seek(-thismove + size, 1)
        fobj.write(data)
        # Return to the end of the data that has not been moved yet.
        fobj.seek(nextpos)
        movesize -= thismove

    fobj.flush()


def insert_bytes(fobj, size, offset, BUFFER_SIZE=2**16):
    """Insert size bytes of empty space starting at offset.

    fobj must be an open file object, open rb+ or
    equivalent. The file is grown by 'size' bytes and everything from
    'offset' onwards is shifted towards the end. mmap is tried first;
    if that fails a significantly slower seek/read/write method is
    used, holding a file lock when one can be obtained.
    """
    assert 0 < size
    assert 0 <= offset
    locked = False
    fobj.seek(0, 2)
    filesize = fobj.tell()
    movesize = filesize - offset
    fobj.write('\x00' * size)
    fobj.flush()
    try:
        try:
            import mmap
            mapped = mmap.mmap(fobj.fileno(), filesize + size)
            try:
                mapped.move(offset + size, offset, movesize)
            finally:
                mapped.close()
        except (ValueError, EnvironmentError, ImportError):
            # handle broken mmap scenarios
            locked = lock(fobj)
            _insert_bytes_slow(fobj, size, filesize, movesize, BUFFER_SIZE)
    finally:
        if locked:
            unlock(fobj)
def _delete_bytes_slow(fobj, size, offset, BUFFER_SIZE):
    """Shift the data after the deleted span down using read/write calls."""
    fobj.seek(offset + size)
    buf = fobj.read(BUFFER_SIZE)
    while buf:
        # Write the chunk 'size' bytes earlier than where it was read.
        fobj.seek(offset)
        fobj.write(buf)
        offset += len(buf)
        fobj.seek(offset + size)
        buf = fobj.read(BUFFER_SIZE)


def delete_bytes(fobj, size, offset, BUFFER_SIZE=2**16):
    """Delete size bytes of empty space starting at offset.

    fobj must be an open file object, open rb+ or
    equivalent. The data following the deleted span is moved down and
    the file is truncated by 'size' bytes. mmap is tried first; if that
    fails a significantly slower read/write method is used, holding a
    file lock when one can be obtained.
    """
    locked = False
    assert 0 < size
    assert 0 <= offset
    fobj.seek(0, 2)
    filesize = fobj.tell()
    movesize = filesize - offset - size
    assert 0 <= movesize
    try:
        # Nothing needs moving when the span reaches the end of file.
        if movesize > 0:
            fobj.flush()
            try:
                import mmap
                mapped = mmap.mmap(fobj.fileno(), filesize)
                try:
                    mapped.move(offset, offset + size, movesize)
                finally:
                    mapped.close()
            except (ValueError, EnvironmentError, ImportError):
                # handle broken mmap scenarios
                locked = lock(fobj)
                _delete_bytes_slow(fobj, size, offset, BUFFER_SIZE)
        fobj.truncate(filesize - size)
        fobj.flush()
    finally:
        if locked:
            unlock(fobj)
def utf8(data):
    """Convert a basestring to a valid UTF-8 str.

    unicode input is encoded; str input is decoded as UTF-8 with
    undecodable bytes replaced and then re-encoded. Any other type
    raises TypeError.
    """
    if isinstance(data, unicode):
        return data.encode("utf-8")
    if isinstance(data, str):
        return data.decode("utf-8", "replace").encode("utf-8")
    raise TypeError("only unicode/str types can be converted to UTF-8")
def dict_match(d, key, default=None):
    """Look up 'key' in 'd', falling back to glob-pattern keys.

    An exact key wins. Otherwise the keys of 'd' are treated as
    case-sensitive fnmatch patterns and the value of the first pattern
    matching 'key' (in iteration order) is returned. 'default' is
    returned when nothing matches.
    """
    try:
        return d[key]
    except KeyError:
        pass
    for pattern, value in d.iteritems():
        if fnmatchcase(key, pattern):
            return value
    return default

223
lib/mutagen/_vorbis.py Normal file
View File

@@ -0,0 +1,223 @@
# Vorbis comment support for Mutagen
# Copyright 2005-2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
"""Read and write Vorbis comment data.
Vorbis comments are freeform key/value pairs; keys are
case-insensitive ASCII and values are Unicode strings. A key may have
multiple values.
The specification is at http://www.xiph.org/vorbis/doc/v-comment.html.
"""
import sys
from cStringIO import StringIO
import lib.mutagen
from lib.mutagen._util import DictMixin, cdata
try: set
except NameError:
from sets import Set as set
def is_valid_key(key):
    """Return true if a string is a valid Vorbis comment key.

    A valid key is non-empty and consists only of characters between
    0x20 (space) and 0x7D ('}') inclusive, with '=' excluded.
    """
    for char in key:
        if char == "=" or not (" " <= char <= "}"):
            return False
    # Every character passed; only the empty key is still invalid.
    return bool(key)

istag = is_valid_key
class error(IOError):
    """Base class for the errors raised by this module."""


class VorbisUnsetFrameError(error):
    """Raised when a required framing bit is not set."""


class VorbisEncodingError(error):
    """Raised when comment data cannot be decoded or encoded."""
class VComment(lib.mutagen.Metadata, list):
"""A Vorbis comment parser, accessor, and renderer.
All comment ordering is preserved. A VComment is a list of
key/value pairs, and so any Python list method can be used on it.
Vorbis comments are always wrapped in something like an Ogg Vorbis
bitstream or a FLAC metadata block, so this loads string data or a
file-like object, not a filename.
Attributes:
vendor -- the stream 'vendor' (i.e. writer); default 'Mutagen'
"""
vendor = u"Mutagen " + lib.mutagen.version_string
def __init__(self, data=None, *args, **kwargs):
# Collect the args to pass to load, this lets child classes
# override just load and get equivalent magic for the
# constructor.
if data is not None:
if isinstance(data, str):
data = StringIO(data)
elif not hasattr(data, 'read'):
raise TypeError("VComment requires string data or a file-like")
self.load(data, *args, **kwargs)
def load(self, fileobj, errors='replace', framing=True):
"""Parse a Vorbis comment from a file-like object.
Keyword arguments:
errors:
'strict', 'replace', or 'ignore'. This affects Unicode decoding
and how other malformed content is interpreted.
framing -- if true, fail if a framing bit is not present
Framing bits are required by the Vorbis comment specification,
but are not used in FLAC Vorbis comment blocks.
"""
try:
vendor_length = cdata.uint_le(fileobj.read(4))
self.vendor = fileobj.read(vendor_length).decode('utf-8', errors)
count = cdata.uint_le(fileobj.read(4))
for i in range(count):
length = cdata.uint_le(fileobj.read(4))
try: string = fileobj.read(length).decode('utf-8', errors)
except (OverflowError, MemoryError):
raise error("cannot read %d bytes, too large" % length)
try: tag, value = string.split('=', 1)
except ValueError, err:
if errors == "ignore":
continue
elif errors == "replace":
tag, value = u"unknown%d" % i, string
else:
raise VorbisEncodingError, str(err), sys.exc_info()[2]
try: tag = tag.encode('ascii', errors)
except UnicodeEncodeError:
raise VorbisEncodingError, "invalid tag name %r" % tag
else:
if is_valid_key(tag): self.append((tag, value))
if framing and not ord(fileobj.read(1)) & 0x01:
raise VorbisUnsetFrameError("framing bit was unset")
except (cdata.error, TypeError):
raise error("file is not a valid Vorbis comment")
def validate(self):
"""Validate keys and values.
Check to make sure every key used is a valid Vorbis key, and
that every value used is a valid Unicode or UTF-8 string. If
any invalid keys or values are found, a ValueError is raised.
"""
if not isinstance(self.vendor, unicode):
try: self.vendor.decode('utf-8')
except UnicodeDecodeError: raise ValueError
for key, value in self:
try:
if not is_valid_key(key): raise ValueError
except: raise ValueError("%r is not a valid key" % key)
if not isinstance(value, unicode):
try: value.encode("utf-8")
except: raise ValueError("%r is not a valid value" % value)
else: return True
def clear(self):
"""Clear all keys from the comment."""
del(self[:])
def write(self, framing=True):
"""Return a string representation of the data.
Validation is always performed, so calling this function on
invalid data may raise a ValueError.
Keyword arguments:
framing -- if true, append a framing bit (see load)
"""
self.validate()
f = StringIO()
f.write(cdata.to_uint_le(len(self.vendor.encode('utf-8'))))
f.write(self.vendor.encode('utf-8'))
f.write(cdata.to_uint_le(len(self)))
for tag, value in self:
comment = "%s=%s" % (tag, value.encode('utf-8'))
f.write(cdata.to_uint_le(len(comment)))
f.write(comment)
if framing: f.write("\x01")
return f.getvalue()
def pprint(self):
return "\n".join(["%s=%s" % (k.lower(), v) for k, v in self])
class VCommentDict(VComment, DictMixin):
    """A VComment that looks like a dictionary.

    This object differs from a dictionary in two ways. First,
    len(comment) will still return the number of values, not the
    number of keys. Secondly, iterating through the object will
    iterate over (key, value) pairs, not keys. Since a key may have
    multiple values, the same value may appear multiple times while
    iterating.

    Since Vorbis comment keys are case-insensitive, all keys are
    normalized to lowercase ASCII.
    """

    def __getitem__(self, key):
        """A list of values for the key.

        This is a copy, so comment['title'].append('a title') will not
        work. Raises KeyError when the key has no values.
        """
        key = key.lower().encode('ascii')
        values = [value for (k, value) in self if k.lower() == key]
        if not values:
            raise KeyError(key)
        return values

    def __delitem__(self, key):
        """Delete all values associated with the key.

        Raises KeyError when the key has no values.
        """
        key = key.lower().encode('ascii')
        # Collect first, then remove: the list must not shrink while it
        # is being iterated.
        doomed = [pair for pair in self if pair[0].lower() == key]
        if not doomed:
            raise KeyError(key)
        for pair in doomed:
            self.remove(pair)

    def __contains__(self, key):
        """Return true if the key has any values."""
        key = key.lower().encode('ascii')
        for k, value in self:
            if k.lower() == key:
                return True
        return False

    def __setitem__(self, key, values):
        """Set a key's value or values.

        Setting a value overwrites all old ones. The value may be a
        list of Unicode or UTF-8 strings, or a single Unicode or UTF-8
        string.
        """
        key = key.lower().encode('ascii')
        if not isinstance(values, list):
            values = [values]
        try:
            del self[key]
        except KeyError:
            pass
        for value in values:
            self.append((key, value))

    def keys(self):
        """Return all keys in the comment, lowercased, without duplicates.

        A new list is always returned, also for an empty comment, so the
        result never aliases the comment itself.
        """
        return list(set([k.lower() for k, v in self]))

    def as_dict(self):
        """Return a copy of the comment data in a real dict."""
        return dict((key, self[key]) for key in self.keys())

465
lib/mutagen/apev2.py Normal file
View File

@@ -0,0 +1,465 @@
# An APEv2 tag reader
#
# Copyright 2005 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# $Id: apev2.py 4008 2007-04-21 04:02:07Z piman $
"""APEv2 reading and writing.
The APEv2 format is most commonly used with Musepack files, but is
also the format of choice for WavPack and other formats. Some MP3s
also have APEv2 tags, but this can cause problems with many MP3
decoders and taggers.
APEv2 tags, like Vorbis comments, are freeform key=value pairs. APEv2
keys can be any ASCII string with characters from 0x20 to 0x7E,
between 2 and 255 characters long. Keys are case-sensitive, but
readers are recommended to be case insensitive, and it is forbidden to
have multiple keys which differ only in case. Keys are usually stored
title-cased (e.g. 'Artist' rather than 'artist').
APEv2 values are slightly more structured than Vorbis comments; values
are flagged as one of text, binary, or an external reference (usually
a URI).
Based off the format specification found at
http://wiki.hydrogenaudio.org/index.php?title=APEv2_specification.
"""
__all__ = ["APEv2", "APEv2File", "Open", "delete"]
import struct
from cStringIO import StringIO
def is_valid_apev2_key(key):
    """Return True if 'key' may be used as an APEv2 item key.

    The key must be 2 to 255 characters long, contain only characters
    from ' ' to '~', and must not be one of the names "OggS", "TAG",
    "ID3" or "MP+".
    """
    if not 2 <= len(key) <= 255:
        return False
    if min(key) < ' ' or max(key) > '~':
        return False
    return key not in ["OggS", "TAG", "ID3", "MP+"]
# There are three different kinds of APE tag values.
# "0: Item contains text information coded in UTF-8
# 1: Item contains binary information
# 2: Item is a locator of external stored information [e.g. URL]
# 3: reserved"
# The names below are bound to 0, 1 and 2 respectively.
TEXT, BINARY, EXTERNAL = range(3)
# Single-bit masks (bits 31, 30 and 29) tested against the 32-bit flags
# word read from a tag header/footer, e.g. `self.flags & HAS_HEADER` in
# _APEv2Data.
HAS_HEADER = 1L << 31
HAS_NO_FOOTER = 1L << 30
IS_HEADER = 1L << 29
class error(IOError):
    """Base class for the errors raised by this module."""


class APENoHeaderError(error, ValueError):
    """Raised when no APE tag can be found."""


class APEUnsupportedVersionError(error, ValueError):
    """Error type for APE tags of an unsupported version."""


class APEBadItemError(error, ValueError):
    """Raised for a tag item that cannot be accepted."""
from lib.mutagen import Metadata, FileType
from lib.mutagen._util import DictMixin, cdata, utf8, delete_bytes
class _APEv2Data(object):
    """Locate an APEv2 tag in an open file and read its raw bytes.

    After construction the attributes hold the offsets that were found
    (None where unknown) and 'tag' holds the raw item data, or None when
    no tag was located.
    """

    # Store offsets of the important parts of the file.
    start = header = data = footer = end = None
    # Footer or header; seek here and read 32 to get version/size/items/flags
    metadata = None
    # Actual tag data
    tag = None

    version = None
    size = None
    items = None
    flags = 0

    # The tag is at the start rather than the end. A tag at both
    # the start and end of the file (i.e. the tag is the whole file)
    # is not considered to be at the start.
    is_at_start = False

    def __init__(self, fileobj):
        self.__find_metadata(fileobj)
        # Prefer whichever of header/footer was found (None sorts lowest
        # under Python 2 comparison rules).
        self.metadata = max(self.header, self.footer)
        if self.metadata is None:
            return
        self.__fill_missing(fileobj)
        self.__fix_brokenness(fileobj)
        if self.data is not None:
            fileobj.seek(self.data)
            self.tag = fileobj.read(self.size)

    def __find_metadata(self, fileobj):
        """Try to find a header or footer and record its offset."""

        def preamble_offset():
            # If the next 8 bytes are the APE preamble, step back onto
            # it and report where it starts; otherwise report None.
            if fileobj.read(8) != "APETAGEX":
                return None
            fileobj.seek(-8, 1)
            return fileobj.tell()

        # Check for a simple footer.
        try:
            fileobj.seek(-32, 2)
        except IOError:
            fileobj.seek(0, 2)
            return
        found = preamble_offset()
        if found is not None:
            self.footer = self.metadata = found
            return

        # Check for an APEv2 tag followed by an ID3v1 tag at the end.
        try:
            fileobj.seek(-128, 2)
            if fileobj.read(3) == "TAG":

                fileobj.seek(-35, 1)  # "TAG" + header length
                found = preamble_offset()
                if found is not None:
                    self.footer = found
                    return

                # ID3v1 tag at the end, maybe preceded by Lyrics3v2.
                # (http://www.id3.org/lyrics3200.html)
                # (header length - "APETAGEX") - "LYRICS200"
                fileobj.seek(15, 1)
                if fileobj.read(9) == 'LYRICS200':
                    fileobj.seek(-15, 1)  # "LYRICS200" + size tag
                    try:
                        offset = int(fileobj.read(6))
                    except ValueError:
                        raise IOError

                    fileobj.seek(-32 - offset - 6, 1)
                    found = preamble_offset()
                    if found is not None:
                        self.footer = found
                        return

        except IOError:
            pass

        # Check for a tag at the start.
        fileobj.seek(0, 0)
        if fileobj.read(8) == "APETAGEX":
            self.is_at_start = True
            self.header = 0

    def __fill_missing(self, fileobj):
        """Read version/size/items/flags and derive the remaining offsets."""
        fileobj.seek(self.metadata + 8)
        self.version = fileobj.read(4)
        self.size = cdata.uint_le(fileobj.read(4))
        self.items = cdata.uint_le(fileobj.read(4))
        self.flags = cdata.uint_le(fileobj.read(4))

        if self.header is None and self.footer is None:
            raise APENoHeaderError("No APE tag found")

        if self.header is not None:
            self.data = self.header + 32
            # If we're reading the header, the size is the header
            # offset + the size, which includes the footer.
            self.end = self.data + self.size
            fileobj.seek(self.end - 32, 0)
            if fileobj.read(8) == "APETAGEX":
                self.footer = self.end - 32
        else:
            self.end = self.footer + 32
            self.data = self.end - self.size
            if self.flags & HAS_HEADER:
                self.header = self.data - 32
            else:
                self.header = self.data

    def __fix_brokenness(self, fileobj):
        """Move 'start' back over leftover preambles before the tag."""
        # Fix broken tags written with PyMusepack.
        start = self.header if self.header is not None else self.data
        fileobj.seek(start)

        while start > 0:
            # Clean up broken writing from pre-Mutagen PyMusepack.
            # It didn't remove the first 24 bytes of header.
            try:
                fileobj.seek(-24, 1)
            except IOError:
                break
            if fileobj.read(8) != "APETAGEX":
                break
            fileobj.seek(-8, 1)
            start = fileobj.tell()
        self.start = start
class APEv2(DictMixin, Metadata):
"""A file with an APEv2 tag.
ID3v1 tags are silently ignored and overwritten.
"""
filename = None
def __init__(self, *args, **kwargs):
    """Create the private item stores, then defer to the base classes."""
    # Both mappings must exist before the base initialiser runs.
    self.__dict = {}
    self.__casemap = {}
    super(APEv2, self).__init__(*args, **kwargs)
# Internally all names are stored as lowercase, but the case
# they were set with is remembered and used when saving. This
# is roughly in line with the standard, which says that keys
# are case-sensitive but two keys differing only in case are
# not allowed, and recommends case-insensitive
# implementations.
def pprint(self):
    """Return tag key=value pairs in a human-readable format.

    One line per item, sorted, with each value rendered by its own
    pprint() method.
    """
    lines = ["%s=%s" % (key, value.pprint())
             for key, value in sorted(self.items())]
    return "\n".join(lines)
def load(self, filename):
    """Load tags from a filename.

    Existing items are discarded once a tag has been read. Raises
    APENoHeaderError when the file yields no tag data.
    """
    self.filename = filename

    fileobj = open(filename, "rb")
    try:
        data = _APEv2Data(fileobj)
    finally:
        fileobj.close()

    if not data.tag:
        raise APENoHeaderError("No APE tag found")
    self.clear()
    self.__casemap.clear()
    self.__parse_tag(data.tag, data.items)
def __parse_tag(self, tag, count):
fileobj = StringIO(tag)
for i in range(count):
size = cdata.uint_le(fileobj.read(4))
flags = cdata.uint_le(fileobj.read(4))
# Bits 1 and 2 bits are flags, 0-3
# Bit 0 is read/write flag, ignored
kind = (flags & 6) >> 1
if kind == 3:
raise APEBadItemError("value type must be 0, 1, or 2")
key = value = fileobj.read(1)
while key[-1:] != '\x00' and value:
value = fileobj.read(1)
key += value
if key[-1:] == "\x00":
key = key[:-1]
value = fileobj.read(size)
self[key] = APEValue(value, kind)
def __getitem__(self, key):
if not is_valid_apev2_key(key):
raise KeyError("%r is not a valid APEv2 key" % key)
return self.__dict[key.lower()]
def __delitem__(self, key):
if not is_valid_apev2_key(key):
raise KeyError("%r is not a valid APEv2 key" % key)
del(self.__dict[key.lower()])
def __setitem__(self, key, value):
"""'Magic' value setter.
This function tries to guess at what kind of value you want to
store. If you pass in a valid UTF-8 or Unicode string, it
treats it as a text value. If you pass in a list, it treats it
as a list of string/Unicode values. If you pass in a string
that is not valid UTF-8, it assumes it is a binary value.
If you need to force a specific type of value (e.g. binary
data that also happens to be valid UTF-8, or an external
reference), use the APEValue factory and set the value to the
result of that:
from lib.mutagen.apev2 import APEValue, EXTERNAL
tag['Website'] = APEValue('http://example.org', EXTERNAL)
"""
if not is_valid_apev2_key(key):
raise KeyError("%r is not a valid APEv2 key" % key)
if not isinstance(value, _APEValue):
# let's guess at the content if we're not already a value...
if isinstance(value, unicode):
# unicode? we've got to be text.
value = APEValue(utf8(value), TEXT)
elif isinstance(value, list):
# list? text.
value = APEValue("\0".join(map(utf8, value)), TEXT)
else:
try: dummy = value.decode("utf-8")
except UnicodeError:
# invalid UTF8 text, probably binary
value = APEValue(value, BINARY)
else:
# valid UTF8, probably text
value = APEValue(value, TEXT)
self.__casemap[key.lower()] = key
self.__dict[key.lower()] = value
def keys(self):
return [self.__casemap.get(key, key) for key in self.__dict.keys()]
def save(self, filename=None):
"""Save changes to a file.
If no filename is given, the one most recently loaded is used.
Tags are always written at the end of the file, and include
a header and a footer.
"""
filename = filename or self.filename
try:
fileobj = file(filename, "r+b")
except IOError:
fileobj = file(filename, "w+b")
data = _APEv2Data(fileobj)
if data.is_at_start:
delete_bytes(fileobj, data.end - data.start, data.start)
elif data.start is not None:
fileobj.seek(data.start)
# Delete an ID3v1 tag if present, too.
fileobj.truncate()
fileobj.seek(0, 2)
# "APE tags items should be sorted ascending by size... This is
# not a MUST, but STRONGLY recommended. Actually the items should
# be sorted by importance/byte, but this is not feasible."
tags = [v._internal(k) for k, v in self.items()]
tags.sort(lambda a, b: cmp(len(a), len(b)))
num_tags = len(tags)
tags = "".join(tags)
header = "APETAGEX%s%s" %(
# version, tag size, item count, flags
struct.pack("<4I", 2000, len(tags) + 32, num_tags,
HAS_HEADER | IS_HEADER),
"\0" * 8)
fileobj.write(header)
fileobj.write(tags)
footer = "APETAGEX%s%s" %(
# version, tag size, item count, flags
struct.pack("<4I", 2000, len(tags) + 32, num_tags,
HAS_HEADER),
"\0" * 8)
fileobj.write(footer)
fileobj.close()
def delete(self, filename=None):
"""Remove tags from a file."""
filename = filename or self.filename
fileobj = file(filename, "r+b")
try:
data = _APEv2Data(fileobj)
if data.start is not None and data.size is not None:
delete_bytes(fileobj, data.end - data.start, data.start)
finally:
fileobj.close()
self.clear()
Open = APEv2
def delete(filename):
    """Strip the APEv2 tag from `filename`.

    A file that has no APE tag is left untouched and no error is raised.
    """
    try:
        tag = APEv2(filename)
        tag.delete()
    except APENoHeaderError:
        pass
def APEValue(value, kind):
    """APEv2 tag value factory.

    Use this if you need to specify the value's type manually.  Binary
    and text data are automatically detected by APEv2.__setitem__.

    Raises ValueError when `kind` is not TEXT, BINARY or EXTERNAL.
    """
    candidates = (
        (TEXT, APETextValue),
        (BINARY, APEBinaryValue),
        (EXTERNAL, APEExtValue),
    )
    for wanted, value_class in candidates:
        if kind == wanted:
            return value_class(value, kind)
    raise ValueError("kind must be TEXT, BINARY, or EXTERNAL")
class _APEValue(object):
def __init__(self, value, kind):
self.kind = kind
self.value = value
def __len__(self):
return len(self.value)
def __str__(self):
return self.value
# Packed format for an item:
# 4B: Value length
# 4B: Value type
# Key name
# 1B: Null
# Key value
def _internal(self, key):
return "%s%s\0%s" %(
struct.pack("<2I", len(self.value), self.kind << 1),
key, self.value)
def __repr__(self):
return "%s(%r, %d)" % (type(self).__name__, self.value, self.kind)
class APETextValue(_APEValue):
    """An APEv2 text value.

    Text values are Unicode/UTF-8 strings. They can be accessed like
    strings (with a null separating the values), or arrays of strings."""

    def __unicode__(self):
        return unicode(str(self), "utf-8")

    def __iter__(self):
        """Iterate over the strings of the value (not the characters)"""
        return iter(unicode(self).split("\0"))

    def __getitem__(self, index):
        return unicode(self).split("\0")[index]

    def __len__(self):
        # Number of NUL-separated strings, not the byte length.
        return self.value.count("\0") + 1

    def __cmp__(self, other):
        return cmp(unicode(self), other)

    def __setitem__(self, index, value):
        """Replace the string at `index`.

        `value` may be a unicode string or a UTF-8 encoded byte string.
        """
        values = list(self)
        # list(self) yields unicode strings, so the replacement must be
        # unicode too: joining unicode with a UTF-8 byte string makes
        # Python 2 decode the bytes as ASCII and fail on any non-ASCII
        # character.
        if not isinstance(value, unicode):
            value = value.decode("utf-8")
        values[index] = value
        self.value = u"\0".join(values).encode("utf-8")

    def pprint(self):
        return " / ".join(self)
class APEBinaryValue(_APEValue):
    """An APEv2 binary value."""

    def pprint(self):
        """Describe the value by its size instead of dumping raw bytes."""
        size = len(self)
        return "[%d bytes]" % size
class APEExtValue(_APEValue):
    """An APEv2 external value.

    External values are usually URI or IRI strings.
    """

    def pprint(self):
        """Return the value prefixed with an "[External]" marker."""
        text = unicode(self)
        return "[External] %s" % text
class APEv2File(FileType):
    """A file of unknown audio format that carries an APEv2 tag."""

    class _Info(object):
        # Placeholder stream info: nothing is read from the file here.
        length = 0
        bitrate = 0

        def __init__(self, fileobj):
            pass

        pprint = staticmethod(lambda: "Unknown format with APEv2 tag.")

    def load(self, filename):
        """Read stream info and tags; `tags` is None when no tag loads."""
        self.filename = filename
        # Close the handle once _Info has seen it; the original left it
        # open for the garbage collector to reclaim.
        fileobj = open(filename, "rb")
        try:
            self.info = self._Info(fileobj)
        finally:
            fileobj.close()
        try:
            self.tags = APEv2(filename)
        except error:
            self.tags = None

    def add_tags(self):
        """Attach an empty APEv2 tag; ValueError if tags already exist."""
        if self.tags is None:
            self.tags = APEv2()
        else:
            raise ValueError("%r already has tags: %r" % (self, self.tags))

    def score(filename, fileobj, header):
        """Score 1 for an "APETAGEX" marker near the end, minus 1 for ID3."""
        try:
            fileobj.seek(-160, 2)
        except IOError:
            # File shorter than 160 bytes: scan all of it.
            fileobj.seek(0)
        footer = fileobj.read()
        return (("APETAGEX" in footer) - header.startswith("ID3"))
    score = staticmethod(score)

670
lib/mutagen/asf.py Normal file
View File

@@ -0,0 +1,670 @@
# Copyright 2006-2007 Lukas Lalinsky
# Copyright 2005-2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# $Id: asf.py 4224 2007-12-03 09:01:49Z luks $
"""Read and write ASF (Windows Media Audio) files."""
__all__ = ["ASF", "Open"]
import struct
from lib.mutagen import FileType, Metadata
from lib.mutagen._util import insert_bytes, delete_bytes, DictMixin
class error(IOError):
    """Base class of the exceptions defined in this module."""


class ASFError(error):
    """General ASF error."""


class ASFHeaderError(error):
    """Raised when the data does not start with a valid ASF header."""
class ASFInfo(object):
    """ASF stream information."""

    def __init__(self):
        # Zeroed defaults; the header objects fill these in while parsing.
        self.channels = 0
        self.bitrate = 0
        self.sample_rate = 0
        self.length = 0.0

    def pprint(self):
        """Return a one-line, human-readable summary of the stream."""
        template = "Windows Media Audio %d bps, %s Hz, %d channels, %.2f seconds"
        fields = (self.bitrate, self.sample_rate, self.channels, self.length)
        return template % fields
class ASFTags(list, DictMixin, Metadata):
    """Dictionary containing ASF attributes.

    The data lives in the underlying list as (key, value) pairs, so one
    key may occur several times.
    """

    def pprint(self):
        lines = []
        for k, v in self:
            lines.append("%s=%s" % (k, v))
        return "\n".join(lines)

    def __getitem__(self, key):
        """A list of values for the key.

        This is a copy, so comment['title'].append('a title') will not
        work.
        """
        matches = [value for (k, value) in self if k == key]
        if matches:
            return matches
        raise KeyError(key)

    def __delitem__(self, key):
        """Delete all values associated with the key."""
        doomed = [pair for pair in self if pair[0] == key]
        if not doomed:
            raise KeyError(key)
        for pair in doomed:
            self.remove(pair)

    def __contains__(self, key):
        """Return true if the key has any values."""
        for k, value in self:
            if k == key:
                return True
        return False

    def __setitem__(self, key, values):
        """Set a key's value or values.

        Setting a value overwrites all old ones. The value may be a
        list of Unicode or UTF-8 strings, or a single Unicode or UTF-8
        string.
        """
        if not isinstance(values, list):
            values = [values]
        try:
            del(self[key])
        except KeyError:
            pass
        for value in values:
            if key in _standard_attribute_names:
                value = unicode(value)
            elif not isinstance(value, ASFBaseAttribute):
                # Wrap plain Python values; bool is tested before int
                # because bool is a subclass of int.
                wrappers = (
                    (basestring, ASFUnicodeAttribute),
                    (bool, ASFBoolAttribute),
                    (int, ASFDWordAttribute),
                    (long, ASFQWordAttribute),
                )
                for python_type, wrapper in wrappers:
                    if isinstance(value, python_type):
                        value = wrapper(value)
                        break
            self.append((key, value))

    def keys(self):
        """Return all keys in the comment."""
        if not self:
            # Mirrors the historical `self and ...`: an empty container
            # returns itself rather than an empty set.
            return self
        return set(zip(*self)[0])

    def as_dict(self):
        """Return a copy of the comment data in a real dict."""
        grouped = {}
        for key, value in self:
            if key not in grouped:
                grouped[key] = []
            grouped[key].append(value)
        return grouped
class ASFBaseAttribute(object):
    """Generic attribute.

    Subclasses set TYPE and provide parse(), _render() and data_size().
    """

    TYPE = None

    def __init__(self, value=None, data=None, language=None,
                 stream=None, **kwargs):
        self.language = language
        self.stream = stream
        if not data:
            # No (or empty) raw data: take the value as given.
            self.value = value
        else:
            self.value = self.parse(data, **kwargs)

    def data_size(self):
        raise NotImplementedError

    def __repr__(self):
        parts = ["%s(%r" % (type(self).__name__, self.value)]
        if self.language:
            parts.append(", language=%d" % self.language)
        if self.stream:
            parts.append(", stream=%d" % self.stream)
        parts.append(")")
        return "".join(parts)

    def render(self, name):
        """Pack as: name length, name, type code, data length, data."""
        encoded_name = name.encode("utf-16-le") + "\x00\x00"
        payload = self._render()
        head = struct.pack("<H", len(encoded_name))
        middle = struct.pack("<HH", self.TYPE, len(payload))
        return head + encoded_name + middle + payload

    def _render_record(self, name, language):
        """Pack the record layout shared by render_m() and render_ml()."""
        encoded_name = name.encode("utf-16-le") + "\x00\x00"
        if self.TYPE == 2:
            # Type 2 (bool) values are rendered as a 16-bit word here.
            payload = self._render(dword=False)
        else:
            payload = self._render()
        fixed = struct.pack("<HHHHI", language, self.stream or 0,
                            len(encoded_name), self.TYPE, len(payload))
        return fixed + encoded_name + payload

    def render_m(self, name):
        """Pack a record with the first (language) field set to zero."""
        return self._render_record(name, 0)

    def render_ml(self, name):
        """Pack a record that carries this attribute's language index."""
        return self._render_record(name, self.language or 0)
class ASFUnicodeAttribute(ASFBaseAttribute):
    """Unicode string attribute."""

    TYPE = 0x0000

    def parse(self, data):
        """Decode UTF-16-LE bytes, dropping NUL terminators/padding."""
        return data.decode("utf-16-le").strip("\x00")

    def _render(self):
        """Encode the value as UTF-16-LE with a two-byte terminator."""
        return self.value.encode("utf-16-le") + "\x00\x00"

    def data_size(self):
        # Measure the encoded form: a character outside the BMP takes
        # four bytes in UTF-16, which `len(value) * 2` undercounts on
        # wide Unicode builds.
        return len(self._render())

    def __str__(self):
        return self.value

    def __cmp__(self, other):
        return cmp(unicode(self), other)
class ASFByteArrayAttribute(ASFBaseAttribute):
    """Byte array attribute."""

    TYPE = 0x0001

    def parse(self, data):
        # Raw bytes are stored as-is.
        return data

    def _render(self):
        return self.value

    def data_size(self):
        raw = self.value
        return len(raw)

    def __str__(self):
        byte_count = len(self.value)
        return "[binary data (%s bytes)]" % byte_count

    def __cmp__(self, other):
        described = str(self)
        return cmp(described, other)
class ASFBoolAttribute(ASFBaseAttribute):
    """Bool attribute."""

    TYPE = 0x0002

    def parse(self, data, dword=True):
        """Decode a 32-bit (dword) or 16-bit flag; true only when == 1."""
        if dword:
            return struct.unpack("<I", data)[0] == 1
        else:
            return struct.unpack("<H", data)[0] == 1

    def _render(self, dword=True):
        """Encode the flag as a 32-bit (dword) or 16-bit integer."""
        if dword:
            return struct.pack("<I", int(self.value))
        else:
            return struct.pack("<H", int(self.value))

    def data_size(self):
        return 4

    def __bool__(self):
        return self.value

    def __str__(self):
        return str(self.value)

    def __cmp__(self, other):
        # Compare the stored value itself.  Python 2 looks up
        # __nonzero__, not __bool__, so bool(self) was True for every
        # instance and a False attribute compared as True.
        return cmp(bool(self.value), other)
class ASFDWordAttribute(ASFBaseAttribute):
    """DWORD attribute."""

    TYPE = 0x0003

    def parse(self, data):
        """Decode a little-endian unsigned 32-bit integer."""
        (number,) = struct.unpack("<L", data)
        return number

    def _render(self):
        number = self.value
        return struct.pack("<L", number)

    def data_size(self):
        return 4

    def __int__(self):
        return self.value

    def __str__(self):
        return str(self.value)

    def __cmp__(self, other):
        as_int = int(self)
        return cmp(as_int, other)
class ASFQWordAttribute(ASFBaseAttribute):
    """QWORD attribute."""

    TYPE = 0x0004

    def parse(self, data):
        """Decode a little-endian unsigned 64-bit integer."""
        (number,) = struct.unpack("<Q", data)
        return number

    def _render(self):
        number = self.value
        return struct.pack("<Q", number)

    def data_size(self):
        return 8

    def __int__(self):
        return self.value

    def __str__(self):
        return str(self.value)

    def __cmp__(self, other):
        as_int = int(self)
        return cmp(as_int, other)
class ASFWordAttribute(ASFBaseAttribute):
    """WORD attribute."""

    TYPE = 0x0005

    def parse(self, data):
        """Decode a little-endian unsigned 16-bit integer."""
        (number,) = struct.unpack("<H", data)
        return number

    def _render(self):
        number = self.value
        return struct.pack("<H", number)

    def data_size(self):
        return 2

    def __int__(self):
        return self.value

    def __str__(self):
        return str(self.value)

    def __cmp__(self, other):
        as_int = int(self)
        return cmp(as_int, other)
class ASFGUIDAttribute(ASFBaseAttribute):
    """GUID attribute."""

    TYPE = 0x0006

    def parse(self, data):
        # The raw GUID bytes are kept unchanged.
        return data

    def _render(self):
        return self.value

    def data_size(self):
        raw = self.value
        return len(raw)

    def __str__(self):
        return self.value

    def __cmp__(self, other):
        as_text = str(self)
        return cmp(as_text, other)
# Numeric type codes, one per attribute class.  These are the values
# ASFValue() accepts as its `kind` argument.
UNICODE = ASFUnicodeAttribute.TYPE
BYTEARRAY = ASFByteArrayAttribute.TYPE
BOOL = ASFBoolAttribute.TYPE
DWORD = ASFDWordAttribute.TYPE
QWORD = ASFQWordAttribute.TYPE
WORD = ASFWordAttribute.TYPE
GUID = ASFGUIDAttribute.TYPE
def ASFValue(value, kind, **kwargs):
    """Create an attribute object of the class registered for `kind`.

    `kind` is one of the module-level type codes (UNICODE, BYTEARRAY,
    BOOL, DWORD, QWORD, WORD, GUID).  Extra keyword arguments are passed
    to the attribute constructor.

    Raises ValueError when `kind` is not a known type code.
    """
    try:
        attribute_class = _attribute_types[kind]
    except (KeyError, TypeError):
        # TypeError covers an unhashable `kind`, which the former linear
        # scan would simply have failed to match.
        raise ValueError("Unknown value type")
    return attribute_class(value=value, **kwargs)
# Maps each attribute TYPE code to the class that parses and renders it.
# Used when decoding attributes read from a file and by ASFValue().
_attribute_types = {
ASFUnicodeAttribute.TYPE: ASFUnicodeAttribute,
ASFByteArrayAttribute.TYPE: ASFByteArrayAttribute,
ASFBoolAttribute.TYPE: ASFBoolAttribute,
ASFDWordAttribute.TYPE: ASFDWordAttribute,
ASFQWordAttribute.TYPE: ASFQWordAttribute,
ASFWordAttribute.TYPE: ASFWordAttribute,
ASFGUIDAttribute.TYPE: ASFGUIDAttribute,
}
# Names stored in the Content Description object rather than as extended
# attributes.  The order matters: ContentDescriptionObject.render() maps
# over this list to emit the five text fields in exactly this order.
_standard_attribute_names = [
"Title",
"Author",
"Copyright",
"Description",
"Rating"
]
class BaseObject(object):
    """Base ASF object.

    The default behaviour keeps the object's payload opaque: parse()
    stores it and render() writes it back unchanged.
    """

    GUID = None

    def parse(self, asf, data, fileobj, size):
        """Remember the raw payload; the other arguments are unused here."""
        self.data = data

    def render(self, asf):
        """Return GUID + 64-bit total size + payload.

        The size field counts the 24-byte object header (16-byte GUID
        plus the 8-byte size itself) as well as the payload.
        """
        return self.GUID + struct.pack("<Q", len(self.data) + 24) + self.data
class UnknownObject(BaseObject):
    """Unknown ASF object.

    Carries the GUID it was found with so it can be rendered back
    through the inherited BaseObject behaviour.
    """

    def __init__(self, guid):
        self.GUID = guid
class HeaderObject(object):
    """ASF header.

    Only holds the GUID that opens every ASF file; it is used to
    recognise files and is written at the start of a rendered header.
    """

    GUID = "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C"
class ContentDescriptionObject(BaseObject):
    """Content description."""

    GUID = "\x33\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C"

    def parse(self, asf, data, fileobj, size):
        """Read the five text fields and store the non-empty ones as tags.

        The payload starts with five 16-bit lengths, followed by the
        UTF-16-LE texts back to back in the same order.
        """
        super(ContentDescriptionObject, self).parse(asf, data, fileobj, size)
        asf.content_description_obj = self
        lengths = struct.unpack("<HHHHH", data[:10])
        texts = []
        pos = 10
        for length in lengths:
            end = pos + length
            text = None
            if length > 0:
                text = data[pos:end].decode("utf-16-le").strip("\x00")
            texts.append(text)
            pos = end
        title, author, copyright, desc, rating = texts
        fields = dict(
            Title=title,
            Author=author,
            Copyright=copyright,
            Description=desc,
            Rating=rating)
        for key, value in fields.items():
            if value is None:
                continue
            asf.tags[key] = value

    def render(self, asf):
        """Pack the first value of each standard attribute."""
        def render_text(name):
            value = asf.tags.get(name, [])
            if not value:
                return ""
            return value[0].encode("utf-16-le") + "\x00\x00"

        texts = [render_text(name) for name in _standard_attribute_names]
        lengths = [len(text) for text in texts]
        data = struct.pack("<HHHHH", *lengths) + "".join(texts)
        return self.GUID + struct.pack("<Q", 24 + len(data)) + data
class ExtendedContentDescriptionObject(BaseObject):
    """Extended content description."""

    GUID = "\x40\xA4\xD0\xD2\x07\xE3\xD2\x11\x97\xF0\x00\xA0\xC9\x5E\xA8\x50"

    def parse(self, asf, data, fileobj, size):
        """Decode every attribute record and append it to asf.tags."""
        super(ExtendedContentDescriptionObject, self).parse(
            asf, data, fileobj, size)
        asf.extended_content_description_obj = self
        (num_attributes,) = struct.unpack("<H", data[0:2])
        pos = 2
        for _ in range(num_attributes):
            # Record layout: name length, name, value type, value
            # length, value.
            (name_length,) = struct.unpack("<H", data[pos:pos + 2])
            pos += 2
            raw_name = data[pos:pos + name_length]
            name = raw_name.decode("utf-16-le").strip("\x00")
            pos += name_length
            value_type, value_length = struct.unpack(
                "<HH", data[pos:pos + 4])
            pos += 4
            value = data[pos:pos + value_length]
            pos += value_length
            attribute_class = _attribute_types[value_type]
            asf.tags.append((name, attribute_class(data=value)))

    def render(self, asf):
        """Pack the attributes ASF.save() routed to this object."""
        attrs = asf.to_extended_content_description.items()
        body = "".join([attr.render(name) for (name, attr) in attrs])
        # 26 = 16-byte GUID + 8-byte size + 2-byte attribute count.
        head = struct.pack("<QH", 26 + len(body), len(attrs))
        return self.GUID + (head + body)
class FilePropertiesObject(BaseObject):
    """File properties."""

    GUID = "\xA1\xDC\xAB\x8C\x47\xA9\xCF\x11\x8E\xE4\x00\xC0\x0C\x20\x53\x65"

    def parse(self, asf, data, fileobj, size):
        """Derive asf.info.length from the duration and preroll fields."""
        super(FilePropertiesObject, self).parse(asf, data, fileobj, size)
        fields = struct.unpack("<QQQ", data[40:64])
        length = fields[0]
        preroll = fields[2]
        # The first field is scaled by 1e7 and the preroll by 1e3 before
        # subtracting (presumably 100 ns ticks and milliseconds).
        asf.info.length = length / 10000000.0 - preroll / 1000.0
class StreamPropertiesObject(BaseObject):
    """Stream properties."""

    GUID = "\x91\x07\xDC\xB7\xB7\xA9\xCF\x11\x8E\xE6\x00\xC0\x0C\x20\x53\x65"

    def parse(self, asf, data, fileobj, size):
        """Copy channel count, sample rate and bitrate into asf.info."""
        super(StreamPropertiesObject, self).parse(asf, data, fileobj, size)
        info = asf.info
        fields = struct.unpack("<HII", data[56:66])
        info.channels = fields[0]
        info.sample_rate = fields[1]
        # The stored rate is multiplied by 8 (presumably bytes/s to bits/s).
        info.bitrate = fields[2] * 8
class HeaderExtensionObject(BaseObject):
    """Header extension.

    A container whose payload is a sequence of nested ASF objects,
    kept in `objects`.
    """

    GUID = "\xb5\x03\xbf_.\xa9\xcf\x11\x8e\xe3\x00\xc0\x0c Se"

    def __init__(self):
        # ASF.save() creates this object for files that lack one and
        # then appends to `objects` without parse() ever having run, so
        # the list must exist from construction.
        self.objects = []

    def parse(self, asf, data, fileobj, size):
        """Parse the nested objects found in the extension data.

        Raises ASFHeaderError when a nested object claims a size smaller
        than its own 24-byte header.
        """
        super(HeaderExtensionObject, self).parse(asf, data, fileobj, size)
        asf.header_extension_obj = self
        datasize, = struct.unpack("<I", data[18:22])
        datapos = 0
        self.objects = []
        while datapos < datasize:
            guid, size = struct.unpack(
                "<16sQ", data[22 + datapos:22 + datapos + 24])
            if size < 24:
                # A size of zero would never advance datapos and loop
                # forever; anything below 24 cannot hold the header.
                raise ASFHeaderError(
                    "Invalid object size in header extension.")
            if guid in _object_types:
                obj = _object_types[guid]()
            else:
                obj = UnknownObject(guid)
            obj.parse(asf, data[22 + datapos + 24:22 + datapos + size],
                      fileobj, size)
            self.objects.append(obj)
            datapos += size

    def render(self, asf):
        """Pack all nested objects behind the fixed extension header."""
        data = "".join([obj.render(asf) for obj in self.objects])
        return (self.GUID + struct.pack("<Q", 24 + 16 + 6 + len(data)) +
                "\x11\xD2\xD3\xAB\xBA\xA9\xcf\x11" +
                "\x8E\xE6\x00\xC0\x0C\x20\x53\x65" +
                "\x06\x00" + struct.pack("<I", len(data)) + data)
class MetadataObject(BaseObject):
    """Metadata description."""

    GUID = "\xea\xcb\xf8\xc5\xaf[wH\x84g\xaa\x8cD\xfaL\xca"

    def parse(self, asf, data, fileobj, size):
        """Decode every attribute record and append it to asf.tags."""
        super(MetadataObject, self).parse(asf, data, fileobj, size)
        asf.metadata_obj = self
        (num_attributes,) = struct.unpack("<H", data[0:2])
        pos = 2
        for _ in range(num_attributes):
            # Fixed 12-byte record header; its first field is unused here.
            record = struct.unpack("<HHHHI", data[pos:pos + 12])
            stream, name_length, value_type, value_length = record[1:]
            pos += 12
            raw_name = data[pos:pos + name_length]
            name = raw_name.decode("utf-16-le").strip("\x00")
            pos += name_length
            value = data[pos:pos + value_length]
            pos += value_length
            args = {'data': value, 'stream': stream}
            if value_type == 2:
                # Type 2 (bool) values are 16 bits wide in this object.
                args['dword'] = False
            asf.tags.append((name, _attribute_types[value_type](**args)))

    def render(self, asf):
        """Pack the attributes ASF.save() routed to this object."""
        attrs = asf.to_metadata.items()
        body = "".join([attr.render_m(name) for (name, attr) in attrs])
        # 26 = 16-byte GUID + 8-byte size + 2-byte attribute count.
        head = struct.pack("<QH", 26 + len(body), len(attrs))
        return self.GUID + head + body
class MetadataLibraryObject(BaseObject):
    """Metadata library description."""

    GUID = "\x94\x1c#D\x98\x94\xd1I\xa1A\x1d\x13NEpT"

    def parse(self, asf, data, fileobj, size):
        """Decode every attribute record and append it to asf.tags."""
        super(MetadataLibraryObject, self).parse(asf, data, fileobj, size)
        asf.metadata_library_obj = self
        (num_attributes,) = struct.unpack("<H", data[0:2])
        pos = 2
        for _ in range(num_attributes):
            # Fixed 12-byte record header, starting with the language.
            record = struct.unpack("<HHHHI", data[pos:pos + 12])
            language, stream, name_length, value_type, value_length = record
            pos += 12
            raw_name = data[pos:pos + name_length]
            name = raw_name.decode("utf-16-le").strip("\x00")
            pos += name_length
            value = data[pos:pos + value_length]
            pos += value_length
            args = {'data': value, 'language': language, 'stream': stream}
            if value_type == 2:
                # Type 2 (bool) values are 16 bits wide in this object.
                args['dword'] = False
            asf.tags.append((name, _attribute_types[value_type](**args)))

    def render(self, asf):
        """Pack the (name, attribute) pairs ASF.save() routed here."""
        attrs = asf.to_metadata_library
        body = "".join([attr.render_ml(name) for (name, attr) in attrs])
        # 26 = 16-byte GUID + 8-byte size + 2-byte attribute count.
        head = struct.pack("<QH", 26 + len(body), len(attrs))
        return self.GUID + head + body
# Maps an object's 16-byte GUID to the class that parses it.  GUIDs not
# listed here are wrapped in UnknownObject by the readers.
_object_types = {
ExtendedContentDescriptionObject.GUID: ExtendedContentDescriptionObject,
ContentDescriptionObject.GUID: ContentDescriptionObject,
FilePropertiesObject.GUID: FilePropertiesObject,
StreamPropertiesObject.GUID: StreamPropertiesObject,
HeaderExtensionObject.GUID: HeaderExtensionObject,
MetadataLibraryObject.GUID: MetadataLibraryObject,
MetadataObject.GUID: MetadataObject,
}
class ASF(FileType):
    """An ASF file, probably containing WMA or WMV."""

    _mimes = ["audio/x-ms-wma", "audio/x-ms-wmv", "video/x-ms-asf",
              "audio/x-wma", "video/x-wmv"]

    def load(self, filename):
        """Parse the header objects of `filename` into info and tags."""
        self.filename = filename
        fileobj = open(filename, "rb")
        try:
            self.size = 0
            self.size1 = 0
            self.size2 = 0
            self.offset1 = 0
            self.offset2 = 0
            self.num_objects = 0
            self.info = ASFInfo()
            self.tags = ASFTags()
            self.__read_file(fileobj)
        finally:
            fileobj.close()

    def save(self):
        """Rebuild the header from the current tags and write it back."""
        # Move attributes to the right objects
        self.to_extended_content_description = {}
        self.to_metadata = {}
        self.to_metadata_library = []
        for name, value in self.tags:
            if name in _standard_attribute_names:
                continue
            large_value = value.data_size() > 0xFFFF
            if (value.language is None and value.stream is None and
                    name not in self.to_extended_content_description and
                    not large_value):
                self.to_extended_content_description[name] = value
            elif (value.language is None and value.stream is not None and
                    name not in self.to_metadata and not large_value):
                self.to_metadata[name] = value
            else:
                self.to_metadata_library.append((name, value))

        # Add missing objects
        if not self.content_description_obj:
            self.content_description_obj = \
                ContentDescriptionObject()
            self.objects.append(self.content_description_obj)
        if not self.extended_content_description_obj:
            self.extended_content_description_obj = \
                ExtendedContentDescriptionObject()
            self.objects.append(self.extended_content_description_obj)
        if not self.header_extension_obj:
            self.header_extension_obj = \
                HeaderExtensionObject()
            self.objects.append(self.header_extension_obj)
        if not self.metadata_obj:
            self.metadata_obj = \
                MetadataObject()
            self.header_extension_obj.objects.append(self.metadata_obj)
        if not self.metadata_library_obj:
            self.metadata_library_obj = \
                MetadataLibraryObject()
            self.header_extension_obj.objects.append(self.metadata_library_obj)

        # Render the header
        data = "".join([obj.render(self) for obj in self.objects])
        data = (HeaderObject.GUID +
                struct.pack("<QL", len(data) + 30, len(self.objects)) +
                "\x01\x02" + data)

        fileobj = open(self.filename, "rb+")
        try:
            size = len(data)
            # Grow or shrink the file so the new header occupies exactly
            # the space in front of the existing data.
            if size > self.size:
                insert_bytes(fileobj, size - self.size, self.size)
            if size < self.size:
                delete_bytes(fileobj, self.size - size, 0)
            fileobj.seek(0)
            fileobj.write(data)
            # Record what is now on disk.  Without this a second save()
            # on the same instance would resize against the stale
            # header size and shift the wrong number of bytes.
            self.size = size
            self.num_objects = len(self.objects)
        finally:
            fileobj.close()

    def __read_file(self, fileobj):
        """Validate the 30-byte file header, then read every object."""
        header = fileobj.read(30)
        if len(header) != 30 or header[:16] != HeaderObject.GUID:
            raise ASFHeaderError("Not an ASF file.")

        self.extended_content_description_obj = None
        self.content_description_obj = None
        self.header_extension_obj = None
        self.metadata_obj = None
        self.metadata_library_obj = None

        self.size, self.num_objects = struct.unpack("<QL", header[16:28])
        self.objects = []
        for i in range(self.num_objects):
            self.__read_object(fileobj)

    def __read_object(self, fileobj):
        """Read one top-level object and append it to self.objects."""
        guid, size = struct.unpack("<16sQ", fileobj.read(24))
        if guid in _object_types:
            obj = _object_types[guid]()
        else:
            obj = UnknownObject(guid)
        data = fileobj.read(size - 24)
        obj.parse(self, data, fileobj, size)
        self.objects.append(obj)

    def score(filename, fileobj, header):
        """Return 2 when `header` starts with the ASF header GUID, else 0."""
        return header.startswith(HeaderObject.GUID) * 2
    score = staticmethod(score)

Open = ASF

465
lib/mutagen/easyid3.py Normal file
View File

@@ -0,0 +1,465 @@
# Simpler (but far more limited) API for ID3 editing
# Copyright 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# $Id: id3.py 3086 2006-04-04 02:13:21Z piman $
"""Easier access to ID3 tags.
EasyID3 is a wrapper around mutagen.id3.ID3 to make ID3 tags appear
more like Vorbis or APEv2 tags.
"""
from fnmatch import fnmatchcase
import lib.mutagen.id3
from lib.mutagen import Metadata
from lib.mutagen._util import DictMixin, dict_match
from lib.mutagen.id3 import ID3, error, delete, ID3FileType
__all__ = ['EasyID3', 'Open', 'delete']
class EasyID3KeyError(KeyError, ValueError, error):
    """Raised when trying to get/set an invalid key.

    The class derives from both KeyError and ValueError for API
    compatibility; catching KeyError is preferred.
    """
class EasyID3(DictMixin, Metadata):
    """A file with an ID3 tag.

    Like Vorbis comments, EasyID3 keys are case-insensitive ASCII
    strings. Only a subset of ID3 frames are supported by default. Use
    EasyID3.RegisterKey and its wrappers to support more.

    You can also set the GetFallback, SetFallback, and DeleteFallback
    to generic key getter/setter/deleter functions, which are called
    if no specific handler is registered for a key. Additionally,
    ListFallback can be used to supply an arbitrary list of extra
    keys. These can be set on EasyID3 or on individual instances after
    creation.

    To use an EasyID3 class with mutagen.mp3.MP3:
        from mutagen.mp3 import EasyMP3 as MP3
        MP3(filename)

    Because many of the attributes are constructed on the fly, things
    like the following will not work:
        ezid3["performer"].append("Joe")
    Instead, you must do:
        values = ezid3["performer"]
        values.append("Joe")
        ezid3["performer"] = values
    """

    Set = {}
    Get = {}
    Delete = {}
    List = {}

    # For compatibility.
    valid_keys = Get

    GetFallback = None
    SetFallback = None
    DeleteFallback = None
    ListFallback = None

    def RegisterKey(cls, key,
                    getter=None, setter=None, deleter=None, lister=None):
        """Register a new key mapping.

        A key mapping is four functions, a getter, setter, deleter,
        and lister. The key may be either a string or a glob pattern.

        The getter, deleter, and lister receive an ID3 instance and
        the requested key name. The setter also receives the desired
        value, which will be a list of strings.

        The getter, setter, and deleter are used to implement __getitem__,
        __setitem__, and __delitem__.

        The lister is used to implement keys(). It should return a
        list of keys that are actually in the ID3 instance, provided
        by its associated getter.
        """
        key = key.lower()
        if getter is not None:
            cls.Get[key] = getter
        if setter is not None:
            cls.Set[key] = setter
        if deleter is not None:
            cls.Delete[key] = deleter
        if lister is not None:
            cls.List[key] = lister
    RegisterKey = classmethod(RegisterKey)

    def RegisterTextKey(cls, key, frameid):
        """Register a text key.

        If the key you need to register is a simple one-to-one mapping
        of ID3 frame name to EasyID3 key, then you can use this
        function:
            EasyID3.RegisterTextKey("title", "TIT2")
        """
        def getter(id3, key):
            return list(id3[frameid])

        def setter(id3, key, value):
            try:
                frame = id3[frameid]
            except KeyError:
                id3.add(lib.mutagen.id3.Frames[frameid](encoding=3, text=value))
            else:
                frame.encoding = 3
                frame.text = value

        def deleter(id3, key):
            del(id3[frameid])

        cls.RegisterKey(key, getter, setter, deleter)
    RegisterTextKey = classmethod(RegisterTextKey)

    def RegisterTXXXKey(cls, key, desc):
        """Register a user-defined text frame key.

        Some ID3 tags are stored in TXXX frames, which allow a
        freeform 'description' which acts as a subkey,
        e.g. TXXX:BARCODE.
            EasyID3.RegisterTXXXKey('barcode', 'BARCODE').
        """
        frameid = "TXXX:" + desc

        def getter(id3, key):
            return list(id3[frameid])

        def setter(id3, key, value):
            try:
                frame = id3[frameid]
            except KeyError:
                enc = 0
                # Store 8859-1 if we can, per MusicBrainz spec.
                for v in value:
                    # `v and` skips empty strings: max() of an empty
                    # sequence raises ValueError.
                    if v and max(v) > u'\x7f':
                        enc = 3
                        break
                id3.add(lib.mutagen.id3.TXXX(encoding=enc, text=value, desc=desc))
            else:
                frame.text = value

        def deleter(id3, key):
            del(id3[frameid])

        cls.RegisterKey(key, getter, setter, deleter)
    RegisterTXXXKey = classmethod(RegisterTXXXKey)

    def __init__(self, filename=None):
        self.__id3 = ID3()
        # load/save/delete are delegated straight to the wrapped ID3.
        self.load = self.__id3.load
        self.save = self.__id3.save
        self.delete = self.__id3.delete
        if filename is not None:
            self.load(filename)

    filename = property(lambda s: s.__id3.filename,
                        lambda s, fn: setattr(s.__id3, 'filename', fn))

    # The getter used to read `s._id3`, an attribute that does not exist
    # (the wrapped tag is the name-mangled `__id3`), so every read raised
    # AttributeError.
    # NOTE(review): the getter reads `size` while the setter stores
    # `_size`; confirm which attribute ID3 actually exposes.
    _size = property(lambda s: s.__id3.size,
                     lambda s, fn: setattr(s.__id3, '_size', fn))

    def __getitem__(self, key):
        key = key.lower()
        func = dict_match(self.Get, key, self.GetFallback)
        if func is not None:
            return func(self.__id3, key)
        else:
            raise EasyID3KeyError("%r is not a valid key" % key)

    def __setitem__(self, key, value):
        key = key.lower()
        # A single string is treated as a one-element list.
        if isinstance(value, basestring):
            value = [value]
        func = dict_match(self.Set, key, self.SetFallback)
        if func is not None:
            return func(self.__id3, key, value)
        else:
            raise EasyID3KeyError("%r is not a valid key" % key)

    def __delitem__(self, key):
        key = key.lower()
        func = dict_match(self.Delete, key, self.DeleteFallback)
        if func is not None:
            return func(self.__id3, key)
        else:
            raise EasyID3KeyError("%r is not a valid key" % key)

    def keys(self):
        """Return the registered keys that are present in the tag."""
        keys = []
        for key in self.Get.keys():
            if key in self.List:
                # Pattern keys enumerate their concrete keys via a lister.
                keys.extend(self.List[key](self.__id3, key))
            elif key in self:
                keys.append(key)
        if self.ListFallback is not None:
            keys.extend(self.ListFallback(self.__id3, ""))
        return keys

    def pprint(self):
        """Print tag key=value pairs."""
        strings = []
        for key in sorted(self.keys()):
            values = self[key]
            for value in values:
                strings.append("%s=%s" % (key, value))
        return "\n".join(strings)

Open = EasyID3
def genre_get(id3, key):
    """Return the genres of the tag's TCON frame (KeyError if absent)."""
    frame = id3["TCON"]
    return frame.genres
def genre_set(id3, key, value):
    """Store `value` as the genres, creating the TCON frame if needed.

    The frame encoding is set to 3 in both cases.
    """
    try:
        frame = id3["TCON"]
    except KeyError:
        id3.add(lib.mutagen.id3.TCON(encoding=3, text=value))
        return
    frame.encoding = 3
    frame.genres = value
def genre_delete(id3, key):
    """Remove the TCON frame from the tag (KeyError if absent)."""
    del id3["TCON"]
def date_get(id3, key):
    """Return the text of every timestamp in the TDRC frame."""
    stamps = id3["TDRC"].text
    texts = []
    for stamp in stamps:
        texts.append(stamp.text)
    return texts
def date_set(id3, key, value):
    """Add a new TDRC frame (encoding 3) holding `value` to the tag."""
    frame = lib.mutagen.id3.TDRC(encoding=3, text=value)
    id3.add(frame)
def date_delete(id3, key):
    """Remove the TDRC frame from the tag (KeyError if absent)."""
    del id3["TDRC"]
def performer_get(id3, key):
    """Return the people listed in TMCL under the role named in `key`.

    `key` has the form "performer:<role>".  Raises KeyError(key) when
    there is no TMCL frame or nobody holds that role.
    """
    wanted_role = key.split(":", 1)[1]
    try:
        mcl = id3["TMCL"]
    except KeyError:
        raise KeyError(key)
    people = [person for role, person in mcl.people if role == wanted_role]
    if not people:
        raise KeyError(key)
    return people
def performer_set(id3, key, value):
    """Replace the people holding the role named in `key`.

    Entries for other roles are kept, in order, ahead of the new ones.
    A TMCL frame is created when the tag has none.
    """
    wanted_role = key.split(":", 1)[1]
    try:
        mcl = id3["TMCL"]
    except KeyError:
        mcl = lib.mutagen.id3.TMCL(encoding=3, people=[])
        id3.add(mcl)
    mcl.encoding = 3
    kept = [entry for entry in mcl.people if entry[0] != wanted_role]
    kept.extend([(wanted_role, person) for person in value])
    mcl.people = kept
def performer_delete(id3, key):
    """Drop everyone holding the role named in `key`.

    The TMCL frame is removed entirely once no entries remain.  Raises
    KeyError(key) when there is no TMCL frame or the role is unused.
    """
    wanted_role = key.split(":", 1)[1]
    try:
        mcl = id3["TMCL"]
    except KeyError:
        raise KeyError(key)
    remaining = [entry for entry in mcl.people if entry[0] != wanted_role]
    if remaining == mcl.people:
        # Nothing was filtered out, so the role was not present.
        raise KeyError(key)
    if remaining:
        mcl.people = remaining
    else:
        del id3["TMCL"]
def performer_list(id3, key):
    """Return one "performer:<role>" key per distinct role in TMCL.

    An empty list is returned when the tag has no TMCL frame.
    """
    try:
        mcl = id3["TMCL"]
    except KeyError:
        return []
    unique_keys = set()
    for entry in mcl.people:
        unique_keys.add("performer:" + entry[0])
    return list(unique_keys)
def musicbrainz_trackid_get(id3, key):
    """Return the MusicBrainz UFID frame's data as a one-item list."""
    frame = id3["UFID:http://musicbrainz.org"]
    track_id = frame.data.decode('ascii')
    return [track_id]
def musicbrainz_trackid_set(id3, key, value):
    """Store the single track ID in `value` in the MusicBrainz UFID frame.

    Raises ValueError unless `value` holds exactly one ID.  The frame is
    created when the tag has none.
    """
    if len(value) != 1:
        raise ValueError("only one track ID may be set per song")
    track_id = value[0].encode('ascii')
    try:
        frame = id3["UFID:http://musicbrainz.org"]
    except KeyError:
        id3.add(lib.mutagen.id3.UFID(owner="http://musicbrainz.org",
                                     data=track_id))
        return
    frame.data = track_id
def musicbrainz_trackid_delete(id3, key):
    """Remove the MusicBrainz UFID frame (KeyError if absent)."""
    del id3["UFID:http://musicbrainz.org"]
def website_get(id3, key):
    """Return the URL of every WOAR frame.

    Raises EasyID3KeyError(key) when the tag has no WOAR frames.
    """
    urls = []
    for frame in id3.getall("WOAR"):
        urls.append(frame.url)
    if not urls:
        raise EasyID3KeyError(key)
    return urls
def website_set(id3, key, value):
    """Replace all WOAR frames with one frame per URL in `value`."""
    id3.delall("WOAR")
    for url in value:
        frame = lib.mutagen.id3.WOAR(url=url)
        id3.add(frame)
def website_delete(id3, key):
    """Remove every WOAR frame from the tag."""
    frame_id = "WOAR"
    id3.delall(frame_id)
def gain_get(id3, key):
    """Return the gain of the matching RVA2 frame, formatted in dB.

    The frame description is `key` without its first 11 and last 5
    characters (the "replaygain_" prefix and "_gain" suffix).  Raises
    EasyID3KeyError(key) when no such frame exists.
    """
    frame_id = "RVA2:" + key[11:-5]
    try:
        frame = id3[frame_id]
    except KeyError:
        raise EasyID3KeyError(key)
    return [u"%+f dB" % frame.gain]
def gain_set(id3, key, value):
    """Set the gain of the RVA2 frame matching `key`.

    `value` must hold exactly one string whose first whitespace-separated
    token parses as a float (e.g. "-3.5 dB").  The frame is created with
    zero gain and peak when the tag has none.

    Raises ValueError when `value` does not hold exactly one item.
    """
    if len(value) != 1:
        # Interpolate the offending value into the message; it used to
        # be passed as a second exception argument, leaving a literal
        # "%r" in the text.
        raise ValueError(
            "there must be exactly one gain value, not %r." % (value,))
    gain = float(value[0].split()[0])
    try:
        frame = id3["RVA2:" + key[11:-5]]
    except KeyError:
        frame = lib.mutagen.id3.RVA2(desc=key[11:-5], gain=0, peak=0, channel=1)
        id3.add(frame)
    frame.gain = gain
def gain_delete(id3, key):
    """Clear the gain stored in the RVA2 frame addressed by *key*.

    Does nothing when the frame is missing. If the frame still has a
    truthy peak, only the gain is reset to 0.0; otherwise the whole
    frame is deleted.
    """
    frame_id = "RVA2:" + key[11:-5]
    try:
        frame = id3[frame_id]
    except KeyError:
        return
    if frame.peak:
        frame.gain = 0.0
    else:
        del id3[frame_id]
def peak_get(id3, key):
    """Return the peak of an RVA2 frame formatted as [u"<float>"].

    The frame description is *key* without its first 11 and last 5
    characters. Raises EasyID3KeyError(key) when the frame does not
    exist.
    """
    try:
        frame = id3["RVA2:" + key[11:-5]]
    except KeyError:
        raise EasyID3KeyError(key)
    return [u"%f" % frame.peak]
def peak_set(id3, key, value):
    """Set the peak of the RVA2 frame addressed by *key*.

    *value* must be a list with exactly one item convertible to a float
    in the range 0 <= peak < 2. The frame description is *key* without
    its first 11 and last 5 characters. A missing frame is created with
    zero gain and peak on channel 1 before the peak is assigned.

    Raises ValueError when *value* does not hold exactly one item, cannot
    be parsed, or is out of range.
    """
    if len(value) != 1:
        # Interpolate the offending value; passing it as a second
        # exception argument left the "%r" placeholder unformatted.
        raise ValueError(
            "there must be exactly one peak value, not %r." % (value,))
    peak = float(value[0])
    if peak >= 2 or peak < 0:
        raise ValueError("peak must be => 0 and < 2.")
    desc = key[11:-5]
    try:
        frame = id3["RVA2:" + desc]
    except KeyError:
        frame = lib.mutagen.id3.RVA2(desc=desc, gain=0, peak=0, channel=1)
        id3.add(frame)
    frame.peak = peak
def peak_delete(id3, key):
    """Clear the peak stored in the RVA2 frame addressed by *key*.

    Does nothing when the frame is missing. If the frame still has a
    truthy gain, only the peak is reset to 0.0; otherwise the whole
    frame is deleted.
    """
    frame_id = "RVA2:" + key[11:-5]
    try:
        frame = id3[frame_id]
    except KeyError:
        return
    if frame.gain:
        frame.peak = 0.0
    else:
        del id3[frame_id]
def peakgain_list(id3, key):
    """List the replaygain gain and peak keys for every RVA2 frame.

    Each frame contributes "replaygain_<desc>_gain" followed by
    "replaygain_<desc>_peak".
    """
    found = []
    for frame in id3.getall("RVA2"):
        found.extend([
            "replaygain_%s_gain" % frame.desc,
            "replaygain_%s_peak" % frame.desc,
        ])
    return found
# Text frames that map one-to-one onto an EasyID3 key.
for frameid, key in {
    "TALB": "album",
    "TBPM": "bpm",
    "TCMP": "compilation", # iTunes extension
    "TCOM": "composer",
    "TCOP": "copyright",
    "TENC": "encodedby",
    "TEXT": "lyricist",
    "TLEN": "length",
    "TMED": "media",
    "TMOO": "mood",
    "TIT2": "title",
    "TIT3": "version",
    "TPE1": "artist",
    "TPE2": "performer",
    "TPE3": "conductor",
    "TPE4": "arranger",
    "TPOS": "discnumber",
    "TPUB": "organization",
    "TRCK": "tracknumber",
    "TOLY": "author",
    "TSO2": "albumartistsort", # iTunes extension
    "TSOA": "albumsort",
    "TSOC": "composersort", # iTunes extension
    "TSOP": "artistsort",
    "TSOT": "titlesort",
    "TSRC": "isrc",
    "TSST": "discsubtitle",
    }.iteritems():
    EasyID3.RegisterTextKey(key, frameid)

# Keys that need custom getter/setter/deleter (and sometimes lister)
# functions. "website" is registered once; a second identical
# registration would only overwrite the same handlers.
EasyID3.RegisterKey("genre", genre_get, genre_set, genre_delete)
EasyID3.RegisterKey("date", date_get, date_set, date_delete)
EasyID3.RegisterKey(
    "performer:*", performer_get, performer_set, performer_delete,
    performer_list)
EasyID3.RegisterKey("musicbrainz_trackid", musicbrainz_trackid_get,
                    musicbrainz_trackid_set, musicbrainz_trackid_delete)
EasyID3.RegisterKey("website", website_get, website_set, website_delete)
EasyID3.RegisterKey(
    "replaygain_*_gain", gain_get, gain_set, gain_delete, peakgain_list)
EasyID3.RegisterKey("replaygain_*_peak", peak_get, peak_set, peak_delete)

# At various times, information for this came from
# http://musicbrainz.org/docs/specs/metadata_tags.html
# http://bugs.musicbrainz.org/ticket/1383
# http://musicbrainz.org/doc/MusicBrainzTag
for desc, key in {
    u"MusicBrainz Artist Id": "musicbrainz_artistid",
    u"MusicBrainz Album Id": "musicbrainz_albumid",
    u"MusicBrainz Album Artist Id": "musicbrainz_albumartistid",
    u"MusicBrainz TRM Id": "musicbrainz_trmid",
    u"MusicIP PUID": "musicip_puid",
    u"MusicMagic Fingerprint": "musicip_fingerprint",
    u"MusicBrainz Album Status": "musicbrainz_albumstatus",
    u"MusicBrainz Album Type": "musicbrainz_albumtype",
    u"MusicBrainz Album Release Country": "releasecountry",
    u"MusicBrainz Disc Id": "musicbrainz_discid",
    u"ASIN": "asin",
    u"ALBUMARTISTSORT": "albumartistsort",
    u"BARCODE": "barcode",
    }.iteritems():
    EasyID3.RegisterTXXXKey(key, desc)
class EasyID3FileType(ID3FileType):
    """An ID3FileType variant whose tag class is EasyID3."""

    # Tag class the base file type is pointed at.
    ID3 = EasyID3

249
lib/mutagen/easymp4.py Normal file
View File

@@ -0,0 +1,249 @@
import lib.mutagen.mp4
from lib.mutagen import Metadata
from lib.mutagen._util import DictMixin, dict_match, utf8
# Same vendored package path as the imports above ("lib.mutagen").
from lib.mutagen.mp4 import MP4, MP4Tags, error, delete
__all__ = ["EasyMP4Tags", "EasyMP4", "delete", "error"]
class EasyMP4KeyError(error, KeyError, ValueError):
    """Raised by EasyMP4Tags when a key has no registered handler."""
class EasyMP4Tags(DictMixin, Metadata):
    """A file with MPEG-4 iTunes metadata.

    Like Vorbis comments, EasyMP4Tags keys are case-insensitive ASCII
    strings, and values are a list of Unicode strings (and these lists
    are always of length 0 or 1). If you need access to the full MP4
    metadata feature set, you should use MP4, not EasyMP4.
    """

    # Handler registries, shared by all instances and keyed by the
    # lower-cased key name or glob pattern.
    Get = {}
    Set = {}
    Delete = {}
    List = {}

    def __init__(self, *args, **kwargs):
        """Wrap a new MP4Tags object and expose its load/save/delete."""
        self.__mp4 = MP4Tags(*args, **kwargs)
        self.load = self.__mp4.load
        self.save = self.__mp4.save
        self.delete = self.__mp4.delete

    # Proxy the filename of the wrapped MP4Tags object.
    filename = property(
        lambda s: s.__mp4.filename,
        lambda s, fn: setattr(s.__mp4, 'filename', fn))

    @classmethod
    def RegisterKey(cls, key,
                    getter=None, setter=None, deleter=None, lister=None):
        """Register a new key mapping.

        A key mapping is four functions, a getter, setter, deleter,
        and lister. The key may be either a string or a glob pattern.

        The getter, deleter, and lister receive an MP4Tags instance
        and the requested key name. The setter also receives the
        desired value, which will be a list of strings.

        The getter, setter, and deleter are used to implement __getitem__,
        __setitem__, and __delitem__.

        The lister is used to implement keys(). It should return a
        list of keys that are actually in the MP4 instance, provided
        by its associated getter.
        """
        key = key.lower()
        # Only the handlers actually supplied are (re)registered.
        for registry, handler in ((cls.Get, getter), (cls.Set, setter),
                                  (cls.Delete, deleter), (cls.List, lister)):
            if handler is not None:
                registry[key] = handler

    @classmethod
    def RegisterTextKey(cls, key, atomid):
        """Register a text key.

        If the key you need to register is a simple one-to-one mapping
        of MP4 atom name to EasyMP4Tags key, then you can use this
        function:
            EasyMP4Tags.RegisterTextKey("artist", "\xa9ART")
        """
        def read(tags, key):
            return tags[atomid]

        def store(tags, key, value):
            tags[atomid] = value

        def remove(tags, key):
            del tags[atomid]

        cls.RegisterKey(key, read, store, remove)

    @classmethod
    def RegisterIntKey(cls, key, atomid, min_value=0, max_value=2**16-1):
        """Register a scalar integer key.

        Values are read back as unicode strings; values being stored are
        converted with int() and clamped to [min_value, max_value].
        """
        def read(tags, key):
            return [unicode(number) for number in tags[atomid]]

        def store(tags, key, value):
            numbers = [int(item) for item in value]
            tags[atomid] = [
                int(min(max(min_value, number), max_value))
                for number in numbers]

        def remove(tags, key):
            del tags[atomid]

        cls.RegisterKey(key, read, store, remove)

    @classmethod
    def RegisterIntPairKey(cls, key, atomid, min_value=0, max_value=2**16-1):
        """Register a key stored as (number, total) integer pairs.

        Pairs are read back as u"number/total", or just u"number" when
        the total is falsy. Values being stored may be "number/total" or
        a bare number; both parts are clamped to [min_value, max_value],
        and a missing total becomes min_value.
        """
        def clamp(number):
            return int(min(max(min_value, number), max_value))

        def read(tags, key):
            rendered = []
            for (number, total) in tags[atomid]:
                if not total:
                    rendered.append(unicode(number))
                else:
                    rendered.append(u"%d/%d" % (number, total))
            return rendered

        def store(tags, key, value):
            pairs = []
            for item in value:
                try:
                    number, total = item.split("/")
                    number = clamp(int(number))
                    total = clamp(int(total))
                except (ValueError, TypeError):
                    # Not in "number/total" form: treat as a bare number.
                    number = clamp(int(item))
                    total = min_value
                pairs.append((number, total))
            tags[atomid] = pairs

        def remove(tags, key):
            del tags[atomid]

        cls.RegisterKey(key, read, store, remove)

    @classmethod
    def RegisterFreeformKey(cls, key, name, mean="com.apple.iTunes"):
        """Register a text key.

        If the key you need to register is a simple one-to-one mapping
        of MP4 freeform atom (----) and name to EasyMP4Tags key, then
        you can use this function:
            EasyMP4Tags.RegisterFreeformKey(
                "musicbrainz_artistid", "MusicBrainz Artist Id")
        """
        atomid = "----:%s:%s" % (mean, name)

        def read(tags, key):
            return [raw.decode("utf-8", "replace") for raw in tags[atomid]]

        def store(tags, key, value):
            tags[atomid] = [utf8(item) for item in value]

        def remove(tags, key):
            del tags[atomid]

        cls.RegisterKey(key, read, store, remove)

    def __getitem__(self, key):
        """Look *key* up through its registered getter."""
        key = key.lower()
        handler = dict_match(self.Get, key)
        if handler is None:
            raise EasyMP4KeyError("%r is not a valid key" % key)
        return handler(self.__mp4, key)

    def __setitem__(self, key, value):
        """Store *value* through the registered setter.

        A single string is wrapped in a list first.
        """
        key = key.lower()
        if isinstance(value, basestring):
            value = [value]
        handler = dict_match(self.Set, key)
        if handler is None:
            raise EasyMP4KeyError("%r is not a valid key" % key)
        return handler(self.__mp4, key, value)

    def __delitem__(self, key):
        """Delete *key* through its registered deleter."""
        key = key.lower()
        handler = dict_match(self.Delete, key)
        if handler is None:
            raise EasyMP4KeyError("%r is not a valid key" % key)
        return handler(self.__mp4, key)

    def keys(self):
        """Return the registered keys that are present in the tags.

        Keys with a lister contribute whatever the lister returns; the
        others are included when a lookup on this object succeeds.
        """
        present = []
        for key in self.Get.keys():
            if key in self.List:
                present.extend(self.List[key](self.__mp4, key))
            elif key in self:
                present.append(key)
        return present

    def pprint(self):
        """Print tag key=value pairs."""
        lines = []
        for key in sorted(self.keys()):
            lines.extend(["%s=%s" % (key, value) for value in self[key]])
        return "\n".join(lines)
# Text atoms that map one-to-one onto an EasyMP4Tags key.
for atomid, key in (
    ('\xa9nam', 'title'),
    ('\xa9alb', 'album'),
    ('\xa9ART', 'artist'),
    ('aART', 'albumartist'),
    ('\xa9day', 'date'),
    ('\xa9cmt', 'comment'),
    ('desc', 'description'),
    ('\xa9grp', 'grouping'),
    ('\xa9gen', 'genre'),
    ('cprt', 'copyright'),
    ('soal', 'albumsort'),
    ('soaa', 'albumartistsort'),
    ('soar', 'artistsort'),
    ('sonm', 'titlesort'),
    ('soco', 'composersort'),
):
    EasyMP4Tags.RegisterTextKey(key, atomid)

# Freeform atoms, registered under the default "mean" of
# RegisterFreeformKey.
for name, key in (
    ('MusicBrainz Artist Id', 'musicbrainz_artistid'),
    ('MusicBrainz Track Id', 'musicbrainz_trackid'),
    ('MusicBrainz Album Id', 'musicbrainz_albumid'),
    ('MusicBrainz Album Artist Id', 'musicbrainz_albumartistid'),
    ('MusicIP PUID', 'musicip_puid'),
    ('MusicBrainz Album Status', 'musicbrainz_albumstatus'),
    ('MusicBrainz Album Type', 'musicbrainz_albumtype'),
    ('MusicBrainz Release Country', 'releasecountry'),
):
    EasyMP4Tags.RegisterFreeformKey(key, name)

# Scalar integer atoms.
for name, key in (
    ("tmpo", "bpm"),
):
    EasyMP4Tags.RegisterIntKey(key, name)

# (number, total) integer pair atoms.
for name, key in (
    ("trkn", "tracknumber"),
    ("disk", "discnumber"),
):
    EasyMP4Tags.RegisterIntPairKey(key, name)
class EasyMP4(MP4):
    """Like MP4, but uses EasyMP4Tags for tags."""

    # Tag class used in place of the base class's MP4Tags.
    MP4Tags = EasyMP4Tags

    # Re-export the EasyMP4Tags registries and two of its registration
    # helpers on this class.
    Get = EasyMP4Tags.Get
    Set = EasyMP4Tags.Set
    Delete = EasyMP4Tags.Delete
    List = EasyMP4Tags.List
    RegisterTextKey = EasyMP4Tags.RegisterTextKey
    RegisterKey = EasyMP4Tags.RegisterKey

692
lib/mutagen/flac.py Normal file
View File

@@ -0,0 +1,692 @@
# FLAC comment support for Mutagen
# Copyright 2005 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
"""Read and write FLAC Vorbis comments and stream information.
Read more about FLAC at http://flac.sourceforge.net.
FLAC supports arbitrary metadata blocks. The two most interesting ones
are the FLAC stream information block, and the Vorbis comment block;
these are also the only ones Mutagen can currently read.
This module does not handle Ogg FLAC files.
Based off documentation available at
http://flac.sourceforge.net/format.html
"""
__all__ = ["FLAC", "Open", "delete"]
import struct
from cStringIO import StringIO
from _vorbis import VCommentDict
from lib.mutagen import FileType
from lib.mutagen._util import insert_bytes
from lib.mutagen.id3 import BitPaddedInt
class error(IOError):
    """Base class for the FLAC errors defined in this module."""


class FLACNoHeaderError(error):
    """Raised when a file lacks a valid FLAC header or stream info."""


class FLACVorbisError(ValueError, error):
    """Raised for a duplicate or conflicting Vorbis comment block."""
def to_int_be(string):
"""Convert an arbitrarily-long string to a long using big-endian
byte order."""
return reduce(lambda a, b: (a << 8) + ord(b), string, 0L)
class MetadataBlock(object):
    """A generic block of FLAC metadata.

    This class is used as an ancestor for more specific blocks, and
    also as a container for data blobs of unknown blocks.

    Attributes:
    data -- raw binary data for this block
    """

    def __init__(self, data):
        """Parse the given data string or file-like as a metadata block.
        The metadata header should not be included.

        Nothing is loaded when *data* is None. Raises TypeError when
        *data* is neither a str nor an object with a read attribute.
        """
        if data is not None:
            if isinstance(data, str):
                data = StringIO(data)
            elif not hasattr(data, 'read'):
                raise TypeError(
                    "StreamInfo requires string data or a file-like")
            self.load(data)

    def load(self, data):
        """Store everything readable from the file-like *data*."""
        self.data = data.read()

    def write(self):
        """Return the raw data of this block."""
        return self.data

    def writeblocks(blocks):
        """Render metadata block as a byte string.

        Each block is written as a one-byte code, a three-byte
        big-endian length and the block's data; the code of the last
        block has its high bit (128) set. Raises error when a block does
        not fit the three-byte length field.
        """
        data = []
        codes = [[block.code, block.write()] for block in blocks]
        codes[-1][0] |= 128
        for code, datum in codes:
            byte = chr(code)
            # Only three bytes of the packed length are kept, so the
            # largest representable size is 2**24 - 1.
            if len(datum) >= 2**24:
                raise error("block is too long to write")
            length = struct.pack(">I", len(datum))[-3:]
            data.append(byte + length + datum)
        return "".join(data)
    writeblocks = staticmethod(writeblocks)

    def group_padding(blocks):
        """Consolidate FLAC padding metadata blocks.

        The overall size of the rendered blocks does not change, so
        this adds several bytes of padding for each merged block.

        All Padding blocks are removed from *blocks* (in place) and a
        single Padding block is appended at the end.
        """
        paddings = [block for block in blocks if isinstance(block, Padding)]
        for old in paddings:
            blocks.remove(old)
        # total padding size is the sum of padding sizes plus 4 bytes
        # per removed header.
        size = sum([old.length for old in paddings])
        # Use a name the loops above do not touch: Python 2 list
        # comprehensions leak their loop variable into this scope.
        merged = Padding()
        merged.length = size + 4 * (len(paddings) - 1)
        blocks.append(merged)
    group_padding = staticmethod(group_padding)
class StreamInfo(MetadataBlock):
    """FLAC stream information.

    This contains information about the audio data in the FLAC file.
    Unlike most stream information objects in Mutagen, changes to this
    one will be rewritten to the file when it is saved. Unless you are
    actually changing the audio stream itself, don't change any
    attributes of this block.

    Attributes:
    min_blocksize -- minimum audio block size
    max_blocksize -- maximum audio block size
    sample_rate -- audio sample rate in Hz
    channels -- audio channels (1 for mono, 2 for stereo)
    bits_per_sample -- bits per sample
    total_samples -- total samples in file
    length -- audio length in seconds
    """

    code = 0

    def __eq__(self, other):
        """Compare block sizes, sample rate, channels, bits per sample
        and total samples; anything lacking those attributes is unequal.
        """
        try:
            return (self.min_blocksize == other.min_blocksize and
                    self.max_blocksize == other.max_blocksize and
                    self.sample_rate == other.sample_rate and
                    self.channels == other.channels and
                    self.bits_per_sample == other.bits_per_sample and
                    self.total_samples == other.total_samples)
        # Same exceptions the sibling block classes treat as "not
        # equal"; a bare except would also swallow KeyboardInterrupt.
        except (AttributeError, TypeError):
            return False

    def load(self, data):
        """Parse the stream information fields from the file-like *data*."""
        self.min_blocksize = int(to_int_be(data.read(2)))
        self.max_blocksize = int(to_int_be(data.read(2)))
        self.min_framesize = int(to_int_be(data.read(3)))
        self.max_framesize = int(to_int_be(data.read(3)))
        # first 16 bits of sample rate
        sample_first = to_int_be(data.read(2))
        # last 4 bits of sample rate, 3 of channels, first 1 of bits/sample
        sample_channels_bps = to_int_be(data.read(1))
        # last 4 of bits/sample, 36 of total samples
        bps_total = to_int_be(data.read(5))

        sample_tail = sample_channels_bps >> 4
        self.sample_rate = int((sample_first << 4) + sample_tail)
        self.channels = int(((sample_channels_bps >> 1) & 7) + 1)
        bps_tail = bps_total >> 36
        bps_head = (sample_channels_bps & 1) << 4
        # channels and bits_per_sample are stored minus one
        self.bits_per_sample = int(bps_head + bps_tail + 1)
        self.total_samples = bps_total & 0xFFFFFFFFF
        self.length = self.total_samples / float(self.sample_rate)

        self.md5_signature = to_int_be(data.read(16))

    def write(self):
        """Render the stream information fields as a byte string."""
        f = StringIO()
        f.write(struct.pack(">I", self.min_blocksize)[-2:])
        f.write(struct.pack(">I", self.max_blocksize)[-2:])
        f.write(struct.pack(">I", self.min_framesize)[-3:])
        f.write(struct.pack(">I", self.max_framesize)[-3:])

        # first 16 bits of sample rate
        f.write(struct.pack(">I", self.sample_rate >> 4)[-2:])
        # 4 bits sample, 3 channel, 1 bps
        byte = (self.sample_rate & 0xF) << 4
        byte += ((self.channels - 1) & 7) << 1
        byte += ((self.bits_per_sample - 1) >> 4) & 1
        f.write(chr(byte))
        # 4 bits of bps, 4 of sample count
        byte = ((self.bits_per_sample - 1) & 0xF) << 4
        byte += (self.total_samples >> 32) & 0xF
        f.write(chr(byte))
        # last 32 of sample count
        f.write(struct.pack(">I", self.total_samples & 0xFFFFFFFF))
        # MD5 signature
        sig = self.md5_signature
        f.write(struct.pack(
            ">4I", (sig >> 96) & 0xFFFFFFFF, (sig >> 64) & 0xFFFFFFFF,
            (sig >> 32) & 0xFFFFFFFF, sig & 0xFFFFFFFF))
        return f.getvalue()

    def pprint(self):
        """Return a one-line summary with the length and sample rate."""
        return "FLAC, %.2f seconds, %d Hz" % (self.length, self.sample_rate)
class SeekPoint(tuple):
    """A single seek point in a FLAC file.

    Placeholder seek points have first_sample of 0xFFFFFFFFFFFFFFFFL,
    and byte_offset and num_samples undefined. Seek points must be
    sorted in ascending order by first_sample number. Seek points must
    be unique by first_sample number, except for placeholder
    points. Placeholder points must occur last in the table and there
    may be any number of them.

    Attributes:
    first_sample -- sample number of first sample in the target frame
    byte_offset -- offset from first frame to target frame
    num_samples -- number of samples in target frame
    """

    def __new__(cls, first_sample, byte_offset, num_samples):
        """Build the underlying 3-tuple.

        super() takes the defining class first and *cls* second; with
        the arguments the other way round, subclasses raise TypeError.
        """
        return super(SeekPoint, cls).__new__(
            cls, (first_sample, byte_offset, num_samples))

    first_sample = property(lambda self: self[0])
    byte_offset = property(lambda self: self[1])
    num_samples = property(lambda self: self[2])
class SeekTable(MetadataBlock):
    """Read and write FLAC seek tables.

    Attributes:
    seekpoints -- list of SeekPoint objects
    """

    # Each seek point is two unsigned 64-bit fields and one unsigned
    # 16-bit field, big-endian.
    __SEEKPOINT_FORMAT = '>QQH'
    __SEEKPOINT_SIZE = struct.calcsize(__SEEKPOINT_FORMAT)

    code = 3

    def __init__(self, data):
        """Start with no seek points, then parse *data* if it is not None."""
        self.seekpoints = []
        super(SeekTable, self).__init__(data)

    def __eq__(self, other):
        """Two tables are equal when their seekpoints lists are equal."""
        try:
            return self.seekpoints == other.seekpoints
        except (AttributeError, TypeError):
            return False

    def load(self, data):
        """Read fixed-size seek points from *data* until a short read."""
        record_size = self.__SEEKPOINT_SIZE
        record_format = self.__SEEKPOINT_FORMAT
        points = self.seekpoints = []
        while True:
            raw = data.read(record_size)
            if len(raw) != record_size:
                break
            points.append(SeekPoint(*struct.unpack(record_format, raw)))

    def write(self):
        """Render all seek points, in order, as a byte string."""
        record_format = self.__SEEKPOINT_FORMAT
        return "".join([
            struct.pack(record_format, point.first_sample,
                        point.byte_offset, point.num_samples)
            for point in self.seekpoints])

    def __repr__(self):
        return "<%s seekpoints=%r>" % (type(self).__name__, self.seekpoints)
class VCFLACDict(VCommentDict):
    """Read and write FLAC Vorbis comments.

    FLACs don't use the framing bit at the end of the comment block.
    So this extends VCommentDict to not use the framing bit.
    """

    code = 4

    def load(self, data, errors='replace', framing=False):
        """Delegate to VCommentDict.load with framing off by default."""
        parent = super(VCFLACDict, self)
        parent.load(data, errors=errors, framing=framing)

    def write(self, framing=False):
        """Delegate to VCommentDict.write with framing off by default."""
        parent = super(VCFLACDict, self)
        return parent.write(framing=framing)
class CueSheetTrackIndex(tuple):
    """Index for a track in a cuesheet.

    For CD-DA, an index_number of 0 corresponds to the track
    pre-gap. The first index in a track must have a number of 0 or 1,
    and subsequently, index_numbers must increase by 1. Index_numbers
    must be unique within a track. And index_offset must be evenly
    divisible by 588 samples.

    Attributes:
    index_number -- index point number
    index_offset -- offset in samples from track start
    """

    def __new__(cls, index_number, index_offset):
        """Build the underlying 2-tuple.

        super() takes the defining class first and *cls* second; with
        the arguments the other way round, subclasses raise TypeError.
        """
        return super(CueSheetTrackIndex, cls).__new__(
            cls, (index_number, index_offset))

    index_number = property(lambda self: self[0])
    index_offset = property(lambda self: self[1])
class CueSheetTrack(object):
    """A track in a cuesheet.

    For CD-DA, track_numbers must be 1-99, or 170 for the
    lead-out. Track_numbers must be unique within a cue sheet. There
    must be at least one index in every track except the lead-out track
    which must have none.

    Attributes:
    track_number -- track number
    start_offset -- track offset in samples from start of FLAC stream
    isrc -- ISRC code
    type -- 0 for audio, 1 for digital data
    pre_emphasis -- true if the track is recorded with pre-emphasis
    indexes -- list of CueSheetTrackIndex objects
    """

    def __init__(self, track_number, start_offset, isrc='', type_=0,
                 pre_emphasis=False):
        """Store the given fields; the track starts with no indexes."""
        self.indexes = []
        self.track_number = track_number
        self.start_offset = start_offset
        self.isrc = isrc
        self.type = type_
        self.pre_emphasis = pre_emphasis

    def __eq__(self, other):
        """Compare all six attributes, stopping at the first mismatch.

        Objects lacking one of the attributes compare unequal.
        """
        names = ("track_number", "start_offset", "isrc", "type",
                 "pre_emphasis", "indexes")
        try:
            for name in names:
                if not getattr(self, name) == getattr(other, name):
                    return False
            return True
        except (AttributeError, TypeError):
            return False

    def __repr__(self):
        template = ("<%s number=%r, offset=%d, isrc=%r, type=%r, "
                    "pre_emphasis=%r, indexes=%r)>")
        fields = (type(self).__name__, self.track_number, self.start_offset,
                  self.isrc, self.type, self.pre_emphasis, self.indexes)
        return template % fields
class CueSheet(MetadataBlock):
    """Read and write FLAC embedded cue sheets.

    Number of tracks should be from 1 to 100. There should always be
    exactly one lead-out track and that track must be the last track
    in the cue sheet.

    Attributes:
    media_catalog_number -- media catalog number in ASCII
    lead_in_samples -- number of lead-in samples
    compact_disc -- true if the cuesheet corresponds to a compact disc
    tracks -- list of CueSheetTrack objects
    lead_out -- lead-out as CueSheetTrack or None if lead-out was not found
    """

    # struct layouts of the sheet header, a track record and an index
    # record ("x" entries are pad bytes).
    __CUESHEET_FORMAT = '>128sQB258xB'
    __CUESHEET_SIZE = struct.calcsize(__CUESHEET_FORMAT)
    __CUESHEET_TRACK_FORMAT = '>QB12sB13xB'
    __CUESHEET_TRACK_SIZE = struct.calcsize(__CUESHEET_TRACK_FORMAT)
    __CUESHEET_TRACKINDEX_FORMAT = '>QB3x'
    __CUESHEET_TRACKINDEX_SIZE = struct.calcsize(__CUESHEET_TRACKINDEX_FORMAT)

    code = 5

    # Class-level defaults, used until load() or the caller sets them.
    media_catalog_number = ''
    lead_in_samples = 88200
    compact_disc = True

    def __init__(self, data):
        """Start with no tracks, then parse *data* if it is not None."""
        self.tracks = []
        super(CueSheet, self).__init__(data)

    def __eq__(self, other):
        """Compare catalog number, lead-in, disc flag and tracks."""
        try:
            return (
                self.media_catalog_number == other.media_catalog_number and
                self.lead_in_samples == other.lead_in_samples and
                self.compact_disc == other.compact_disc and
                self.tracks == other.tracks)
        except (AttributeError, TypeError):
            return False

    def load(self, data):
        """Parse the header, then each track followed by its indexes."""
        raw_header = data.read(self.__CUESHEET_SIZE)
        catalog, lead_in, sheet_flags, track_count = struct.unpack(
            self.__CUESHEET_FORMAT, raw_header)
        self.media_catalog_number = catalog.rstrip('\0')
        self.lead_in_samples = lead_in
        self.compact_disc = bool(sheet_flags & 0x80)
        self.tracks = []
        for _track in range(track_count):
            raw_track = data.read(self.__CUESHEET_TRACK_SIZE)
            (start_offset, track_number, isrc_padded,
             track_flags, index_count) = struct.unpack(
                self.__CUESHEET_TRACK_FORMAT, raw_track)
            # Bit 7 of the flags is the track type, bit 6 pre-emphasis.
            track = CueSheetTrack(
                track_number, start_offset, isrc_padded.rstrip('\0'),
                (track_flags & 0x80) >> 7, bool(track_flags & 0x40))
            for _index in range(index_count):
                raw_index = data.read(self.__CUESHEET_TRACKINDEX_SIZE)
                index_offset, index_number = struct.unpack(
                    self.__CUESHEET_TRACKINDEX_FORMAT, raw_index)
                track.indexes.append(
                    CueSheetTrackIndex(index_number, index_offset))
            self.tracks.append(track)

    def write(self):
        """Render the header, tracks and indexes as a byte string."""
        if self.compact_disc:
            sheet_flags = 0x80
        else:
            sheet_flags = 0
        parts = [struct.pack(
            self.__CUESHEET_FORMAT, self.media_catalog_number,
            self.lead_in_samples, sheet_flags, len(self.tracks))]
        for track in self.tracks:
            track_flags = (track.type & 1) << 7
            if track.pre_emphasis:
                track_flags |= 0x40
            parts.append(struct.pack(
                self.__CUESHEET_TRACK_FORMAT, track.start_offset,
                track.track_number, track.isrc, track_flags,
                len(track.indexes)))
            for index in track.indexes:
                parts.append(struct.pack(
                    self.__CUESHEET_TRACKINDEX_FORMAT,
                    index.index_offset, index.index_number))
        return "".join(parts)

    def __repr__(self):
        template = ("<%s media_catalog_number=%r, lead_in=%r, "
                    "compact_disc=%r, tracks=%r>")
        return template % (
            type(self).__name__, self.media_catalog_number,
            self.lead_in_samples, self.compact_disc, self.tracks)
class Picture(MetadataBlock):
    """Read and write FLAC embed pictures.

    Attributes:
    type -- picture type (same as types for ID3 APIC frames)
    mime -- MIME type of the picture
    desc -- picture's description
    width -- width in pixels
    height -- height in pixels
    depth -- color depth in bits-per-pixel
    colors -- number of colors for indexed palettes (like GIF),
              0 for non-indexed
    data -- picture data
    """

    code = 6

    def __init__(self, data=None):
        """Set empty defaults, then parse *data* if it is not None."""
        self.type = 0
        self.mime = u''
        self.desc = u''
        self.width = self.height = 0
        self.depth = self.colors = 0
        self.data = ''
        super(Picture, self).__init__(data)

    def __eq__(self, other):
        """Compare all eight attributes, stopping at the first mismatch.

        Objects lacking one of the attributes compare unequal.
        """
        names = ("type", "mime", "desc", "width", "height", "depth",
                 "colors", "data")
        try:
            for name in names:
                if not getattr(self, name) == getattr(other, name):
                    return False
            return True
        except (AttributeError, TypeError):
            return False

    def load(self, data):
        """Parse the picture fields from the file-like *data*.

        MIME type, description and image data are each preceded by a
        32-bit big-endian length; the two text fields are decoded as
        UTF-8 with undecodable bytes replaced.
        """
        self.type, mime_length = struct.unpack('>2I', data.read(8))
        self.mime = data.read(mime_length).decode('UTF-8', 'replace')
        (desc_length,) = struct.unpack('>I', data.read(4))
        self.desc = data.read(desc_length).decode('UTF-8', 'replace')
        (self.width, self.height, self.depth,
         self.colors, data_length) = struct.unpack('>5I', data.read(20))
        self.data = data.read(data_length)

    def write(self):
        """Render the picture fields as a byte string."""
        out = StringIO()
        encoded_mime = self.mime.encode('UTF-8')
        out.write(struct.pack('>2I', self.type, len(encoded_mime)))
        out.write(encoded_mime)
        encoded_desc = self.desc.encode('UTF-8')
        out.write(struct.pack('>I', len(encoded_desc)))
        out.write(encoded_desc)
        out.write(struct.pack('>5I', self.width, self.height, self.depth,
                              self.colors, len(self.data)))
        out.write(self.data)
        return out.getvalue()

    def __repr__(self):
        return "<%s '%s' (%d bytes)>" % (
            type(self).__name__, self.mime, len(self.data))
class Padding(MetadataBlock):
    """Empty padding space for metadata blocks.

    To avoid rewriting the entire FLAC file when editing comments,
    metadata is often padded. Padding should occur at the end, and no
    more than one padding block should be in any FLAC file. Mutagen
    handles this with MetadataBlock.group_padding.
    """

    code = 1

    def __init__(self, data=""):
        """Create a padding block as long as *data* (empty by default)."""
        super(Padding, self).__init__(data)

    def load(self, data):
        """Record only how many bytes *data* holds, not the bytes."""
        self.length = len(data.read())

    def write(self):
        """Return self.length zero bytes, or raise error if impossible."""
        try:
            zeros = "\x00" * self.length
        # On some 64 bit platforms this won't generate a MemoryError
        # or OverflowError since you might have enough RAM, but it
        # still generates a ValueError. On other 64 bit platforms,
        # this will still succeed for extremely large values.
        # Those should never happen in the real world, and if they
        # do, writeblocks will catch it.
        except (OverflowError, ValueError, MemoryError):
            raise error("cannot write %d bytes" % self.length)
        return zeros

    def __eq__(self, other):
        """Padding blocks are equal when their lengths are equal."""
        if not isinstance(other, Padding):
            return False
        return self.length == other.length

    def __repr__(self):
        return "<%s (%d bytes)>" % (type(self).__name__, self.length)
class FLAC(FileType):
    """A FLAC audio file.

    Attributes:
    info -- stream information (length, bitrate, sample rate)
    tags -- metadata tags, if any
    cuesheet -- CueSheet object, if any
    seektable -- SeekTable object, if any
    pictures -- list of embedded pictures
    """

    _mimes = ["audio/x-flac", "application/x-flac"]

    # Known metadata block types, indexed by ID.
    METADATA_BLOCKS = [StreamInfo, Padding, None, SeekTable, VCFLACDict,
                       CueSheet, Picture]

    def score(filename, fileobj, header):
        """Score how likely the file is FLAC.

        One point for a "fLaC" header plus three for a ".flac" name.
        """
        return (header.startswith("fLaC") +
                filename.lower().endswith(".flac") * 3)
    score = staticmethod(score)

    def __read_metadata_block(self, file):
        """Read one metadata block and append it to metadata_blocks.

        Block types that cannot be built from METADATA_BLOCKS are kept
        as generic MetadataBlock objects. Returns a true value while
        more blocks follow (the header's high bit is clear).
        """
        byte = ord(file.read(1))
        size = to_int_be(file.read(3))
        try:
            data = file.read(size)
            if len(data) != size:
                raise error(
                    "file said %d bytes, read %d bytes" % (size, len(data)))
            block = self.METADATA_BLOCKS[byte & 0x7F](data)
        except (IndexError, TypeError):
            # Unknown block ID (IndexError) or the None placeholder in
            # METADATA_BLOCKS (TypeError): keep the raw data.
            block = MetadataBlock(data)
            block.code = byte & 0x7F
            self.metadata_blocks.append(block)
        else:
            self.metadata_blocks.append(block)
            if block.code == VCFLACDict.code:
                if self.tags is None:
                    self.tags = block
                else:
                    raise FLACVorbisError("> 1 Vorbis comment block found")
            elif block.code == CueSheet.code:
                if self.cuesheet is None:
                    self.cuesheet = block
                else:
                    raise error("> 1 CueSheet block found")
            elif block.code == SeekTable.code:
                if self.seektable is None:
                    self.seektable = block
                else:
                    raise error("> 1 SeekTable block found")
        return (byte >> 7) ^ 1

    def add_tags(self):
        """Add a Vorbis comment block to the file."""
        if self.tags is None:
            self.tags = VCFLACDict()
            self.metadata_blocks.append(self.tags)
        else:
            raise FLACVorbisError("a Vorbis comment already exists")
    add_vorbiscomment = add_tags

    def delete(self, filename=None):
        """Remove Vorbis comments from a file.

        If no filename is given, the one most recently loaded is used.
        """
        if filename is None:
            filename = self.filename
        for s in list(self.metadata_blocks):
            if isinstance(s, VCFLACDict):
                self.metadata_blocks.remove(s)
                self.tags = None
                # Save to the requested file, not always self.filename.
                self.save(filename)
                break

    vc = property(lambda s: s.tags, doc="Alias for tags; don't use this.")

    def load(self, filename):
        """Load file information from a filename.

        Raises FLACNoHeaderError when the header is invalid, when audio
        does not follow the metadata, or when the first block has no
        length attribute.
        """
        self.metadata_blocks = []
        self.tags = None
        self.cuesheet = None
        self.seektable = None
        self.filename = filename
        fileobj = file(filename, "rb")
        try:
            self.__check_header(fileobj)
            while self.__read_metadata_block(fileobj):
                pass

            if fileobj.read(2) not in ["\xff\xf8", "\xff\xf9"]:
                raise FLACNoHeaderError("End of metadata did not start audio")
        finally:
            fileobj.close()

        try:
            self.metadata_blocks[0].length
        except (AttributeError, IndexError):
            raise FLACNoHeaderError("Stream info block not found")

    info = property(lambda s: s.metadata_blocks[0])

    def add_picture(self, picture):
        """Add a new picture to the file."""
        self.metadata_blocks.append(picture)

    def clear_pictures(self):
        """Delete all pictures from the file."""
        self.metadata_blocks = filter(lambda b: b.code != Picture.code,
                                      self.metadata_blocks)

    def __get_pictures(self):
        return filter(lambda b: b.code == Picture.code, self.metadata_blocks)
    pictures = property(__get_pictures, doc="List of embedded pictures")

    def save(self, filename=None, deleteid3=False):
        """Save metadata blocks to a file.

        If no filename is given, the one most recently loaded is used.
        With deleteid3 set, the ID3v2 header counted by the header check
        and a trailing 128-byte "TAG" block are removed as well.
        """
        if filename is None:
            filename = self.filename
        f = open(filename, 'rb+')
        # Close the file on every exit path, including errors raised
        # part-way through rewriting the metadata.
        try:
            # Ensure we've got padding at the end, and only at the end.
            # If adding makes it too large, we'll scale it down later.
            self.metadata_blocks.append(Padding('\x00' * 1020))
            MetadataBlock.group_padding(self.metadata_blocks)

            header = self.__check_header(f)
            # "fLaC" and maybe ID3
            available = self.__find_audio_offset(f) - header
            data = MetadataBlock.writeblocks(self.metadata_blocks)

            # Delete ID3v2
            if deleteid3 and header > 4:
                available += header - 4
                header = 4

            if len(data) > available:
                # If we have too much data, see if we can reduce padding.
                padding = self.metadata_blocks[-1]
                newlength = padding.length - (len(data) - available)
                if newlength > 0:
                    padding.length = newlength
                    data = MetadataBlock.writeblocks(self.metadata_blocks)
                    assert len(data) == available

            elif len(data) < available:
                # If we have too little data, increase padding.
                self.metadata_blocks[-1].length += (available - len(data))
                data = MetadataBlock.writeblocks(self.metadata_blocks)
                assert len(data) == available

            if len(data) != available:
                # We couldn't reduce the padding enough.
                diff = (len(data) - available)
                insert_bytes(f, diff, header)

            f.seek(header - 4)
            f.write("fLaC" + data)

            # Delete ID3v1
            if deleteid3:
                try:
                    f.seek(-128, 2)
                except IOError:
                    pass
                else:
                    if f.read(3) == "TAG":
                        f.seek(-128, 2)
                        f.truncate()
        finally:
            f.close()

    def __find_audio_offset(self, fileobj):
        """Skip the metadata blocks; return the offset just past them."""
        byte = 0x00
        while not (byte >> 7) & 1:
            byte = ord(fileobj.read(1))
            size = to_int_be(fileobj.read(3))
            fileobj.read(size)
        return fileobj.tell()

    def __check_header(self, fileobj):
        """Return the offset just past the "fLaC" marker.

        The marker may be at the start of the file or after a leading
        ID3 tag. Raises FLACNoHeaderError when it is not found.
        """
        size = 4
        header = fileobj.read(4)
        if header != "fLaC":
            size = None
            if header[:3] == "ID3":
                size = 14 + BitPaddedInt(fileobj.read(6)[2:])
                fileobj.seek(size - 4)
                if fileobj.read(4) != "fLaC":
                    size = None
        if size is None:
            raise FLACNoHeaderError(
                "%r is not a valid FLAC file" % fileobj.name)
        return size
# Module-level alias for the FLAC class.
Open = FLAC


def delete(filename):
    """Remove tags from a file."""
    flac_file = FLAC(filename)
    flac_file.delete()

2005
lib/mutagen/id3.py Normal file

File diff suppressed because it is too large Load Diff

499
lib/mutagen/m4a.py Normal file
View File

@@ -0,0 +1,499 @@
# Copyright 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# $Id: m4a.py 4231 2007-12-15 08:13:53Z luks $
"""Read and write MPEG-4 audio files with iTunes metadata.
This module will read MPEG-4 audio information and metadata,
as found in Apple's M4A (aka MP4, M4B, M4P) files.
There is no official specification for this format. The source code
for TagLib, FAAD, and various MPEG specifications at
http://developer.apple.com/documentation/QuickTime/QTFF/,
http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt,
and http://wiki.multimedia.cx/index.php?title=Apple_QuickTime were all
consulted.
This module does not support 64 bit atom sizes, and so will not
work on metadata over 4GB.
"""
import struct
import sys
from cStringIO import StringIO
from lib.mutagen import FileType, Metadata
from lib.mutagen._constants import GENRES
from lib.mutagen._util import cdata, insert_bytes, delete_bytes, DictProxy
# Base class of the exceptions defined in this module.
class error(IOError): pass
# Raised by M4ATags.load when the moov.udta.meta.ilst atom is missing, and
# by M4A.load when building the tags fails.
class M4AMetadataError(error): pass
# Raised by M4AInfo when the track has no audio data, and by M4A.load when
# reading the stream information fails.
class M4AStreamInfoError(error): pass
# Raised by the tag renderers when a value does not fit its atom
# (numeric pairs and short integers outside the 16 bit range).
class M4AMetadataValueError(ValueError, M4AMetadataError): pass
# Importing this module emits a DeprecationWarning pointing at mutagen.mp4.
import warnings
warnings.warn(
    "mutagen.m4a is deprecated; use mutagen.mp4 instead.", DeprecationWarning)
# This is not an exhaustive list of container atoms, but just the
# ones this module needs to peek inside.
_CONTAINERS = ["moov", "udta", "trak", "mdia", "meta", "ilst",
               "stbl", "minf", "stsd"]
# Number of extra bytes Atom.__init__ skips after a container's 8 byte
# header before it starts parsing child atoms, keyed by atom name.
_SKIP_SIZE = { "meta": 4 }
# Names exported by "from ... import *".
__all__ = ['M4A', 'Open', 'delete', 'M4ACover']
class M4ACover(str):
    """A cover artwork.

    The string value is the raw image data.

    Attributes:
    imageformat -- format of the image (either FORMAT_JPEG or FORMAT_PNG)
    """

    FORMAT_JPEG = 0x0D
    FORMAT_PNG = 0x0E

    def __new__(cls, data, imageformat=None):
        cover = str.__new__(cls, data)
        # JPEG is assumed when the caller does not say otherwise.
        if imageformat is None:
            imageformat = M4ACover.FORMAT_JPEG
        cover.imageformat = imageformat
        # Only provide a "format" attribute when str does not already
        # have something under that name.
        if not hasattr(cover, "format"):
            cover.format = imageformat
        return cover
class Atom(object):
    """An individual atom.

    Attributes:
    children -- list child atoms (or None for non-container atoms)
    length -- length of this atom, including length and name
    name -- four byte name of the atom, as a str
    offset -- location in the constructor-given fileobj of this atom

    This structure should only be used internally by Mutagen.
    """

    children = None

    def __init__(self, fileobj):
        """Parse the atom that starts at the current position of fileobj.

        Container atoms (see _CONTAINERS) are parsed recursively; for
        any other atom the file position is moved to the atom's end.
        Raises error for atoms that use a 64 bit size.
        """
        self.offset = fileobj.tell()
        header = fileobj.read(8)
        self.length, self.name = struct.unpack(">I4s", header)
        if self.length == 1:
            raise error("64 bit atom sizes are not supported")
        if self.length < 8:
            # Too short to be a full atom; nothing more is read.
            return

        end = self.offset + self.length
        if self.name not in _CONTAINERS:
            fileobj.seek(end, 0)
            return

        self.children = []
        fileobj.seek(_SKIP_SIZE.get(self.name, 0), 1)
        while fileobj.tell() < end:
            self.children.append(Atom(fileobj))

    @staticmethod
    def render(name, data):
        """Render raw atom data."""
        # this raises OverflowError if Py_ssize_t can't handle the atom data
        size = len(data) + 8
        if size > 0xFFFFFFFF:
            # Too big for the 32 bit field: write the 64 bit form.
            return struct.pack(">I4sQ", 1, name, size + 8) + data
        return struct.pack(">I4s", size, name) + data

    def __getitem__(self, remaining):
        """Look up a child atom, potentially recursively.

        e.g. atom['udta', 'meta'] => <Atom name='meta' ...>
        """
        if not remaining:
            return self
        if self.children is None:
            raise KeyError("%r is not a container" % self.name)
        wanted = remaining[0]
        for child in self.children:
            if child.name == wanted:
                return child[remaining[1:]]
        raise KeyError("%r not found" % wanted)

    def __repr__(self):
        klass = self.__class__.__name__
        head = "<%s name=%r length=%r offset=%r" % (
            klass, self.name, self.length, self.offset)
        if self.children is None:
            return head + ">"
        # Children are listed one per line, indented by one space.
        lines = []
        for child in self.children:
            for line in repr(child).splitlines():
                lines.append(" " + line)
        return head + "\n" + "\n".join(lines) + ">"
class Atoms(object):
    """Root atoms in a given file.

    Attributes:
    atoms -- a list of top-level atoms as Atom objects

    This structure should only be used internally by Mutagen.
    """

    def __init__(self, fileobj):
        """Parse every top-level atom of fileobj, starting at offset 0."""
        self.atoms = []
        fileobj.seek(0, 2)
        end = fileobj.tell()
        fileobj.seek(0)
        # An atom header takes 8 bytes.  Stop once fewer than that are
        # left, so trailing garbage does not make Atom() fail with a raw
        # struct.error on a short read.
        while fileobj.tell() + 8 <= end:
            self.atoms.append(Atom(fileobj))

    def path(self, *names):
        """Look up and return the complete path of an atom.

        For example, atoms.path('moov', 'udta', 'meta') will return a
        list of three atoms, corresponding to the moov, udta, and meta
        atoms.

        Raises KeyError when one of the names cannot be found.
        """
        path = [self]
        for name in names:
            path.append(path[-1][name,])
        return path[1:]

    def __getitem__(self, names):
        """Look up a child atom.

        'names' may be a list of atoms (['moov', 'udta']) or a string
        specifying the complete path ('moov.udta').

        Raises KeyError when the first name matches no top-level atom
        (or when a nested lookup fails).
        """
        if isinstance(names, basestring):
            names = names.split(".")
        for child in self.atoms:
            if child.name == names[0]:
                return child[names[1:]]
        raise KeyError("%s not found" % names[0])

    def __repr__(self):
        return "\n".join([repr(child) for child in self.atoms])
class M4ATags(DictProxy, Metadata):
"""Dictionary containing Apple iTunes metadata list key/values.
Keys are four byte identifiers, except for freeform ('----')
keys. Values are usually unicode strings, but some atoms have a
special structure:
cpil -- boolean
trkn, disk -- tuple of 16 bit ints (current, total)
tmpo -- 16 bit int
covr -- list of M4ACover objects (which are tagged strs)
gnre -- not supported. Use '\\xa9gen' instead.
The freeform '----' frames use a key in the format '----:mean:name'
where 'mean' is usually 'com.apple.iTunes' and 'name' is a unique
identifier for this frame. The value is a str, but is probably
text that can be decoded as UTF-8.
M4A tag data cannot exist outside of the structure of an M4A file,
so this class should not be manually instantiated.
Unknown non-text tags are removed.
"""
def load(self, atoms, fileobj):
try: ilst = atoms["moov.udta.meta.ilst"]
except KeyError, key:
raise M4AMetadataError(key)
for atom in ilst.children:
fileobj.seek(atom.offset + 8)
data = fileobj.read(atom.length - 8)
parse = self.__atoms.get(atom.name, (M4ATags.__parse_text,))[0]
parse(self, atom, data)
def __key_sort((key1, v1), (key2, v2)):
# iTunes always writes the tags in order of "relevance", try
# to copy it as closely as possible.
order = ["\xa9nam", "\xa9ART", "\xa9wrt", "\xa9alb",
"\xa9gen", "gnre", "trkn", "disk",
"\xa9day", "cpil", "tmpo", "\xa9too",
"----", "covr", "\xa9lyr"]
order = dict(zip(order, range(len(order))))
last = len(order)
# If there's no key-based way to distinguish, order by length.
# If there's still no way, go by string comparison on the
# values, so we at least have something determinstic.
return (cmp(order.get(key1[:4], last), order.get(key2[:4], last)) or
cmp(len(v1), len(v2)) or cmp(v1, v2))
__key_sort = staticmethod(__key_sort)
def save(self, filename):
"""Save the metadata to the given filename."""
values = []
items = self.items()
items.sort(self.__key_sort)
for key, value in items:
render = self.__atoms.get(
key[:4], (None, M4ATags.__render_text))[1]
values.append(render(self, key, value))
data = Atom.render("ilst", "".join(values))
# Find the old atoms.
fileobj = file(filename, "rb+")
try:
atoms = Atoms(fileobj)
moov = atoms["moov"]
if moov != atoms.atoms[-1]:
# "Free" the old moov block. Something in the mdat
# block is not happy when its offset changes and it
# won't play back. So, rather than try to figure that
# out, just move the moov atom to the end of the file.
offset = self.__move_moov(fileobj, moov)
else:
offset = 0
try:
path = atoms.path("moov", "udta", "meta", "ilst")
except KeyError:
self.__save_new(fileobj, atoms, data, offset)
else:
self.__save_existing(fileobj, atoms, path, data, offset)
finally:
fileobj.close()
def __move_moov(self, fileobj, moov):
fileobj.seek(moov.offset)
data = fileobj.read(moov.length)
fileobj.seek(moov.offset)
free = Atom.render("free", "\x00" * (moov.length - 8))
fileobj.write(free)
fileobj.seek(0, 2)
# Figure out how far we have to shift all our successive
# seek calls, relative to what the atoms say.
old_end = fileobj.tell()
fileobj.write(data)
return old_end - moov.offset
def __save_new(self, fileobj, atoms, ilst, offset):
hdlr = Atom.render("hdlr", "\x00" * 8 + "mdirappl" + "\x00" * 9)
meta = Atom.render("meta", "\x00\x00\x00\x00" + hdlr + ilst)
moov, udta = atoms.path("moov", "udta")
insert_bytes(fileobj, len(meta), udta.offset + offset + 8)
fileobj.seek(udta.offset + offset + 8)
fileobj.write(meta)
self.__update_parents(fileobj, [moov, udta], len(meta), offset)
def __save_existing(self, fileobj, atoms, path, data, offset):
# Replace the old ilst atom.
ilst = path.pop()
delta = len(data) - ilst.length
fileobj.seek(ilst.offset + offset)
if delta > 0:
insert_bytes(fileobj, delta, ilst.offset + offset)
elif delta < 0:
delete_bytes(fileobj, -delta, ilst.offset + offset)
fileobj.seek(ilst.offset + offset)
fileobj.write(data)
self.__update_parents(fileobj, path, delta, offset)
def __update_parents(self, fileobj, path, delta, offset):
# Update all parent atoms with the new size.
for atom in path:
fileobj.seek(atom.offset + offset)
size = cdata.uint_be(fileobj.read(4)) + delta
fileobj.seek(atom.offset + offset)
fileobj.write(cdata.to_uint_be(size))
def __render_data(self, key, flags, data):
data = struct.pack(">2I", flags, 0) + data
return Atom.render(key, Atom.render("data", data))
def __parse_freeform(self, atom, data):
try:
fileobj = StringIO(data)
mean_length = cdata.uint_be(fileobj.read(4))
# skip over 8 bytes of atom name, flags
mean = fileobj.read(mean_length - 4)[8:]
name_length = cdata.uint_be(fileobj.read(4))
name = fileobj.read(name_length - 4)[8:]
value_length = cdata.uint_be(fileobj.read(4))
# Name, flags, and reserved bytes
value = fileobj.read(value_length - 4)[12:]
except struct.error:
# Some ---- atoms have no data atom, I have no clue why
# they actually end up in the file.
pass
else:
self["%s:%s:%s" % (atom.name, mean, name)] = value
def __render_freeform(self, key, value):
dummy, mean, name = key.split(":", 2)
mean = struct.pack(">I4sI", len(mean) + 12, "mean", 0) + mean
name = struct.pack(">I4sI", len(name) + 12, "name", 0) + name
value = struct.pack(">I4s2I", len(value) + 16, "data", 0x1, 0) + value
final = mean + name + value
return Atom.render("----", mean + name + value)
def __parse_pair(self, atom, data):
self[atom.name] = struct.unpack(">2H", data[18:22])
def __render_pair(self, key, value):
track, total = value
if 0 <= track < 1 << 16 and 0 <= total < 1 << 16:
data = struct.pack(">4H", 0, track, total, 0)
return self.__render_data(key, 0, data)
else:
raise M4AMetadataValueError("invalid numeric pair %r" % (value,))
def __render_pair_no_trailing(self, key, value):
track, total = value
if 0 <= track < 1 << 16 and 0 <= total < 1 << 16:
data = struct.pack(">3H", 0, track, total)
return self.__render_data(key, 0, data)
else:
raise M4AMetadataValueError("invalid numeric pair %r" % (value,))
def __parse_genre(self, atom, data):
# Translate to a freeform genre.
genre = cdata.short_be(data[16:18])
if "\xa9gen" not in self:
try: self["\xa9gen"] = GENRES[genre - 1]
except IndexError: pass
def __parse_tempo(self, atom, data):
self[atom.name] = cdata.short_be(data[16:18])
def __render_tempo(self, key, value):
if 0 <= value < 1 << 16:
return self.__render_data(key, 0x15, cdata.to_ushort_be(value))
else:
raise M4AMetadataValueError("invalid short integer %r" % value)
def __parse_compilation(self, atom, data):
try: self[atom.name] = bool(ord(data[16:17]))
except TypeError: self[atom.name] = False
def __render_compilation(self, key, value):
return self.__render_data(key, 0x15, chr(bool(value)))
def __parse_cover(self, atom, data):
length, name, imageformat = struct.unpack(">I4sI", data[:12])
if name != "data":
raise M4AMetadataError(
"unexpected atom %r inside 'covr'" % name)
if imageformat not in (M4ACover.FORMAT_JPEG, M4ACover.FORMAT_PNG):
imageformat = M4ACover.FORMAT_JPEG
self[atom.name]= M4ACover(data[16:length], imageformat)
def __render_cover(self, key, value):
try: imageformat = value.imageformat
except AttributeError: imageformat = M4ACover.FORMAT_JPEG
data = Atom.render("data", struct.pack(">2I", imageformat, 0) + value)
return Atom.render(key, data)
def __parse_text(self, atom, data):
flags = cdata.uint_be(data[8:12])
if flags == 1:
self[atom.name] = data[16:].decode('utf-8', 'replace')
def __render_text(self, key, value):
return self.__render_data(key, 0x1, value.encode('utf-8'))
def delete(self, filename):
self.clear()
self.save(filename)
__atoms = {
"----": (__parse_freeform, __render_freeform),
"trkn": (__parse_pair, __render_pair),
"disk": (__parse_pair, __render_pair_no_trailing),
"gnre": (__parse_genre, None),
"tmpo": (__parse_tempo, __render_tempo),
"cpil": (__parse_compilation, __render_compilation),
"covr": (__parse_cover, __render_cover),
}
def pprint(self):
values = []
for key, value in self.iteritems():
key = key.decode('latin1')
try: values.append("%s=%s" % (key, value))
except UnicodeDecodeError:
values.append("%s=[%d bytes of data]" % (key, len(value)))
return "\n".join(values)
class M4AInfo(object):
    """MPEG-4 stream information.

    Attributes:
    bitrate -- bitrate in bits per second, as an int
    length -- file length in seconds, as a float
    """

    bitrate = 0

    def __init__(self, atoms, fileobj):
        """Read length and bitrate from the parsed atoms of fileobj.

        Raises M4AStreamInfoError when the hdlr atom does not contain
        "soun".  A missing stsd atom simply leaves bitrate at 0.
        """
        handler = atoms["moov.trak.mdia.hdlr"]
        fileobj.seek(handler.offset)
        handler_data = fileobj.read(handler.length)
        if "soun" not in handler_data:
            raise M4AStreamInfoError("track has no audio data")

        mdhd = atoms["moov.trak.mdia.mdhd"]
        fileobj.seek(mdhd.offset)
        data = fileobj.read(mdhd.length)
        # The byte after the 8 byte atom header selects the layout:
        # 0 means two 32 bit fields, anything else a 32 and a 64 bit one.
        version = ord(data[8])
        if version == 0:
            start, fmt = 20, ">2I"
        else:
            start, fmt = 28, ">IQ"
        stop = start + struct.calcsize(fmt)
        unit, length = struct.unpack(fmt, data[start:stop])
        self.length = float(length) / unit

        try:
            stsd = atoms["moov.trak.mdia.minf.stbl.stsd"]
            fileobj.seek(stsd.offset)
            data = fileobj.read(stsd.length)
            self.bitrate = cdata.uint_be(data[-17:-13])
        except (ValueError, KeyError):
            # Bitrate values are optional.
            pass

    def pprint(self):
        summary = (self.length, self.bitrate)
        return "MPEG-4 audio, %.2f seconds, %d bps" % summary
class M4A(FileType):
"""An MPEG-4 audio file, probably containing AAC.
If more than one track is present in the file, the first is used.
Only audio ('soun') tracks will be read.
"""
_mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"]
def load(self, filename):
self.filename = filename
fileobj = file(filename, "rb")
try:
atoms = Atoms(fileobj)
try: self.info = M4AInfo(atoms, fileobj)
except StandardError, err:
raise M4AStreamInfoError, err, sys.exc_info()[2]
try: self.tags = M4ATags(atoms, fileobj)
except M4AMetadataError:
self.tags = None
except StandardError, err:
raise M4AMetadataError, err, sys.exc_info()[2]
finally:
fileobj.close()
def add_tags(self):
self.tags = M4ATags()
def score(filename, fileobj, header):
return ("ftyp" in header) + ("mp4" in header)
score = staticmethod(score)
# Module-level alias for the M4A class.
Open = M4A


def delete(filename):
    """Remove tags from a file."""
    m4a_file = M4A(filename)
    m4a_file.delete()

View File

@@ -0,0 +1,80 @@
# A Monkey's Audio (APE) reader/tagger
#
# Copyright 2006 Lukas Lalinsky <lalinsky@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# $Id: monkeysaudio.py 3976 2007-01-13 22:00:14Z piman $
"""Monkey's Audio streams with APEv2 tags.
Monkey's Audio is a very efficient lossless audio compressor developed
by Matt Ashland.
For more information, see http://www.monkeysaudio.com/.
"""
# Names exported by "from ... import *"; "delete" is the function imported
# from the apev2 module below.
__all__ = ["MonkeysAudio", "Open", "delete"]
import struct
from lib.mutagen.apev2 import APEv2File, error, delete
from lib.mutagen._util import cdata
# Raised by MonkeysAudioInfo when the first 76 bytes of the file are not a
# complete header starting with "MAC ".
class MonkeysAudioHeaderError(error): pass
class MonkeysAudioInfo(object):
    """Monkey's Audio stream information.

    Attributes:
    channels -- number of audio channels
    length -- file length in seconds, as a float
    sample_rate -- audio sampling rate in Hz
    bits_per_sample -- bits per sample
    version -- Monkey's Audio stream version, as a float (eg: 3.99)
    """

    def __init__(self, fileobj):
        """Parse the 76 byte header at the current position of fileobj.

        Raises MonkeysAudioHeaderError when the header is incomplete or
        does not start with "MAC ".
        """
        header = fileobj.read(76)
        if not (len(header) == 76 and header.startswith("MAC ")):
            raise MonkeysAudioHeaderError("not a Monkey's Audio file")

        self.version = cdata.ushort_le(header[4:6])
        if self.version < 3980:
            # Older layout: fields sit near the start of the header and
            # the frame size depends on version and compression level.
            # bits_per_sample is not set on this path.
            compression_level = cdata.ushort_le(header[6:8])
            self.channels, self.sample_rate = struct.unpack(
                "<HI", header[10:16])
            total_frames, final_frame_blocks = struct.unpack(
                "<II", header[24:32])
            if self.version >= 3950:
                blocks_per_frame = 73728 * 4
            elif self.version >= 3900:
                blocks_per_frame = 73728
            elif self.version >= 3800 and compression_level == 4:
                blocks_per_frame = 73728
            else:
                blocks_per_frame = 9216
        else:
            # Newer layout: everything is in the last 20 header bytes.
            fields = struct.unpack("<IIIHHI", header[56:76])
            blocks_per_frame, final_frame_blocks, total_frames = fields[:3]
            (self.bits_per_sample, self.channels,
             self.sample_rate) = fields[3:]

        self.version /= 1000.0
        self.length = 0.0
        if self.sample_rate != 0 and total_frames > 0:
            full_frames = total_frames - 1
            total_blocks = full_frames * blocks_per_frame + final_frame_blocks
            self.length = float(total_blocks) / self.sample_rate

    def pprint(self):
        details = (self.version, self.length, self.sample_rate)
        return "Monkey's Audio %.2f, %.2f seconds, %d Hz" % details
class MonkeysAudio(APEv2File):
    """An APEv2File that uses MonkeysAudioInfo for its stream information."""

    _Info = MonkeysAudioInfo
    _mimes = ["audio/ape", "audio/x-ape"]

    @staticmethod
    def score(filename, fileobj, header):
        """Return one point each for the "MAC " magic and an .ape name."""
        has_magic = header.startswith("MAC ")
        has_extension = filename.lower().endswith(".ape")
        return has_magic + has_extension


# Module-level alias for the MonkeysAudio class.
Open = MonkeysAudio

243
lib/mutagen/mp3.py Normal file
View File

@@ -0,0 +1,243 @@
# MP3 stream header information support for Mutagen.
# Copyright 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
"""MPEG audio stream information and tags."""
import os
import struct
from lib.mutagen.id3 import ID3FileType, BitPaddedInt, delete
# Names exported by "from ... import *".
# NOTE(review): "MP3" is listed twice; the second entry was possibly meant
# to be another name (EasyMP3 is defined in this module but not exported)
# -- confirm before changing.
__all__ = ["MP3", "Open", "delete", "MP3"]
# Base class of the exceptions defined in this module.
class error(RuntimeError): pass
# Raised by MPEGInfo when it cannot sync to an MPEG frame.
class HeaderNotFoundError(error, IOError): pass
# Not raised anywhere in this module.
class InvalidMPEGHeader(error, IOError): pass
# Mode values.
STEREO, JOINTSTEREO, DUALCHANNEL, MONO = range(4)
class MPEGInfo(object):
    """MPEG audio stream information

    Parse information about an MPEG audio file. This also reads the
    Xing VBR header format.

    This code was implemented based on the format documentation at
    http://www.dv.co.yu/mpgscript/mpeghdr.htm.

    Useful attributes:
    length -- audio length, in seconds
    bitrate -- audio bitrate, in bits per second
    sketchy -- if true, the file may not be valid MPEG audio

    Useless attributes:
    version -- MPEG version (1, 2, 2.5)
    layer -- 1, 2, or 3
    mode -- One of STEREO, JOINTSTEREO, DUALCHANNEL, or MONO (0-3)
    protected -- whether or not the file is "protected"
    padding -- whether or not audio frames are padded
    sample_rate -- audio sample rate, in Hz
    """

    # Map (version, layer) tuples to bitrates.
    __BITRATE = {
        (1, 1): range(0, 480, 32),
        (1, 2): [0, 32, 48, 56, 64, 80, 96, 112,128,160,192,224,256,320,384],
        (1, 3): [0, 32, 40, 48, 56, 64, 80, 96, 112,128,160,192,224,256,320],
        (2, 1): [0, 32, 48, 56, 64, 80, 96, 112,128,144,160,176,192,224,256],
        (2, 2): [0,  8, 16, 24, 32, 40, 48,  56, 64, 80, 96,112,128,144,160],
        }

    __BITRATE[(2, 3)] = __BITRATE[(2, 2)]
    for i in range(1, 4): __BITRATE[(2.5, i)] = __BITRATE[(2, i)]

    # Map version to sample rates.
    __RATES = {
        1: [44100, 48000, 32000],
        2: [22050, 24000, 16000],
        2.5: [11025, 12000, 8000]
        }

    sketchy = False

    def __init__(self, fileobj, offset=None):
        """Parse MPEG stream information from a file-like object.

        If an offset argument is given, it is used to start looking
        for stream information and Xing headers; otherwise, ID3v2 tags
        will be skipped automatically. A correct offset can make
        loading files significantly faster.

        Raises HeaderNotFoundError when no MPEG frame can be found.
        """

        try: size = os.path.getsize(fileobj.name)
        except (IOError, OSError, AttributeError):
            fileobj.seek(0, 2)
            size = fileobj.tell()

        # If we don't get an offset, try to skip an ID3v2 tag.
        if offset is None:
            fileobj.seek(0, 0)
            idata = fileobj.read(10)
            try: id3, insize = struct.unpack('>3sxxx4s', idata)
            except struct.error: id3, insize = '', 0
            insize = BitPaddedInt(insize)
            if id3 == 'ID3' and insize > 0:
                offset = insize
            else: offset = 0

        # Try to find two valid headers (meaning, very likely MPEG data)
        # at the given offset, 30% through the file, 60% through the file,
        # and 90% through the file.
        for i in [offset, 0.3 * size, 0.6 * size, 0.9 * size]:
            try: self.__try(fileobj, int(i), size - offset)
            except error: pass
            else: break
        # If we can't find any two consecutive frames, try to find just
        # one frame back at the original offset given.
        else:
            self.__try(fileobj, offset, size - offset, False)
            self.sketchy = True

    def __try(self, fileobj, offset, real_size, check_second=True):
        """Look for an MPEG frame at offset and fill in the attributes.

        fileobj -- file to read from
        offset -- position at which the search starts
        real_size -- byte count used for the length estimate
        check_second -- also require a sync word where the next frame
                        should begin

        Raises HeaderNotFoundError when no usable frame is found.
        """
        # This is going to be one really long function; bear with it,
        # because there's not really a sane point to cut it up.
        fileobj.seek(offset, 0)

        # We "know" we have an MPEG file if we find two frames that look like
        # valid MPEG data. If we can't find them in 32k of reads, something
        # is horribly wrong (the longest frame can only be about 4k). This
        # is assuming the offset didn't lie.
        data = fileobj.read(32768)

        frame_1 = data.find("\xff")
        while 0 <= frame_1 <= len(data) - 4:
            frame_data = struct.unpack(">I", data[frame_1:frame_1 + 4])[0]
            if (frame_data >> 16) & 0xE0 != 0xE0:
                frame_1 = data.find("\xff", frame_1 + 2)
            else:
                version = (frame_data >> 19) & 0x3
                layer = (frame_data >> 17) & 0x3
                protection = (frame_data >> 16) & 0x1
                bitrate = (frame_data >> 12) & 0xF
                sample_rate = (frame_data >> 10) & 0x3
                padding = (frame_data >> 9) & 0x1
                self.mode = (frame_data >> 6) & 0x3
                # The remaining header bits (bit 8 private, bits 4-5 mode
                # extension, bit 3 copyright, bit 2 original, bits 0-1
                # emphasis) are not used here.
                if (version == 1 or layer == 0 or sample_rate == 0x3 or
                    bitrate == 0 or bitrate == 0xF):
                    frame_1 = data.find("\xff", frame_1 + 2)
                else: break
        else:
            raise HeaderNotFoundError("can't sync to an MPEG frame")

        # There is a serious problem here, which is that many flags
        # in an MPEG header are backwards.
        self.version = [2.5, None, 2, 1][version]
        self.layer = 4 - layer
        self.protected = not protection
        self.padding = bool(padding)

        self.bitrate = self.__BITRATE[(self.version, self.layer)][bitrate]
        self.bitrate *= 1000
        self.sample_rate = self.__RATES[self.version][sample_rate]

        if self.layer == 1:
            frame_length = (12 * self.bitrate / self.sample_rate + padding) * 4
            frame_size = 384
        elif self.version >= 2 and self.layer == 3:
            frame_length = 72 * self.bitrate / self.sample_rate + padding
            frame_size = 576
        else:
            frame_length = 144 * self.bitrate / self.sample_rate + padding
            frame_size = 1152

        if check_second:
            possible = frame_1 + frame_length
            if possible > len(data) + 4:
                raise HeaderNotFoundError("can't sync to second MPEG frame")
            # The expected position may still lie at or just past the end
            # of the buffer, leaving fewer than two bytes to unpack.
            # Report that as a failed sync so the caller can fall back,
            # instead of leaking a raw struct.error.
            try:
                frame_data = struct.unpack(
                    ">H", data[possible:possible + 2])[0]
            except struct.error:
                raise HeaderNotFoundError("can't sync to second MPEG frame")
            if frame_data & 0xFFE0 != 0xFFE0:
                raise HeaderNotFoundError("can't sync to second MPEG frame")

        frame_count = real_size / float(frame_length)
        samples = frame_size * frame_count
        self.length = samples / self.sample_rate

        # Try to find/parse the Xing header, which trumps the above length
        # and bitrate calculation.
        fileobj.seek(offset, 0)
        data = fileobj.read(32768)
        try:
            xing = data[:-4].index("Xing")
        except ValueError:
            # Try to find/parse the VBRI header, which trumps the above length
            # calculation.
            try:
                vbri = data[:-24].index("VBRI")
            except ValueError: pass
            else:
                # If a VBRI header was found, this is definitely MPEG audio.
                self.sketchy = False
                vbri_version = struct.unpack('>H', data[vbri + 4:vbri + 6])[0]
                if vbri_version == 1:
                    frame_count = struct.unpack(
                        '>I', data[vbri + 14:vbri + 18])[0]
                    samples = float(frame_size * frame_count)
                    self.length = (samples / self.sample_rate) or self.length
        else:
            # If a Xing header was found, this is definitely MPEG audio.
            self.sketchy = False
            flags = struct.unpack('>I', data[xing + 4:xing + 8])[0]
            if flags & 0x1:
                frame_count = struct.unpack('>I', data[xing + 8:xing + 12])[0]
                samples = float(frame_size * frame_count)
                self.length = (samples / self.sample_rate) or self.length
            if flags & 0x2:
                byte_count = struct.unpack(
                    '>I', data[xing + 12:xing + 16])[0]
                self.bitrate = int((byte_count * 8) // self.length)

        # If the bitrate * the length is nowhere near the file
        # length, recalculate using the bitrate and file length.
        # Don't do this for very small files.
        # NOTE(review): seek(2, 0) moves to absolute offset 2, so "size"
        # is always 2 and the recalculation below never runs.  It looks
        # like seek(0, 2) was intended, but the formula underneath would
        # then need reviewing as well, so behaviour is left unchanged.
        fileobj.seek(2, 0)
        size = fileobj.tell()
        expected = (self.bitrate / 8) * self.length
        if not (size / 2 < expected < size * 2) and size > 2**16:
            self.length = size / float(self.bitrate * 8)

    def pprint(self):
        """Return a one-line description of the stream."""
        s = "MPEG %s layer %d, %d bps, %s Hz, %.2f seconds" % (
            self.version, self.layer, self.bitrate, self.sample_rate,
            self.length)
        if self.sketchy: s += " (sketchy)"
        return s
class MP3(ID3FileType):
    """An MPEG audio (usually MPEG-1 Layer 3) file."""

    _Info = MPEGInfo
    _mimes = ["audio/mp3", "audio/x-mp3", "audio/mpeg", "audio/mpg",
              "audio/x-mpeg"]

    @staticmethod
    def score(filename, fileobj, header):
        """Return two points for an "ID3" header plus one for a known suffix.

        The recognised suffixes are .mp3, .mp2, .mpg and .mpeg, compared
        case-insensitively.
        """
        lowered = filename.lower()
        points = header.startswith("ID3") * 2
        for suffix in (".mp3", ".mp2", ".mpg", ".mpeg"):
            points = points + lowered.endswith(suffix)
        return points


# Module-level alias for the MP3 class.
Open = MP3
class EasyMP3(MP3):
    """Like MP3, but uses EasyID3 for tags."""
    # The class-body import binds EasyID3 to the class attribute "ID3".
    # NOTE(review): presumably ID3FileType reads this attribute to choose
    # the tag class -- confirm against lib.mutagen.id3.
    from lib.mutagen.easyid3 import EasyID3 as ID3

682
lib/mutagen/mp4.py Normal file
View File

@@ -0,0 +1,682 @@
# Copyright 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# $Id: mp4.py 4233 2007-12-28 07:24:59Z luks $
"""Read and write MPEG-4 audio files with iTunes metadata.
This module will read MPEG-4 audio information and metadata,
as found in Apple's MP4 (aka M4A, M4B, M4P) files.
There is no official specification for this format. The source code
for TagLib, FAAD, and various MPEG specifications at
http://developer.apple.com/documentation/QuickTime/QTFF/,
http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt,
http://standards.iso.org/ittf/PubliclyAvailableStandards/c041828_ISO_IEC_14496-12_2005(E).zip,
and http://wiki.multimedia.cx/index.php?title=Apple_QuickTime were all
consulted.
"""
import struct
import sys
from lib.mutagen import FileType, Metadata
from lib.mutagen._constants import GENRES
from lib.mutagen._util import cdata, insert_bytes, delete_bytes, DictProxy, utf8
# Base class of the exceptions defined in this module.
class error(IOError): pass
# Raised by MP4Tags.load when the moov.udta.meta.ilst atom is missing.
class MP4MetadataError(error): pass
# NOTE(review): presumably raised while reading stream information, by
# analogy with the m4a module -- the code using it is outside this chunk.
class MP4StreamInfoError(error): pass
# Raised by MP4Tags.save when rendering a value fails with TypeError or
# ValueError.
class MP4MetadataValueError(ValueError, MP4MetadataError): pass
# This is not an exhaustive list of container atoms, but just the
# ones this module needs to peek inside.
_CONTAINERS = ["moov", "udta", "trak", "mdia", "meta", "ilst",
               "stbl", "minf", "moof", "traf"]
# Number of extra bytes Atom.__init__ skips after a container's header
# before it starts parsing child atoms, keyed by atom name.
_SKIP_SIZE = { "meta": 4 }
# Names exported by "from ... import *".
__all__ = ['MP4', 'Open', 'delete', 'MP4Cover']
class MP4Cover(str):
    """A cover artwork.

    The string value is the raw image data.

    Attributes:
    imageformat -- format of the image (either FORMAT_JPEG or FORMAT_PNG)
    """

    FORMAT_JPEG = 0x0D
    FORMAT_PNG = 0x0E

    def __new__(cls, data, imageformat=None):
        cover = str.__new__(cls, data)
        # JPEG is assumed when the caller does not say otherwise.
        if imageformat is None:
            imageformat = MP4Cover.FORMAT_JPEG
        cover.imageformat = imageformat
        # Only provide a "format" attribute when str does not already
        # have something under that name.
        if not hasattr(cover, "format"):
            cover.format = imageformat
        return cover
class Atom(object):
    """An individual atom.

    Attributes:
    children -- list child atoms (or None for non-container atoms)
    length -- length of this atom, including length and name
    name -- four byte name of the atom, as a str
    offset -- location in the constructor-given fileobj of this atom

    This structure should only be used internally by Mutagen.
    """

    children = None

    def __init__(self, fileobj):
        """Parse the atom that starts at the current position of fileobj.

        A 32 bit length of 1 means the real length follows as a 64 bit
        number.  Container atoms (see _CONTAINERS) are parsed
        recursively; for any other atom the file position is moved to
        the atom's end.

        Raises error when a 64 bit length is smaller than the 16 byte
        header that carries it.
        """
        self.offset = fileobj.tell()
        self.length, self.name = struct.unpack(">I4s", fileobj.read(8))
        if self.length == 1:
            self.length, = struct.unpack(">Q", fileobj.read(8))
            # The extended header alone takes 16 bytes.  A smaller value
            # would make the seek below move backwards (a length of 0
            # returns to this atom's own offset), so callers that parse
            # atom after atom would never make progress.
            if self.length < 16:
                raise error(
                    "64 bit atom length can only be 16 and higher")
        elif self.length < 8:
            return

        if self.name in _CONTAINERS:
            self.children = []
            fileobj.seek(_SKIP_SIZE.get(self.name, 0), 1)
            while fileobj.tell() < self.offset + self.length:
                self.children.append(Atom(fileobj))
        else:
            fileobj.seek(self.offset + self.length, 0)

    def render(name, data):
        """Render raw atom data."""
        # this raises OverflowError if Py_ssize_t can't handle the atom data
        size = len(data) + 8
        if size <= 0xFFFFFFFF:
            return struct.pack(">I4s", size, name) + data
        else:
            return struct.pack(">I4sQ", 1, name, size + 8) + data
    render = staticmethod(render)

    def findall(self, name, recursive=False):
        """Recursively find all child atoms by specified name."""
        if self.children is not None:
            for child in self.children:
                if child.name == name:
                    yield child
                if recursive:
                    for atom in child.findall(name, True):
                        yield atom

    def __getitem__(self, remaining):
        """Look up a child atom, potentially recursively.

        e.g. atom['udta', 'meta'] => <Atom name='meta' ...>

        Raises KeyError when this atom is not a container or has no
        child with the requested name.
        """
        if not remaining:
            return self
        elif self.children is None:
            raise KeyError("%r is not a container" % self.name)
        for child in self.children:
            if child.name == remaining[0]:
                return child[remaining[1:]]
        raise KeyError("%r not found" % remaining[0])

    def __repr__(self):
        klass = self.__class__.__name__
        if self.children is None:
            return "<%s name=%r length=%r offset=%r>" % (
                klass, self.name, self.length, self.offset)
        else:
            children = "\n".join([" " + line for child in self.children
                                  for line in repr(child).splitlines()])
            return "<%s name=%r length=%r offset=%r\n%s>" % (
                klass, self.name, self.length, self.offset, children)
class Atoms(object):
    """Root atoms in a given file.

    Attributes:
    atoms -- a list of top-level atoms as Atom objects

    This structure should only be used internally by Mutagen.
    """

    def __init__(self, fileobj):
        """Parse every top-level atom of fileobj, starting at offset 0."""
        self.atoms = []
        fileobj.seek(0, 2)
        file_size = fileobj.tell()
        fileobj.seek(0)
        # Keep going while at least a full 8 byte atom header remains.
        while file_size - fileobj.tell() >= 8:
            self.atoms.append(Atom(fileobj))

    def path(self, *names):
        """Look up and return the complete path of an atom.

        For example, atoms.path('moov', 'udta', 'meta') will return a
        list of three atoms, corresponding to the moov, udta, and meta
        atoms.
        """
        found = []
        current = self
        for name in names:
            current = current[name,]
            found.append(current)
        return found

    def __getitem__(self, names):
        """Look up a child atom.

        'names' may be a list of atoms (['moov', 'udta']) or a string
        specifying the complete path ('moov.udta').
        """
        if isinstance(names, basestring):
            names = names.split(".")
        first = names[0]
        for child in self.atoms:
            if child.name == first:
                return child[names[1:]]
        raise KeyError("%s not found" % first)

    def __repr__(self):
        return "\n".join(map(repr, self.atoms))
class MP4Tags(DictProxy, Metadata):
"""Dictionary containing Apple iTunes metadata list key/values.
Keys are four byte identifiers, except for freeform ('----')
keys. Values are usually unicode strings, but some atoms have a
special structure:
Text values (multiple values per key are supported):
'\xa9nam' -- track title
'\xa9alb' -- album
'\xa9ART' -- artist
'aART' -- album artist
'\xa9wrt' -- composer
'\xa9day' -- year
'\xa9cmt' -- comment
'desc' -- description (usually used in podcasts)
'purd' -- purchase date
'\xa9grp' -- grouping
'\xa9gen' -- genre
'\xa9lyr' -- lyrics
'purl' -- podcast URL
'egid' -- podcast episode GUID
'catg' -- podcast category
'keyw' -- podcast keywords
'\xa9too' -- encoded by
'cprt' -- copyright
'soal' -- album sort order
'soaa' -- album artist sort order
'soar' -- artist sort order
'sonm' -- title sort order
'soco' -- composer sort order
'sosn' -- show sort order
'tvsh' -- show name
Boolean values:
'cpil' -- part of a compilation
'pgap' -- part of a gapless album
'pcst' -- podcast (iTunes reads this only on import)
Tuples of ints (multiple values per key are supported):
'trkn' -- track number, total tracks
'disk' -- disc number, total discs
Others:
'tmpo' -- tempo/BPM, 16 bit int
'covr' -- cover artwork, list of MP4Cover objects (which are
tagged strs)
'gnre' -- ID3v1 genre. Not supported, use '\xa9gen' instead.
The freeform '----' frames use a key in the format '----:mean:name'
where 'mean' is usually 'com.apple.iTunes' and 'name' is a unique
identifier for this frame. The value is a str, but is probably
text that can be decoded as UTF-8. Multiple values per key are
supported.
MP4 tag data cannot exist outside of the structure of an MP4 file,
so this class should not be manually instantiated.
Unknown non-text tags are removed.
"""
def load(self, atoms, fileobj):
try: ilst = atoms["moov.udta.meta.ilst"]
except KeyError, key:
raise MP4MetadataError(key)
for atom in ilst.children:
fileobj.seek(atom.offset + 8)
data = fileobj.read(atom.length - 8)
info = self.__atoms.get(atom.name, (type(self).__parse_text, None))
info[0](self, atom, data, *info[2:])
def __key_sort((key1, v1), (key2, v2)):
# iTunes always writes the tags in order of "relevance", try
# to copy it as closely as possible.
order = ["\xa9nam", "\xa9ART", "\xa9wrt", "\xa9alb",
"\xa9gen", "gnre", "trkn", "disk",
"\xa9day", "cpil", "pgap", "pcst", "tmpo",
"\xa9too", "----", "covr", "\xa9lyr"]
order = dict(zip(order, range(len(order))))
last = len(order)
# If there's no key-based way to distinguish, order by length.
# If there's still no way, go by string comparison on the
# values, so we at least have something determinstic.
return (cmp(order.get(key1[:4], last), order.get(key2[:4], last)) or
cmp(len(v1), len(v2)) or cmp(v1, v2))
__key_sort = staticmethod(__key_sort)
def save(self, filename):
"""Save the metadata to the given filename."""
values = []
items = self.items()
items.sort(self.__key_sort)
for key, value in items:
info = self.__atoms.get(key[:4], (None, type(self).__render_text))
try:
values.append(info[1](self, key, value, *info[2:]))
except (TypeError, ValueError), s:
raise MP4MetadataValueError, s, sys.exc_info()[2]
data = Atom.render("ilst", "".join(values))
# Find the old atoms.
fileobj = file(filename, "rb+")
try:
atoms = Atoms(fileobj)
try:
path = atoms.path("moov", "udta", "meta", "ilst")
except KeyError:
self.__save_new(fileobj, atoms, data)
else:
self.__save_existing(fileobj, atoms, path, data)
finally:
fileobj.close()
def __pad_ilst(self, data, length=None):
    """Return a rendered 'free' atom holding `length` zero bytes.

    When length is omitted it is the number of bytes needed to round
    len(data) up to the next multiple of 1024.
    """
    if length is not None:
        return Atom.render("free", "\x00" * length)
    size = len(data)
    rounded_up = (size + 1023) & ~1023
    return Atom.render("free", "\x00" * (rounded_up - size))
def __save_new(self, fileobj, atoms, ilst):
    """Insert a new 'meta' atom (hdlr + ilst + padding) into the file.

    The atom goes inside 'moov.udta' when that exists; otherwise it is
    wrapped in a new 'udta' atom and inserted inside 'moov'. Parent
    sizes and chunk offset tables are updated afterwards.
    """
    handler = Atom.render("hdlr", "\x00" * 8 + "mdirappl" + "\x00" * 9)
    padding = self.__pad_ilst(ilst)
    block = Atom.render("meta", "\x00\x00\x00\x00" + handler + ilst + padding)
    try:
        path = atoms.path("moov", "udta")
    except KeyError:
        # moov.udta not found -- create one
        path = atoms.path("moov")
        block = Atom.render("udta", block)
    added = len(block)
    # Insert 8 bytes into the innermost parent atom.
    offset = path[-1].offset + 8
    insert_bytes(fileobj, added, offset)
    fileobj.seek(offset)
    fileobj.write(block)
    self.__update_parents(fileobj, path, added)
    self.__update_offsets(fileobj, atoms, added, offset)
def __save_existing(self, fileobj, atoms, path, data):
    """Replace the existing 'ilst' atom with the rendered `data`.

    fileobj -- file opened for reading and writing
    atoms -- parsed atom tree of the file
    path -- list of atoms leading to (and ending with) the old 'ilst';
            the 'ilst' entry is popped off this list
    data -- rendered replacement 'ilst' atom

    Neighbouring "free" atoms are absorbed into the region being
    replaced, so existing padding is reused before the file is grown.
    """
    # Replace the old ilst atom.
    ilst = path.pop()
    offset = ilst.offset
    length = ilst.length

    # Check for padding "free" atoms
    meta = path[-1]
    siblings = meta.children
    index = siblings.index(ilst)
    # Only look backwards when a previous sibling exists: for index 0,
    # siblings[index - 1] would be siblings[-1], i.e. the LAST child.
    if index > 0:
        previous = siblings[index - 1]
        if previous.name == "free":
            offset = previous.offset
            length += previous.length
    try:
        following = siblings[index + 1]
    except IndexError:
        pass
    else:
        if following.name == "free":
            length += following.length

    delta = len(data) - length
    if delta > 0 or (delta < 0 and delta > -8):
        # New data does not fit, or leaves a gap of fewer than 8 bytes
        # (the code below pads with -delta - 8 bytes): pad and grow.
        data += self.__pad_ilst(data)
        delta = len(data) - length
        insert_bytes(fileobj, delta, offset)
    elif delta < 0:
        # New data is smaller: fill the remainder with a "free" atom so
        # the file size does not change.
        data += self.__pad_ilst(data, -delta - 8)
        delta = 0

    fileobj.seek(offset)
    fileobj.write(data)
    self.__update_parents(fileobj, path, delta)
    self.__update_offsets(fileobj, atoms, delta, offset)
def __update_parents(self, fileobj, path, delta):
    """Update all parent atoms with the new size.

    Adds delta to the size field of every atom in path, rewriting it
    in place in fileobj.
    """
    for parent in path:
        start = parent.offset
        fileobj.seek(start)
        size32 = cdata.uint_be(fileobj.read(4))
        if size32 != 1:
            # 32bit size stored at the start of the atom
            fileobj.seek(start)
            fileobj.write(cdata.to_uint_be(size32 + delta))
            continue
        # 64bit: skip name (4B) and read size (8B)
        extended = fileobj.read(12)
        size64 = cdata.ulonglong_be(extended[4:])
        fileobj.seek(start + 8)
        fileobj.write(cdata.to_ulonglong_be(size64 + delta))
def __update_offset_table(self, fileobj, fmt, atom, delta, offset):
    """Update offset table in the specified atom.

    fmt -- struct format template with one %d slot for the entry count
    delta -- number of bytes added at file position `offset`

    Every table entry greater than offset is shifted by delta; the
    atom's own recorded offset is shifted too when it lies after it.
    """
    if atom.offset > offset:
        atom.offset += delta
    fileobj.seek(atom.offset + 12)
    raw = fileobj.read(atom.length - 12)
    count = cdata.uint_be(raw[:4])
    fmt = fmt % count
    shifted = []
    for entry in struct.unpack(fmt, raw[4:]):
        if offset < entry:
            shifted.append(entry + delta)
        else:
            shifted.append(entry + 0)
    fileobj.seek(atom.offset + 16)
    fileobj.write(struct.pack(fmt, *shifted))
def __update_tfhd(self, fileobj, atom, delta, offset):
    """Shift the 64 bit offset stored in a 'tfhd' atom, if it has one.

    The stored value is only present when bit 0 of the atom's flags is
    set; it is increased by delta when it is greater than offset.
    """
    if atom.offset > offset:
        atom.offset += delta
    fileobj.seek(atom.offset + 9)
    raw = fileobj.read(atom.length - 9)
    flags = cdata.uint_be("\x00" + raw[:3])
    if not flags & 1:
        return
    stored = cdata.ulonglong_be(raw[7:15])
    if stored > offset:
        stored += delta
    fileobj.seek(atom.offset + 16)
    fileobj.write(cdata.to_ulonglong_be(stored))
def __update_offsets(self, fileobj, atoms, delta, offset):
    """Update offset tables in all 'stco' and 'co64' atoms.

    'tfhd' atoms under 'moof' are updated as well when the file has a
    'moof' atom. Nothing is touched when delta is zero.
    """
    if delta == 0:
        return
    moov = atoms["moov"]
    # 'stco' tables hold 32 bit entries, 'co64' tables 64 bit ones.
    for table_name, fmt in (('stco', ">%dI"), ('co64', ">%dQ")):
        for table in moov.findall(table_name, True):
            self.__update_offset_table(fileobj, fmt, table, delta, offset)
    try:
        for tfhd in atoms["moof"].findall('tfhd', True):
            self.__update_tfhd(fileobj, tfhd, delta, offset)
    except KeyError:
        pass
def __parse_data(self, atom, data):
    """Yield (flags, payload) for every 'data' child found in `data`.

    atom -- the enclosing atom; its length bounds the scan and its
            name is used in error messages
    data -- the atom's payload (everything after its 8 byte header)

    Raises MP4MetadataError if a child is not a 'data' atom, if its
    header is truncated, or if its declared length is too small to
    hold its own 16 byte header.
    """
    pos = 0
    while pos < atom.length - 8:
        head = data[pos:pos+12]
        if len(head) != 12:
            # struct.unpack would raise struct.error here; report it
            # as a metadata error like every other malformed child.
            raise MP4MetadataError("truncated atom %r" % atom.name)
        length, name, flags = struct.unpack(">I4sI", head)
        if name != "data":
            raise MP4MetadataError(
                "unexpected atom %r inside %r" % (name, atom.name))
        if length < 16:
            # The payload starts 16 bytes in, so anything shorter is
            # corrupt; a length of 0 would also never advance pos.
            raise MP4MetadataError(
                "invalid 'data' atom length %d inside %r" % (
                length, atom.name))
        yield flags, data[pos+16:pos+length]
        pos += length
def __render_data(self, key, flags, value):
    """Render atom `key` containing one 'data' child per item of value.

    Each child is the item prefixed with `flags` and a zero word, both
    packed as big-endian 32 bit integers.
    """
    children = []
    for item in value:
        header = struct.pack(">2I", flags, 0)
        children.append(Atom.render("data", header + item))
    return Atom.render(key, "".join(children))
def __parse_freeform(self, atom, data):
    """Parse a freeform ('----') atom into self['----:mean:name'].

    The payload is a 'mean' child, a 'name' child, then any number of
    'data' children whose payloads become the list of values. Nothing
    is stored when there are no values.

    Raises MP4MetadataError if a value child is not a 'data' atom, if
    its header is truncated, or if its declared length is too small to
    hold its own 16 byte header.
    """
    length = cdata.uint_be(data[:4])
    mean = data[12:length]
    pos = length
    length = cdata.uint_be(data[pos:pos+4])
    name = data[pos+12:pos+length]
    pos += length
    value = []
    while pos < atom.length - 8:
        head = data[pos:pos+8]
        if len(head) != 8:
            # struct.unpack would raise struct.error here; report it
            # as a metadata error like every other malformed child.
            raise MP4MetadataError("truncated atom %r" % atom.name)
        length, atom_name = struct.unpack(">I4s", head)
        if atom_name != "data":
            raise MP4MetadataError(
                "unexpected atom %r inside %r" % (atom_name, atom.name))
        if length < 16:
            # The payload starts 16 bytes in, so anything shorter is
            # corrupt; a length of 0 would also never advance pos.
            raise MP4MetadataError(
                "invalid 'data' atom length %d inside %r" % (
                length, atom.name))
        value.append(data[pos+16:pos+length])
        pos += length
    if value:
        self["%s:%s:%s" % (atom.name, mean, name)] = value
def __render_freeform(self, key, value):
    """Render a freeform tag whose key has the form '----:mean:name'.

    value -- a single string or a list of strings; each becomes one
             'data' child written with flags 1
    """
    dummy, mean, name = key.split(":", 2)
    mean_atom = struct.pack(">I4sI", len(mean) + 12, "mean", 0) + mean
    name_atom = struct.pack(">I4sI", len(name) + 12, "name", 0) + name
    if isinstance(value, basestring):
        value = [value]
    children = []
    for item in value:
        header = struct.pack(">I4s2I", len(item) + 16, "data", 1, 0)
        children.append(header + item)
    return Atom.render("----", mean_atom + name_atom + "".join(children))
def __parse_pair(self, atom, data):
    """Store the atom's values as a list of (number, total) tuples.

    Each pair is read as two big-endian 16 bit integers from bytes 2-5
    of a 'data' payload.
    """
    pairs = []
    for flags, payload in self.__parse_data(atom, data):
        pairs.append(struct.unpack(">2H", payload[2:6]))
    self[atom.name] = pairs
def __render_pair(self, key, value):
    """Render (number, total) pairs with a trailing zero word.

    Raises MP4MetadataValueError if either number of a pair does not
    fit in an unsigned 16 bit integer.
    """
    limit = 1 << 16
    packed = []
    for (track, total) in value:
        if not (0 <= track < limit and 0 <= total < limit):
            raise MP4MetadataValueError(
                "invalid numeric pair %r" % ((track, total),))
        packed.append(struct.pack(">4H", 0, track, total, 0))
    return self.__render_data(key, 0, packed)
def __render_pair_no_trailing(self, key, value):
    """Render (number, total) pairs without a trailing zero word.

    Raises MP4MetadataValueError if either number of a pair does not
    fit in an unsigned 16 bit integer.
    """
    limit = 1 << 16
    packed = []
    for (track, total) in value:
        if not (0 <= track < limit and 0 <= total < limit):
            raise MP4MetadataValueError(
                "invalid numeric pair %r" % ((track, total),))
        packed.append(struct.pack(">3H", 0, track, total))
    return self.__render_data(key, 0, packed)
def __parse_genre(self, atom, data):
    """Translate a numeric 'gnre' atom to a freeform '\\xa9gen' tag.

    The stored number is one greater than the index into GENRES. An
    existing '\\xa9gen' tag is never overwritten, and numbers with no
    matching GENRES entry are ignored.
    """
    # Translate to a freeform genre.
    genre = cdata.short_be(data[16:18])
    if "\xa9gen" in self:
        return
    if genre < 1:
        # genre - 1 would be negative and index GENRES from the end,
        # silently picking an unrelated genre.
        return
    try:
        self["\xa9gen"] = [GENRES[genre - 1]]
    except IndexError:
        pass
def __parse_tempo(self, atom, data):
    """Store the atom's values as a list of unsigned 16 bit integers."""
    tempos = []
    for flags, payload in self.__parse_data(atom, data):
        tempos.append(cdata.ushort_be(payload))
    self[atom.name] = tempos
def __render_tempo(self, key, value):
    """Render a list of 16 bit integers, written with flags 0x15.

    Raises MP4MetadataValueError if value is not a list of integers in
    the unsigned 16 bit range.
    """
    limit = 1 << 16
    try:
        if len(value) == 0:
            return self.__render_data(key, 0x15, "")
        if min(value) < 0 or max(value) >= limit:
            raise MP4MetadataValueError(
                "invalid 16 bit integers: %r" % value)
    except TypeError:
        raise MP4MetadataValueError(
            "tmpo must be a list of 16 bit integers")
    encoded = [cdata.to_ushort_be(item) for item in value]
    return self.__render_data(key, 0x15, encoded)
def __parse_bool(self, atom, data):
    """Store the truth value of byte 16 of data under the atom's name.

    A missing byte is stored as False.
    """
    try:
        flag = bool(ord(data[16:17]))
    except TypeError:
        # data[16:17] was empty, so ord() rejected it.
        flag = False
    self[atom.name] = flag
def __render_bool(self, key, value):
    """Render value as a single byte, 1 if it is true and 0 otherwise."""
    if value:
        byte = chr(True)
    else:
        byte = chr(False)
    return self.__render_data(key, 0x15, [byte])
def __parse_cover(self, atom, data):
    """Store the atom's images as a list of MP4Cover objects.

    Each 'data' child becomes one MP4Cover. An image format other than
    JPEG or PNG is treated as JPEG.

    Raises MP4MetadataError if a child is not a 'data' atom, if its
    header is truncated, or if its declared length is too small to
    hold its own 16 byte header.
    """
    self[atom.name] = []
    pos = 0
    while pos < atom.length - 8:
        head = data[pos:pos+12]
        if len(head) != 12:
            # struct.unpack would raise struct.error here; report it
            # as a metadata error like every other malformed child.
            raise MP4MetadataError("truncated atom %r" % atom.name)
        length, name, imageformat = struct.unpack(">I4sI", head)
        if name != "data":
            raise MP4MetadataError(
                "unexpected atom %r inside 'covr'" % name)
        if length < 16:
            # The payload starts 16 bytes in, so anything shorter is
            # corrupt; a length of 0 would also never advance pos.
            raise MP4MetadataError(
                "invalid 'data' atom length %d inside 'covr'" % length)
        if imageformat not in (MP4Cover.FORMAT_JPEG, MP4Cover.FORMAT_PNG):
            imageformat = MP4Cover.FORMAT_JPEG
        # Build the cover once and store that same object.
        cover = MP4Cover(data[pos+16:pos+length], imageformat)
        self[atom.name].append(cover)
        pos += length
def __render_cover(self, key, value):
    """Render a list of cover images, one 'data' child per image.

    An image without an imageformat attribute is written as JPEG.
    """
    children = []
    for image in value:
        try:
            kind = image.imageformat
        except AttributeError:
            kind = MP4Cover.FORMAT_JPEG
        header = struct.pack(">2I", kind, 0)
        children.append(Atom.render("data", header + image))
    return Atom.render(key, "".join(children))
def __parse_text(self, atom, data, expected_flags=1):
    """Store the atom's text values, decoded as UTF-8.

    Only 'data' children whose flags equal expected_flags are used;
    undecodable bytes are replaced. Nothing is stored when no child
    matches.
    """
    texts = []
    for flags, raw in self.__parse_data(atom, data):
        if flags != expected_flags:
            continue
        texts.append(raw.decode('utf-8', 'replace'))
    if texts:
        self[atom.name] = texts
def __render_text(self, key, value, flags=1):
    """Render a string, or a list of strings, passing each through utf8()."""
    if isinstance(value, basestring):
        value = [value]
    encoded = [utf8(item) for item in value]
    return self.__render_data(key, flags, encoded)
def delete(self, filename):
    """Empty this mapping, then save it to the given filename."""
    self.clear()
    return self.save(filename)
# Dispatch table: maps a four-character atom name to a tuple of
# (parse function, render function[, extra positional arguments]).
# load() calls the parse function and save() the render function; any
# extra arguments are appended to both calls. Names not listed here
# fall back to __parse_text in load() and __render_text in save().
__atoms = {
"----": (__parse_freeform, __render_freeform),
"trkn": (__parse_pair, __render_pair),
"disk": (__parse_pair, __render_pair_no_trailing),
"gnre": (__parse_genre, None),
"tmpo": (__parse_tempo, __render_tempo),
"cpil": (__parse_bool, __render_bool),
"pgap": (__parse_bool, __render_bool),
"pcst": (__parse_bool, __render_bool),
"covr": (__parse_cover, __render_cover),
"purl": (__parse_text, __render_text, 0),
"egid": (__parse_text, __render_text, 0),
}
def pprint(self):
    """Return the tags as 'key=value' lines joined by newlines.

    Cover art is summarised by its size in bytes, and the items of
    other list values are joined with ' / '.
    """
    lines = []
    for key, value in self.iteritems():
        key = key.decode('latin1')
        if key == "covr":
            sizes = ["[%d bytes of data]" % len(data) for data in value]
            shown = ", ".join(sizes)
        elif isinstance(value, list):
            shown = " / ".join([unicode(item) for item in value])
        else:
            shown = value
        lines.append("%s=%s" % (key, shown))
    return "\n".join(lines)
class MP4Info(object):
    """MPEG-4 stream information.

    Attributes:
    bitrate -- bitrate in bits per second, as an int
    length -- file length in seconds, as a float
    channels -- number of audio channels
    sample_rate -- audio sampling rate in Hz
    bits_per_sample -- bits per sample
    """

    bitrate = 0
    channels = 0
    sample_rate = 0
    bits_per_sample = 0

    def __init__(self, atoms, fileobj):
        """Read stream information for the first 'soun' track.

        Raises MP4StreamInfoError if no track has a 'soun' handler.
        """
        # Pick the first track whose handler type is 'soun'.
        for trak in list(atoms["moov"].findall("trak")):
            hdlr = trak["mdia", "hdlr"]
            fileobj.seek(hdlr.offset)
            raw = fileobj.read(hdlr.length)
            if raw[16:20] == "soun":
                break
        else:
            raise MP4StreamInfoError("track has no audio data")

        mdhd = trak["mdia", "mdhd"]
        fileobj.seek(mdhd.offset)
        raw = fileobj.read(mdhd.length)
        # Byte 8 selects where the (unit, length) pair sits and whether
        # length is stored in 32 or 64 bits.
        if ord(raw[8]) == 0:
            start, fmt = 20, ">2I"
        else:
            start, fmt = 28, ">IQ"
        stop = start + struct.calcsize(fmt)
        unit, length = struct.unpack(fmt, raw[start:stop])
        self.length = float(length) / unit

        try:
            stsd = trak["mdia", "minf", "stbl", "stsd"]
            fileobj.seek(stsd.offset)
            raw = fileobj.read(stsd.length)
            if raw[20:24] == "mp4a":
                entry_size = cdata.uint_be(raw[16:20])
                fields = struct.unpack(">3HI", raw[40:50])
                (self.channels, self.bits_per_sample, _,
                 self.sample_rate) = fields
                # ES descriptor type
                if raw[56:60] == "esds" and ord(raw[64:65]) == 0x03:
                    pos = 65
                    # skip extended descriptor type tag, length, ES ID
                    # and stream priority
                    if raw[pos:pos+3] == "\x80\x80\x80":
                        pos += 3
                    pos += 4
                    # decoder config descriptor type
                    if ord(raw[pos]) == 0x04:
                        pos += 1
                        # skip extended descriptor type tag, length,
                        # object type ID, stream type, buffer size
                        # and maximum bitrate
                        if raw[pos:pos+3] == "\x80\x80\x80":
                            pos += 3
                        pos += 10
                        # average bitrate
                        self.bitrate = cdata.uint_be(raw[pos:pos+4])
        except (ValueError, KeyError):
            # stsd atoms are optional
            pass

    def pprint(self):
        """Return a one-line summary of the stream length and bitrate."""
        summary = (self.length, self.bitrate)
        return "MPEG-4 audio, %.2f seconds, %d bps" % summary
class MP4(FileType):
"""An MPEG-4 audio file, probably containing AAC.
If more than one track is present in the file, the first is used.
Only audio ('soun') tracks will be read.
"""
MP4Tags = MP4Tags
_mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"]
def load(self, filename):
self.filename = filename
fileobj = file(filename, "rb")
try:
atoms = Atoms(fileobj)
try: self.info = MP4Info(atoms, fileobj)
except StandardError, err:
raise MP4StreamInfoError, err, sys.exc_info()[2]
try: self.tags = self.MP4Tags(atoms, fileobj)
except MP4MetadataError:
self.tags = None
except StandardError, err:
raise MP4MetadataError, err, sys.exc_info()[2]
finally:
fileobj.close()
def add_tags(self):
self.tags = self.MP4Tags()
def score(filename, fileobj, header):
return ("ftyp" in header) + ("mp4" in header)
score = staticmethod(score)
# Generic alias for this module's file type.
Open = MP4

def delete(filename):
    """Remove tags from a file."""
    tagged = MP4(filename)
    tagged.delete()

118
lib/mutagen/musepack.py Normal file
View File

@@ -0,0 +1,118 @@
# A Musepack reader/tagger
#
# Copyright 2006 Lukas Lalinsky <lalinsky@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# $Id: musepack.py 4013 2007-04-23 09:18:22Z luks $
"""Musepack audio streams with APEv2 tags.
Musepack is an audio format originally based on the MPEG-1 Layer-2
algorithms. Stream versions 4 through 7 are supported.
For more information, see http://www.musepack.net/.
"""
__all__ = ["Musepack", "Open", "delete"]
import struct
from lib.mutagen.apev2 import APEv2File, error, delete
from lib.mutagen.id3 import BitPaddedInt
from lib.mutagen._util import cdata
# Raised by MusepackInfo when the stream header cannot be read in full
# or carries an unsupported stream version.
class MusepackHeaderError(error): pass
# Sample rates in Hz, indexed by the two-bit field MusepackInfo takes
# from the SV7 header flags.
RATES = [44100, 48000, 37800, 32000]
class MusepackInfo(object):
    """Musepack stream information.

    Attributes:
    channels -- number of audio channels
    length -- file length in seconds, as a float
    sample_rate -- audio sampling rate in Hz
    bitrate -- audio bitrate, in bits per second
    version -- Musepack stream version

    Optional Attributes:
    title_gain, title_peak -- Replay Gain and peak data for this song
    album_gain, album_peak -- Replay Gain and peak data for this album

    These attributes are only available in stream version 7. The
    gains are a float, +/- some dB. The peaks are a percentage [0..1] of
    the maximum amplitude. This means to get a number comparable to
    VorbisGain, you must multiply the peak by 2.
    """

    def __init__(self, fileobj):
        """Parse the 32 byte stream header read from fileobj.

        A leading ID3v2 tag is skipped first. Raises
        MusepackHeaderError if the header is short or the stream
        version is not one of 4 through 7.
        """
        header = fileobj.read(32)
        if len(header) != 32:
            raise MusepackHeaderError("not a Musepack file")

        # Skip ID3v2 tags
        if header[:3] == "ID3":
            tag_size = 10 + BitPaddedInt(header[6:10])
            fileobj.seek(tag_size)
            header = fileobj.read(32)
            if len(header) != 32:
                raise MusepackHeaderError("not a Musepack file")

        if header.startswith("MP+"):
            # SV7
            self.version = ord(header[3]) & 0xF
            if self.version < 7:
                raise MusepackHeaderError("not a Musepack file")
            frames = cdata.uint_le(header[4:8])
            flags = cdata.uint_le(header[8:12])

            title_peak, title_gain = struct.unpack("<Hh", header[12:16])
            album_peak, album_gain = struct.unpack("<Hh", header[16:20])
            self.title_peak = title_peak / 65535.0
            self.title_gain = title_gain / 100.0
            self.album_peak = album_peak / 65535.0
            self.album_gain = album_gain / 100.0

            self.sample_rate = RATES[(flags >> 16) & 0x0003]
            self.bitrate = 0
        else:
            # SV4-SV6
            header_dword = cdata.uint_le(header[0:4])
            self.version = (header_dword >> 11) & 0x03FF
            if not (4 <= self.version <= 6):
                raise MusepackHeaderError("not a Musepack file")
            self.bitrate = (header_dword >> 23) & 0x01FF
            self.sample_rate = 44100
            if self.version >= 5:
                frames = cdata.uint_le(header[4:8])
            else:
                frames = cdata.ushort_le(header[6:8])
            if self.version < 6:
                frames -= 1

        self.channels = 2
        self.length = float(frames * 1152 - 576) / self.sample_rate
        if not self.bitrate and self.length != 0:
            # No bitrate in the header: derive one from the file size.
            fileobj.seek(0, 2)
            file_size = fileobj.tell()
            self.bitrate = int(file_size * 8 / (self.length * 1000) + 0.5)

    def pprint(self):
        """Return a one-line summary; gains are shown from version 7 on."""
        rg_data = ""
        if self.version >= 7:
            rg_data = ", Gain: %+0.2f (title), %+0.2f (album)" % (
                self.title_gain, self.album_gain)
        return "Musepack, %.2f seconds, %d Hz%s" % (
            self.length, self.sample_rate, rg_data)
class Musepack(APEv2File):
    """A Musepack file; stream information comes from MusepackInfo."""

    _Info = MusepackInfo
    _mimes = ["audio/x-musepack", "audio/x-mpc"]

    @staticmethod
    def score(filename, fileobj, header):
        """Return 0-2: one point for an 'MP+' header, one for '.mpc'."""
        magic_matches = header.startswith("MP+")
        extension_matches = filename.endswith(".mpc")
        return magic_matches + extension_matches

# Generic alias for this module's file type.
Open = Musepack

498
lib/mutagen/ogg.py Normal file
View File

@@ -0,0 +1,498 @@
# Copyright 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# $Id: ogg.py 3975 2007-01-13 21:51:17Z piman $
"""Read and write Ogg bitstreams and pages.
This module reads and writes a subset of the Ogg bitstream format
version 0. It does *not* read or write Ogg Vorbis files! For that,
you should use mutagen.oggvorbis.
This implementation is based on the RFC 3533 standard found at
http://www.xiph.org/ogg/doc/rfc3533.txt.
"""
import struct
import sys
import zlib
from cStringIO import StringIO
from lib.mutagen import FileType
from lib.mutagen._util import cdata, insert_bytes, delete_bytes
class error(IOError):
    """Ogg stream parsing errors."""
class OggPage(object):
"""A single Ogg page (not necessarily a single encoded packet).
A page is a header of 26 bytes, followed by the length of the
data, followed by the data.
The constructor is givin a file-like object pointing to the start
of an Ogg page. After the constructor is finished it is pointing
to the start of the next page.
Attributes:
version -- stream structure version (currently always 0)
position -- absolute stream position (default -1)
serial -- logical stream serial number (default 0)
sequence -- page sequence number within logical stream (default 0)
offset -- offset this page was read from (default None)
complete -- if the last packet on this page is complete (default True)
packets -- list of raw packet data (default [])
Note that if 'complete' is false, the next page's 'continued'
property must be true (so set both when constructing pages).
If a file-like object is supplied to the constructor, the above
attributes will be filled in based on it.
"""
version = 0
__type_flags = 0
position = 0L
serial = 0
sequence = 0
offset = None
complete = True
def __init__(self, fileobj=None):
self.packets = []
if fileobj is None:
return
self.offset = fileobj.tell()
header = fileobj.read(27)
if len(header) == 0:
raise EOFError
try:
(oggs, self.version, self.__type_flags, self.position,
self.serial, self.sequence, crc, segments) = struct.unpack(
"<4sBBqIIiB", header)
except struct.error:
raise error("unable to read full header; got %r" % header)
if oggs != "OggS":
raise error("read %r, expected %r, at 0x%x" % (
oggs, "OggS", fileobj.tell() - 27))
if self.version != 0:
raise error("version %r unsupported" % self.version)
total = 0
lacings = []
lacing_bytes = fileobj.read(segments)
if len(lacing_bytes) != segments:
raise error("unable to read %r lacing bytes" % segments)
for c in map(ord, lacing_bytes):
total += c
if c < 255:
lacings.append(total)
total = 0
if total:
lacings.append(total)
self.complete = False
self.packets = map(fileobj.read, lacings)
if map(len, self.packets) != lacings:
raise error("unable to read full data")
def __eq__(self, other):
"""Two Ogg pages are the same if they write the same data."""
try:
return (self.write() == other.write())
except AttributeError:
return False
def __repr__(self):
attrs = ['version', 'position', 'serial', 'sequence', 'offset',
'complete', 'continued', 'first', 'last']
values = ["%s=%r" % (attr, getattr(self, attr)) for attr in attrs]
return "<%s %s, %d bytes in %d packets>" % (
type(self).__name__, " ".join(values), sum(map(len, self.packets)),
len(self.packets))
def write(self):
"""Return a string encoding of the page header and data.
A ValueError is raised if the data is too big to fit in a
single page.
"""
data = [
struct.pack("<4sBBqIIi", "OggS", self.version, self.__type_flags,
self.position, self.serial, self.sequence, 0)
]
lacing_data = []
for datum in self.packets:
quot, rem = divmod(len(datum), 255)
lacing_data.append("\xff" * quot + chr(rem))
lacing_data = "".join(lacing_data)
if not self.complete and lacing_data.endswith("\x00"):
lacing_data = lacing_data[:-1]
data.append(chr(len(lacing_data)))
data.append(lacing_data)
data.extend(self.packets)
data = "".join(data)
# Python's CRC is swapped relative to Ogg's needs.
crc = ~zlib.crc32(data.translate(cdata.bitswap), -1)
# Although we're using to_int_be, this actually makes the CRC
# a proper le integer, since Python's CRC is byteswapped.
crc = cdata.to_int_be(crc).translate(cdata.bitswap)
data = data[:22] + crc + data[26:]
return data
def __size(self):
size = 27 # Initial header size
for datum in self.packets:
quot, rem = divmod(len(datum), 255)
size += quot + 1
if not self.complete and rem == 0:
# Packet contains a multiple of 255 bytes and is not
# terminated, so we don't have a \x00 at the end.
size -= 1
size += sum(map(len, self.packets))
return size
size = property(__size, doc="Total frame size.")
def __set_flag(self, bit, val):
mask = 1 << bit
if val: self.__type_flags |= mask
else: self.__type_flags &= ~mask
continued = property(
lambda self: cdata.test_bit(self.__type_flags, 0),
lambda self, v: self.__set_flag(0, v),
doc="The first packet is continued from the previous page.")
first = property(
lambda self: cdata.test_bit(self.__type_flags, 1),
lambda self, v: self.__set_flag(1, v),
doc="This is the first page of a logical bitstream.")
last = property(
lambda self: cdata.test_bit(self.__type_flags, 2),
lambda self, v: self.__set_flag(2, v),
doc="This is the last page of a logical bitstream.")
def renumber(klass, fileobj, serial, start):
"""Renumber pages belonging to a specified logical stream.
fileobj must be opened with mode r+b or w+b.
Starting at page number 'start', renumber all pages belonging
to logical stream 'serial'. Other pages will be ignored.
fileobj must point to the start of a valid Ogg page; any
occuring after it and part of the specified logical stream
will be numbered. No adjustment will be made to the data in
the pages nor the granule position; only the page number, and
so also the CRC.
If an error occurs (e.g. non-Ogg data is found), fileobj will
be left pointing to the place in the stream the error occured,
but the invalid data will be left intact (since this function
does not change the total file size).
"""
number = start
while True:
try: page = OggPage(fileobj)
except EOFError:
break
else:
if page.serial != serial:
# Wrong stream, skip this page.
continue
# Changing the number can't change the page size,
# so seeking back based on the current size is safe.
fileobj.seek(-page.size, 1)
page.sequence = number
fileobj.write(page.write())
fileobj.seek(page.offset + page.size, 0)
number += 1
renumber = classmethod(renumber)
def to_packets(klass, pages, strict=False):
"""Construct a list of packet data from a list of Ogg pages.
If strict is true, the first page must start a new packet,
and the last page must end the last packet.
"""
serial = pages[0].serial
sequence = pages[0].sequence
packets = []
if strict:
if pages[0].continued:
raise ValueError("first packet is continued")
if not pages[-1].complete:
raise ValueError("last packet does not complete")
elif pages and pages[0].continued:
packets.append("")
for page in pages:
if serial != page.serial:
raise ValueError("invalid serial number in %r" % page)
elif sequence != page.sequence:
raise ValueError("bad sequence number in %r" % page)
else: sequence += 1
if page.continued: packets[-1] += page.packets[0]
else: packets.append(page.packets[0])
packets.extend(page.packets[1:])
return packets
to_packets = classmethod(to_packets)
def from_packets(klass, packets, sequence=0,
default_size=4096, wiggle_room=2048):
"""Construct a list of Ogg pages from a list of packet data.
The algorithm will generate pages of approximately
default_size in size (rounded down to the nearest multiple of
255). However, it will also allow pages to increase to
approximately default_size + wiggle_room if allowing the
wiggle room would finish a packet (only one packet will be
finished in this way per page; if the next packet would fit
into the wiggle room, it still starts on a new page).
This method reduces packet fragmentation when packet sizes are
slightly larger than the default page size, while still
ensuring most pages are of the average size.
Pages are numbered started at 'sequence'; other information is
uninitialized.
"""
chunk_size = (default_size // 255) * 255
pages = []
page = OggPage()
page.sequence = sequence
for packet in packets:
page.packets.append("")
while packet:
data, packet = packet[:chunk_size], packet[chunk_size:]
if page.size < default_size and len(page.packets) < 255:
page.packets[-1] += data
else:
# If we've put any packet data into this page yet,
# we need to mark it incomplete. However, we can
# also have just started this packet on an already
# full page, in which case, just start the new
# page with this packet.
if page.packets[-1]:
page.complete = False
if len(page.packets) == 1:
page.position = -1L
else:
page.packets.pop(-1)
pages.append(page)
page = OggPage()
page.continued = not pages[-1].complete
page.sequence = pages[-1].sequence + 1
page.packets.append(data)
if len(packet) < wiggle_room:
page.packets[-1] += packet
packet = ""
if page.packets:
pages.append(page)
return pages
from_packets = classmethod(from_packets)
def replace(klass, fileobj, old_pages, new_pages):
"""Replace old_pages with new_pages within fileobj.
old_pages must have come from reading fileobj originally.
new_pages are assumed to have the 'same' data as old_pages,
and so the serial and sequence numbers will be copied, as will
the flags for the first and last pages.
fileobj will be resized and pages renumbered as necessary. As
such, it must be opened r+b or w+b.
"""
# Number the new pages starting from the first old page.
first = old_pages[0].sequence
for page, seq in zip(new_pages, range(first, first + len(new_pages))):
page.sequence = seq
page.serial = old_pages[0].serial
new_pages[0].first = old_pages[0].first
new_pages[0].last = old_pages[0].last
new_pages[0].continued = old_pages[0].continued
new_pages[-1].first = old_pages[-1].first
new_pages[-1].last = old_pages[-1].last
new_pages[-1].complete = old_pages[-1].complete
if not new_pages[-1].complete and len(new_pages[-1].packets) == 1:
new_pages[-1].position = -1L
new_data = "".join(map(klass.write, new_pages))
# Make room in the file for the new data.
delta = len(new_data)
fileobj.seek(old_pages[0].offset, 0)
insert_bytes(fileobj, delta, old_pages[0].offset)
fileobj.seek(old_pages[0].offset, 0)
fileobj.write(new_data)
new_data_end = old_pages[0].offset + delta
# Go through the old pages and delete them. Since we shifted
# the data down the file, we need to adjust their offsets. We
# also need to go backwards, so we don't adjust the deltas of
# the other pages.
old_pages.reverse()
for old_page in old_pages:
adj_offset = old_page.offset + delta
delete_bytes(fileobj, old_page.size, adj_offset)
# Finally, if there's any discrepency in length, we need to
# renumber the pages for the logical stream.
if len(old_pages) != len(new_pages):
fileobj.seek(new_data_end, 0)
serial = new_pages[-1].serial
sequence = new_pages[-1].sequence + 1
klass.renumber(fileobj, serial, sequence)
replace = classmethod(replace)
def find_last(klass, fileobj, serial):
"""Find the last page of the stream 'serial'.
If the file is not multiplexed this function is fast. If it is,
it must read the whole the stream.
This finds the last page in the actual file object, or the last
page in the stream (with eos set), whichever comes first.
"""
# For non-muxed streams, look at the last page.
try: fileobj.seek(-256*256, 2)
except IOError:
# The file is less than 64k in length.
fileobj.seek(0)
data = fileobj.read()
try: index = data.rindex("OggS")
except ValueError:
raise error("unable to find final Ogg header")
stringobj = StringIO(data[index:])
best_page = None
try:
page = OggPage(stringobj)
except error:
pass
else:
if page.serial == serial:
if page.last: return page
else: best_page = page
else: best_page = None
# The stream is muxed, so use the slow way.
fileobj.seek(0)
try:
page = OggPage(fileobj)
while not page.last:
page = OggPage(fileobj)
while page.serial != serial:
page = OggPage(fileobj)
best_page = page
return page
except error:
return best_page
except EOFError:
return best_page
find_last = classmethod(find_last)
class OggFileType(FileType):
"""An generic Ogg file."""
_Info = None
_Tags = None
_Error = None
_mimes = ["application/ogg", "application/x-ogg"]
def load(self, filename):
"""Load file information from a filename."""
self.filename = filename
fileobj = file(filename, "rb")
try:
try:
self.info = self._Info(fileobj)
self.tags = self._Tags(fileobj, self.info)
if self.info.length:
# The streaminfo gave us real length information,
# don't waste time scanning the Ogg.
return
last_page = OggPage.find_last(fileobj, self.info.serial)
samples = last_page.position
try:
denom = self.info.sample_rate
except AttributeError:
denom = self.info.fps
self.info.length = samples / float(denom)
except error, e:
raise self._Error, e, sys.exc_info()[2]
except EOFError:
raise self._Error, "no appropriate stream found"
finally:
fileobj.close()
def delete(self, filename=None):
"""Remove tags from a file.
If no filename is given, the one most recently loaded is used.
"""
if filename is None:
filename = self.filename
self.tags.clear()
fileobj = file(filename, "rb+")
try:
try: self.tags._inject(fileobj)
except error, e:
raise self._Error, e, sys.exc_info()[2]
except EOFError:
raise self._Error, "no appropriate stream found"
finally:
fileobj.close()
def save(self, filename=None):
"""Save a tag to a file.
If no filename is given, the one most recently loaded is used.
"""
if filename is None:
filename = self.filename
fileobj = file(filename, "rb+")
try:
try: self.tags._inject(fileobj)
except error, e:
raise self._Error, e, sys.exc_info()[2]
except EOFError:
raise self._Error, "no appropriate stream found"
finally:
fileobj.close()

127
lib/mutagen/oggflac.py Normal file
View File

@@ -0,0 +1,127 @@
# Ogg FLAC support.
#
# Copyright 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# $Id: oggflac.py 3976 2007-01-13 22:00:14Z piman $
"""Read and write Ogg FLAC comments.
This module handles FLAC files wrapped in an Ogg bitstream. The first
FLAC stream found is used. For 'naked' FLACs, see mutagen.flac.
This module is based off the specification at
http://flac.sourceforge.net/ogg_mapping.html.
"""
__all__ = ["OggFLAC", "Open", "delete"]
import struct
from cStringIO import StringIO
from lib.mutagen.flac import StreamInfo, VCFLACDict
from lib.mutagen.ogg import OggPage, OggFileType, error as OggError
class error(OggError):
    """Base class for Ogg FLAC errors."""


class OggFLACHeaderError(error):
    """Raised when the Ogg FLAC header is not as expected."""
class OggFLACStreamInfo(StreamInfo):
    """Ogg FLAC general header and stream info.

    This encompasses the Ogg wrapper for the FLAC STREAMINFO metadata
    block, as well as the Ogg codec setup that precedes it.

    Attributes (in addition to StreamInfo's):
    packets -- number of metadata packets
    serial -- Ogg logical stream serial number
    """

    packets = 0
    serial = 0

    def load(self, data):
        """Read the first Ogg FLAC header page from the file object data.

        Raises OggFLACHeaderError if the FLAC marker is wrong or the
        mapping version is not 1.0.
        """
        # Scan forward to the page whose first packet is the FLAC
        # header. Pages without any packets are skipped instead of
        # failing with IndexError on packets[0].
        page = OggPage(data)
        while not (page.packets and
                   page.packets[0].startswith("\x7FFLAC")):
            page = OggPage(data)
        major, minor, self.packets, flac = struct.unpack(
            ">BBH4s", page.packets[0][5:13])
        if flac != "fLaC":
            raise OggFLACHeaderError("invalid FLAC marker (%r)" % flac)
        elif (major, minor) != (1, 0):
            raise OggFLACHeaderError(
                "unknown mapping version: %d.%d" % (major, minor))
        self.serial = page.serial

        # Skip over the block header.
        stringobj = StringIO(page.packets[0][17:])
        super(OggFLACStreamInfo, self).load(stringobj)

    def pprint(self):
        """Return StreamInfo's summary prefixed with 'Ogg '."""
        return "Ogg " + super(OggFLACStreamInfo, self).pprint()
class OggFLACVComment(VCFLACDict):
    """Vorbis comment block of an Ogg FLAC stream."""

    def load(self, data, info, errors='replace'):
        """Read the comment packet of stream info.serial from data."""
        # data should be pointing at the start of an Ogg page, after
        # the first FLAC page.
        pages = []
        while True:
            page = OggPage(data)
            if page.serial != info.serial:
                continue
            pages.append(page)
            # Stop once the first packet has been read in full.
            if page.complete or len(page.packets) > 1:
                break
        first_packet = OggPage.to_packets(pages)[0]
        comment = StringIO(first_packet[4:])
        super(OggFLACVComment, self).load(comment, errors=errors)

    def _inject(self, fileobj):
        """Write tag data into the FLAC Vorbis comment packet/page."""

        # Ogg FLAC has no convenient data marker like Vorbis, but the
        # second packet - and second page - must be the comment data.
        fileobj.seek(0)
        while True:
            page = OggPage(fileobj)
            if page.packets[0].startswith("\x7FFLAC"):
                break

        first_page = page
        while page.sequence != 1 or page.serial != first_page.serial:
            page = OggPage(fileobj)

        old_pages = [page]
        while (not old_pages[-1].complete
               and len(old_pages[-1].packets) <= 1):
            page = OggPage(fileobj)
            if page.serial == first_page.serial:
                old_pages.append(page)

        packets = OggPage.to_packets(old_pages, strict=False)

        # Set the new comment block: the old first byte, the new
        # length in three bytes, then the comment data.
        comment = self.write()
        length_field = struct.pack(">I", len(comment))[-3:]
        packets[0] = packets[0][0] + length_field + comment

        new_pages = OggPage.from_packets(packets, old_pages[0].sequence)
        OggPage.replace(fileobj, old_pages, new_pages)
class OggFLAC(OggFileType):
    """An Ogg FLAC file."""

    _Info = OggFLACStreamInfo
    _Tags = OggFLACVComment
    _Error = OggFLACHeaderError
    _mimes = ["audio/x-oggflac"]

    @staticmethod
    def score(filename, fileobj, header):
        """Return 0 unless ``header`` starts with "OggS"; otherwise the
        number of FLAC markers ("FLAC", "fLaC") found in it."""
        is_ogg = header.startswith("OggS")
        marker_hits = ("FLAC" in header) + ("fLaC" in header)
        return is_ogg * marker_hits
# Generic alias for this module's file type class.
Open = OggFLAC


def delete(filename):
    """Remove tags from a file."""
    ogg_file = OggFLAC(filename)
    ogg_file.delete()

123
lib/mutagen/oggspeex.py Normal file
View File

@@ -0,0 +1,123 @@
# Ogg Speex support.
#
# Copyright 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# $Id: oggspeex.py 3976 2007-01-13 22:00:14Z piman $
"""Read and write Ogg Speex comments.
This module handles Speex files wrapped in an Ogg bitstream. The
first Speex stream found is used.
Read more about Ogg Speex at http://www.speex.org/. This module is
based on the specification at http://www.speex.org/manual2/node7.html
and clarifications after personal communication with Jean-Marc,
http://lists.xiph.org/pipermail/speex-dev/2006-July/004676.html.
"""
__all__ = ["OggSpeex", "Open", "delete"]
from lib.mutagen._vorbis import VCommentDict
from lib.mutagen.ogg import OggPage, OggFileType, error as OggError
from lib.mutagen._util import cdata
class error(OggError):
    """Base class for the errors defined in this module."""
class OggSpeexHeaderError(error):
    """Raised when the Speex header page is not usable."""
class OggSpeexInfo(object):
    """Ogg Speex stream information.

    Attributes:
    bitrate - nominal bitrate in bits per second
    channels - number of channels
    length - file length in seconds, as a float
    sample_rate - value read from bytes 36-39 of the header packet
    serial - serial number of the page holding the header

    The reference encoder does not set the bitrate; in this case,
    the bitrate will be 0.
    """

    length = 0

    def __init__(self, fileobj):
        """Read pages from ``fileobj`` until the Speex header is found.

        Raises OggSpeexHeaderError if the header page is not marked as
        the first page of its stream.
        """
        while True:
            page = OggPage(fileobj)
            if page.packets[0].startswith("Speex "):
                break
        if not page.first:
            raise OggSpeexHeaderError(
                "page has ID header, but doesn't start a stream")
        header = page.packets[0]
        self.sample_rate = cdata.uint_le(header[36:40])
        self.channels = cdata.uint_le(header[48:52])
        # The bitrate field is read as signed; negative values become 0.
        raw_bitrate = cdata.int_le(header[52:56])
        self.bitrate = max(0, raw_bitrate)
        self.serial = page.serial

    def pprint(self):
        """Return a one-line description including the length."""
        return "Ogg Speex, %.2f seconds" % self.length
class OggSpeexVComment(VCommentDict):
    """Speex comments embedded in an Ogg bitstream."""

    def __init__(self, fileobj, info):
        """Collect the comment packet of stream ``info.serial`` and parse it."""
        matching = []
        while True:
            page = OggPage(fileobj)
            if page.serial != info.serial:
                continue
            matching.append(page)
            # Done when the packet ends on this page or the page holds
            # more than one packet.
            if page.complete or (len(page.packets) > 1):
                break
        # The packet is passed on with one extra 0x01 byte appended.
        data = OggPage.to_packets(matching)[0] + "\x01"
        super(OggSpeexVComment, self).__init__(data, framing=False)

    def _inject(self, fileobj):
        """Write tag data into the Speex comment packet/page."""
        fileobj.seek(0)

        # Find the first header page, with the stream info.
        # Use it to get the serial number.
        while True:
            page = OggPage(fileobj)
            if page.packets[0].startswith("Speex "):
                break
        serial = page.serial

        # Look for the next page with that serial number, it'll start
        # the comment packet.
        while True:
            page = OggPage(fileobj)
            if page.serial == serial:
                break

        # Then find all the pages with the comment packet.
        old_pages = [page]
        tail = page
        while not tail.complete and len(tail.packets) <= 1:
            candidate = OggPage(fileobj)
            if candidate.serial == serial:
                old_pages.append(candidate)
                tail = candidate
        packets = OggPage.to_packets(old_pages, strict=False)

        # Set the new comment packet.
        packets[0] = self.write(framing=False)

        start_sequence = old_pages[0].sequence
        new_pages = OggPage.from_packets(packets, start_sequence)
        OggPage.replace(fileobj, old_pages, new_pages)
class OggSpeex(OggFileType):
    """An Ogg Speex file."""

    _Info = OggSpeexInfo
    _Tags = OggSpeexVComment
    _Error = OggSpeexHeaderError
    _mimes = ["audio/x-speex"]

    @staticmethod
    def score(filename, fileobj, header):
        """Return 1 when ``header`` starts with "OggS" and contains the
        Speex marker, otherwise 0."""
        is_ogg = header.startswith("OggS")
        has_marker = "Speex " in header
        return is_ogg * has_marker
# Generic alias for this module's file type class.
Open = OggSpeex


def delete(filename):
    """Remove tags from a file."""
    speex_file = OggSpeex(filename)
    speex_file.delete()

111
lib/mutagen/oggtheora.py Normal file
View File

@@ -0,0 +1,111 @@
# Ogg Theora support.
#
# Copyright 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# $Id: oggtheora.py 3976 2007-01-13 22:00:14Z piman $
"""Read and write Ogg Theora comments.
This module handles Theora files wrapped in an Ogg bitstream. The
first Theora stream found is used.
Based on the specification at http://theora.org/doc/Theora_I_spec.pdf.
"""
__all__ = ["OggTheora", "Open", "delete"]
import struct
from lib.mutagen._vorbis import VCommentDict
from lib.mutagen.ogg import OggPage, OggFileType, error as OggError
class error(OggError):
    """Base class for the errors defined in this module."""
class OggTheoraHeaderError(error):
    """Raised when the Theora identification header is not usable."""
class OggTheoraInfo(object):
    """Ogg Theora stream information.

    Attributes:
    length - file length in seconds, as a float
    fps - video frames per second, as a float
    bitrate - bitrate read from the identification header, as an int
    serial - serial number of the page holding the header
    """

    length = 0

    def __init__(self, fileobj):
        """Read pages from ``fileobj`` until the Theora header is found.

        Raises OggTheoraHeaderError if the header page does not start a
        stream, if the version is not 3.2, or if the frame rate
        denominator is zero.
        """
        page = OggPage(fileobj)
        while not page.packets[0].startswith("\x80theora"):
            page = OggPage(fileobj)
        if not page.first:
            raise OggTheoraHeaderError(
                "page has ID header, but doesn't start a stream")
        data = page.packets[0]
        vmaj, vmin = struct.unpack("2B", data[7:9])
        if (vmaj, vmin) != (3, 2):
            raise OggTheoraHeaderError(
                "found Theora version %d.%d != 3.2" % (vmaj, vmin))
        fps_num, fps_den = struct.unpack(">2I", data[22:30])
        # A zero denominator would otherwise surface as a bare
        # ZeroDivisionError instead of this module's header error.
        if not fps_den:
            raise OggTheoraHeaderError("frame rate denominator is zero")
        self.fps = fps_num / float(fps_den)
        # The bitrate is a 3-byte big-endian field. To read it with ">I"
        # the padding byte must go in front; appending it would multiply
        # the value by 256.
        self.bitrate = struct.unpack(">I", "\x00" + data[37:40])[0]
        self.serial = page.serial

    def pprint(self):
        """Return a one-line description with length and bitrate."""
        return "Ogg Theora, %.2f seconds, %d bps" % (self.length, self.bitrate)
class OggTheoraCommentDict(VCommentDict):
    """Theora comments embedded in an Ogg bitstream."""

    def __init__(self, fileobj, info):
        """Collect the comment packet of stream ``info.serial`` and parse it."""
        matching = []
        while True:
            page = OggPage(fileobj)
            if page.serial != info.serial:
                continue
            matching.append(page)
            if page.complete or (len(page.packets) > 1):
                break
        # Drop the 7-byte packet prefix; one 0x01 byte is appended
        # before the data is handed to the parent parser.
        packet = OggPage.to_packets(matching)[0]
        data = packet[7:]
        super(OggTheoraCommentDict, self).__init__(data + "\x01")

    def _inject(self, fileobj):
        """Write tag data into the Theora comment packet/page."""
        fileobj.seek(0)
        while True:
            page = OggPage(fileobj)
            if page.packets[0].startswith("\x81theora"):
                break
        serial = page.serial

        # Gather every page that holds part of the old comment packet.
        old_pages = [page]
        tail = page
        while not tail.complete and len(tail.packets) <= 1:
            candidate = OggPage(fileobj)
            if candidate.serial == serial:
                old_pages.append(candidate)
                tail = candidate
        packets = OggPage.to_packets(old_pages, strict=False)

        # Replace the comment packet, keeping its marker prefix.
        packets[0] = "\x81theora" + self.write(framing=False)

        start_sequence = old_pages[0].sequence
        new_pages = OggPage.from_packets(packets, start_sequence)
        OggPage.replace(fileobj, old_pages, new_pages)
class OggTheora(OggFileType):
    """An Ogg Theora file."""

    _Info = OggTheoraInfo
    _Tags = OggTheoraCommentDict
    _Error = OggTheoraHeaderError
    _mimes = ["video/x-theora"]

    @staticmethod
    def score(filename, fileobj, header):
        """Return 0 unless ``header`` starts with "OggS"; otherwise the
        number of the two Theora packet markers found in it."""
        is_ogg = header.startswith("OggS")
        marker_hits = ("\x80theora" in header) + ("\x81theora" in header)
        return is_ogg * marker_hits
# Generic alias for this module's file type class.
Open = OggTheora


def delete(filename):
    """Remove tags from a file."""
    theora_file = OggTheora(filename)
    theora_file.delete()

128
lib/mutagen/oggvorbis.py Normal file
View File

@@ -0,0 +1,128 @@
# Ogg Vorbis support.
#
# Copyright 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# $Id: oggvorbis.py 3976 2007-01-13 22:00:14Z piman $
"""Read and write Ogg Vorbis comments.
This module handles Vorbis files wrapped in an Ogg bitstream. The
first Vorbis stream found is used.
Read more about Ogg Vorbis at http://vorbis.com/. This module is based
on the specification at http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html.
"""
__all__ = ["OggVorbis", "Open", "delete"]
import struct
from lib.mutagen._vorbis import VCommentDict
from lib.mutagen.ogg import OggPage, OggFileType, error as OggError
class error(OggError):
    """Base class for the errors defined in this module."""
class OggVorbisHeaderError(error):
    """Raised when the Vorbis identification header is not usable."""
class OggVorbisInfo(object):
    """Ogg Vorbis stream information.

    Attributes:
    length - file length in seconds, as a float
    bitrate - nominal ('average') bitrate in bits per second, as an int
    """

    length = 0

    def __init__(self, fileobj):
        """Read pages from ``fileobj`` until the Vorbis header is found.

        Raises OggVorbisHeaderError if the header page is not marked as
        the first page of its stream.
        """
        while True:
            page = OggPage(fileobj)
            if page.packets[0].startswith("\x01vorbis"):
                break
        if not page.first:
            raise OggVorbisHeaderError(
                "page has ID header, but doesn't start a stream")

        fields = struct.unpack("<B4i", page.packets[0][11:28])
        self.channels = fields[0]
        self.sample_rate = fields[1]
        self.serial = page.serial

        # Negative bitrate fields are treated as 0.
        max_bitrate = max(0, fields[2])
        nominal_bitrate = max(0, fields[3])
        min_bitrate = max(0, fields[4])

        if nominal_bitrate == 0:
            chosen = (max_bitrate + min_bitrate) // 2
        elif 0 < max_bitrate < nominal_bitrate:
            # If the max bitrate is less than the nominal, we know
            # the nominal is wrong.
            chosen = max_bitrate
        elif min_bitrate > nominal_bitrate:
            chosen = min_bitrate
        else:
            chosen = nominal_bitrate
        self.bitrate = chosen

        # With no usable header bitrate, fall back to file size over
        # length when a positive length is available.
        if self.bitrate == 0 and self.length > 0:
            fileobj.seek(0, 2)
            total_bits = fileobj.tell() * 8
            self.bitrate = int(total_bits / self.length)

    def pprint(self):
        """Return a one-line description with length and bitrate."""
        return "Ogg Vorbis, %.2f seconds, %d bps" % (self.length, self.bitrate)
class OggVCommentDict(VCommentDict):
    """Vorbis comments embedded in an Ogg bitstream."""

    def __init__(self, fileobj, info):
        """Collect the comment packet of stream ``info.serial`` and parse it."""
        matching = []
        while True:
            page = OggPage(fileobj)
            if page.serial != info.serial:
                continue
            matching.append(page)
            if page.complete or (len(page.packets) > 1):
                break
        packet = OggPage.to_packets(matching)[0]
        # Strip off the 7-byte comment packet marker.
        data = packet[7:]
        super(OggVCommentDict, self).__init__(data)

    def _inject(self, fileobj):
        """Write tag data into the Vorbis comment packet/page."""
        # Find the old pages in the file; we'll need to remove them,
        # plus grab any stray setup packet data out of them.
        fileobj.seek(0)
        while True:
            page = OggPage(fileobj)
            if page.packets[0].startswith("\x03vorbis"):
                break
        serial = page.serial

        old_pages = [page]
        tail = page
        while not tail.complete and len(tail.packets) <= 1:
            candidate = OggPage(fileobj)
            if candidate.serial == serial:
                old_pages.append(candidate)
                tail = candidate
        packets = OggPage.to_packets(old_pages, strict=False)

        # Set the new comment packet.
        packets[0] = "\x03vorbis" + self.write()

        start_sequence = old_pages[0].sequence
        new_pages = OggPage.from_packets(packets, start_sequence)
        OggPage.replace(fileobj, old_pages, new_pages)
class OggVorbis(OggFileType):
    """An Ogg Vorbis file."""

    _Info = OggVorbisInfo
    _Tags = OggVCommentDict
    _Error = OggVorbisHeaderError
    _mimes = ["audio/vorbis", "audio/x-vorbis"]

    @staticmethod
    def score(filename, fileobj, header):
        """Return 1 when ``header`` starts with "OggS" and contains the
        Vorbis identification marker, otherwise 0."""
        is_ogg = header.startswith("OggS")
        has_marker = "\x01vorbis" in header
        return is_ogg * has_marker
# Generic alias for this module's file type class.
Open = OggVorbis


def delete(filename):
    """Remove tags from a file."""
    vorbis_file = OggVorbis(filename)
    vorbis_file.delete()

64
lib/mutagen/optimfrog.py Normal file
View File

@@ -0,0 +1,64 @@
# OptimFROG reader/tagger
#
# Copyright 2006 Lukas Lalinsky <lalinsky@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# $Id: optimfrog.py 3923 2006-10-21 15:07:13Z luks $
"""OptimFROG audio streams with APEv2 tags.
OptimFROG is a lossless audio compression program. Its main goal is to
reduce the size of audio files as much as possible, while permitting
bit-identical restoration for all input. It is similar to ZIP
compression, but it is highly specialized for compressing audio data.
Only versions 4.5 and higher are supported.
For more information, see http://www.losslessaudio.org/
"""
__all__ = ["OptimFROG", "Open", "delete"]
import struct
from lib.mutagen.apev2 import APEv2File, error, delete
class OptimFROGHeaderError(error):
    """Raised when a file does not begin with a usable OptimFROG header."""
class OptimFROGInfo(object):
    """OptimFROG stream information.

    Attributes:
    channels - number of audio channels
    length - file length in seconds, as a float
    sample_rate - audio sampling rate in Hz
    """

    def __init__(self, fileobj):
        """Parse the first 76 bytes of ``fileobj`` as an OptimFROG header.

        Raises OptimFROGHeaderError when fewer than 76 bytes are
        available, the data does not start with "OFR ", or the 32-bit
        value at bytes 4-7 is neither 12 nor 15.
        """
        header = fileobj.read(76)
        valid = len(header) == 76 and header.startswith("OFR ")
        if valid:
            valid = struct.unpack("<I", header[4:8])[0] in [12, 15]
        if not valid:
            raise OptimFROGHeaderError("not an OptimFROG file")

        fields = struct.unpack("<IHBBI", header[8:20])
        # The sample count is split into a 32-bit low part and a 16-bit
        # high part.
        total_samples = fields[0] + (fields[1] << 32)
        # The stored channel value is one less than the channel count.
        self.channels = fields[3] + 1
        self.sample_rate = fields[4]

        if not self.sample_rate:
            self.length = 0.0
        else:
            divisor = self.channels * self.sample_rate
            self.length = float(total_samples) / divisor

    def pprint(self):
        """Return a one-line description with length and sample rate."""
        details = (self.length, self.sample_rate)
        return "OptimFROG, %.2f seconds, %d Hz" % details
class OptimFROG(APEv2File):
    """An OptimFROG file; tag handling comes from APEv2File."""

    _Info = OptimFROGInfo

    def score(filename, fileobj, header):
        """Score how likely the file is OptimFROG.

        One point each for a header starting with "OFR" and for a
        filename ending in ".ofr" or ".ofs". The extension is compared
        case-insensitively so that names such as "TRACK.OFR" also match.
        """
        filename = filename.lower()
        return (header.startswith("OFR") + filename.endswith(".ofr") +
                filename.endswith(".ofs"))
    score = staticmethod(score)

62
lib/mutagen/trueaudio.py Normal file
View File

@@ -0,0 +1,62 @@
# True Audio support for Mutagen
# Copyright 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
"""True Audio audio stream information and tags.
True Audio is a lossless format designed for real-time encoding and
decoding. This module is based on the documentation at
http://www.true-audio.com/TTA_Lossless_Audio_Codec_-_Format_Description
True Audio files use ID3 tags.
"""
__all__ = ["TrueAudio", "Open", "delete", "EasyTrueAudio"]
from lib.mutagen.id3 import ID3FileType, delete
from lib.mutagen._util import cdata
class error(RuntimeError):
    """Base class for the errors defined in this module."""
class TrueAudioHeaderError(error, IOError):
    """Raised when the TTA header cannot be found."""
class TrueAudioInfo(object):
    """True Audio stream information.

    Attributes:
    length - audio length, in seconds
    sample_rate - audio sample rate, in Hz
    """

    def __init__(self, fileobj, offset):
        """Parse the 18-byte TTA header found at ``offset`` in ``fileobj``.

        A false ``offset`` (None or 0) is treated as the start of the
        file. Raises TrueAudioHeaderError when fewer than 18 bytes can
        be read or the data does not start with "TTA".
        """
        fileobj.seek(offset or 0)
        header = fileobj.read(18)
        if len(header) != 18 or not header.startswith("TTA"):
            raise TrueAudioHeaderError("TTA header not found")
        self.sample_rate = cdata.int_le(header[10:14])
        samples = cdata.uint_le(header[14:18])
        # A header with a zero sample rate would otherwise raise
        # ZeroDivisionError; report an unknown length instead.
        if self.sample_rate:
            self.length = float(samples) / self.sample_rate
        else:
            self.length = 0.0

    def pprint(self):
        """Return a one-line description with length and sample rate."""
        return "True Audio, %.2f seconds, %d Hz." % (
            self.length, self.sample_rate)
class TrueAudio(ID3FileType):
    """A True Audio file."""

    _Info = TrueAudioInfo
    _mimes = ["audio/x-tta"]

    @staticmethod
    def score(filename, fileobj, header):
        """Score how likely the file is True Audio.

        One point each for a header starting with "ID3" or "TTA", plus
        two points for a filename ending in ".tta" (any case).
        """
        points = header.startswith("ID3") + header.startswith("TTA")
        if filename.lower().endswith(".tta"):
            points += 2
        return points
# Generic alias for this module's file type class.
Open = TrueAudio


class EasyTrueAudio(TrueAudio):
    """Like TrueAudio, but uses EasyID3 for tags."""
    # The import binds EasyID3 as the class attribute ``ID3``.
    from lib.mutagen.easyid3 import EasyID3 as ID3

57
lib/mutagen/wavpack.py Normal file
View File

@@ -0,0 +1,57 @@
# A WavPack reader/tagger
#
# Copyright 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# $Id: wavpack.py 3997 2007-02-25 21:44:53Z piman $
"""WavPack reading and writing.
WavPack is a lossless format that uses APEv2 tags. Read
http://www.wavpack.com/ for more information.
"""
__all__ = ["WavPack", "Open", "delete"]
from lib.mutagen.apev2 import APEv2File, error, delete
from lib.mutagen._util import cdata
class WavPackHeaderError(error):
    """Raised when a file does not begin with a usable WavPack header."""
# Sample rates in Hz, indexed by the 4-bit field that WavPackInfo extracts
# from the header flags with (flags >> 23) & 0xF. The table has 15 entries,
# so index 15 has no corresponding rate.
RATES = [6000, 8000, 9600, 11025, 12000, 16000, 22050, 24000, 32000, 44100,
         48000, 64000, 88200, 96000, 192000]
class WavPackInfo(object):
    """WavPack stream information.

    Attributes:
    channels - number of audio channels (1 or 2)
    length - file length in seconds, as a float
    sample_rate - audio sampling rate in Hz
    version - WavPack stream version
    """

    def __init__(self, fileobj):
        """Parse the first 28 bytes of ``fileobj`` as a WavPack header.

        Raises WavPackHeaderError when fewer than 28 bytes are
        available, the data does not start with "wvpk", or the sample
        rate index in the flags has no entry in RATES.
        """
        header = fileobj.read(28)
        if len(header) != 28 or not header.startswith("wvpk"):
            raise WavPackHeaderError("not a WavPack file")
        samples = cdata.uint_le(header[12:16])
        flags = cdata.uint_le(header[24:28])
        self.version = cdata.short_le(header[8:10])
        # Flag bit 2 set means one channel, otherwise two. Store a real
        # int; the previous expression stored the bool True for mono.
        if flags & 4:
            self.channels = 1
        else:
            self.channels = 2
        # The 4-bit rate field can be 15, which is past the end of
        # RATES; report that as a header error rather than IndexError.
        rate_index = (flags >> 23) & 0xF
        if rate_index >= len(RATES):
            raise WavPackHeaderError(
                "unsupported sample rate index %d" % rate_index)
        self.sample_rate = RATES[rate_index]
        self.length = float(samples) / self.sample_rate

    def pprint(self):
        """Return a one-line description with length and sample rate."""
        return "WavPack, %.2f seconds, %d Hz" % (self.length, self.sample_rate)
class WavPack(APEv2File):
    """A WavPack file; tag handling comes from APEv2File."""

    _Info = WavPackInfo
    _mimes = ["audio/x-wavpack"]

    @staticmethod
    def score(filename, fileobj, header):
        """Return 2 when ``header`` starts with "wvpk", otherwise 0."""
        has_magic = header.startswith("wvpk")
        return has_magic * 2

View File

@@ -217,7 +217,7 @@ class Headphones:
releaseid = u.extractUuid(release.id)
inc = ws.ReleaseIncludes(artist=True, releaseEvents= True, tracks= True, releaseGroup=True)
results = ws.Query().getReleaseById(releaseid, inc)
time.sleep(0.6)
time.sleep(1)
for event in results.releaseEvents:
if event.country == 'US':
@@ -350,23 +350,37 @@ class Headphones:
path = config['General']['path_to_xml']
except:
path = 'Absolute path to iTunes XML or Top-Level Music Directory'
try:
path2 = config['General']['path_to_itunes']
except:
path2 = 'Enter a directory to scan'
page = [templates._header]
page.append(templates._logobar)
page.append(templates._nav)
page.append('''<div class="table"><div class="config"><h1>Import or Sync Your iTunes Library/Music Folder</h1><br />
Enter the full path to your iTunes XML file or music folder<br /><br />
i.e. /Users/"username"/Music/iTunes/iTunes Music Library.xml<br />
<i>or</i> /Users/"username"/Music/iTunes/iTunes Media/Music <br /><br />(artists should have their own directories for folder import to work)
<br /><br />note: This process can take a LONG time!<br /><br />
Once you click "Submit" you can navigate away from this
page while the process runs.<br /><br /><br />
page.append('''
<div class="table"><div class="config"><h1>Scan Music Library</h1><br />
Where do you keep your music?<br /><br />
You can put in any directory, and it will scan for audio files in that folder
(including all subdirectories)<br /><br /> For example: '/Users/name/Music'
<br /> <br />
It may take a while depending on how many files you have. You can navigate away from the page<br />
as soon as you click 'Submit'
<br /><br />
<form action="musicScan" method="GET" align="center">
<input type="text" value="%s" onfocus="if
(this.value==this.defaultValue) this.value='';" name="path" size="70" />
<input type="submit" /></form><br /><br /></div></div>
<div class="table"><div class="config"><h1>Import or Sync Your iTunes Library/Music Folder</h1><br />
This is here for legacy purposes (try the Music Scanner above!) <br /><br />
If you'd rather import an iTunes .xml file, you can enter the full path here. <br /><br />
<form action="importItunes" method="GET" align="center">
<input type="text" value="%s" onfocus="if
(this.value==this.defaultValue) this.value='';" name="path" size="70" />
<input type="submit" /></form><br /><br /></div></div>
<div class="table"><div class="config"><h1>Force Search</h1><br />
<a href="forceSearch">Force Check for Wanted Albums</a><br /><br />
<a href="forceUpdate">Force Update Active Artists </a><br /><br /><br /></div></div>''' % path)
<a href="forceUpdate">Force Update Active Artists </a><br /><br /><br /></div></div>''' % (path2, path))
page.append(templates._footer)
return page
manage.exposed = True
@@ -380,6 +394,15 @@ class Headphones:
raise cherrypy.HTTPRedirect("home")
importItunes.exposed = True
def musicScan(self, path):
    """Store ``path`` in the config file, scan it for music, then redirect.

    The path is written under the 'path_to_itunes' key of the 'General'
    section before itunesimport.scanMusic(path) is called. Once the scan
    returns, a cherrypy.HTTPRedirect to "home" is raised.
    """
    settings = configobj.ConfigObj(config_file)
    general = settings['General']
    general['path_to_itunes'] = path
    settings.write()
    # Imported here rather than at module level, as in the original.
    import itunesimport
    itunesimport.scanMusic(path)
    raise cherrypy.HTTPRedirect("home")
musicScan.exposed = True
def forceUpdate(self):
import updater
updater.dbUpdate()